duckdb 0.4.1-dev89.0 → 0.4.1-dev93.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +114 -67
- package/src/duckdb.hpp +15 -2
- package/src/parquet-amalgamation.cpp +36864 -36864
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -43597,6 +43597,7 @@ int64_t Timestamp::GetEpochNanoSeconds(timestamp_t timestamp) {
|
|
|
43597
43597
|
} // namespace duckdb
|
|
43598
43598
|
|
|
43599
43599
|
|
|
43600
|
+
|
|
43600
43601
|
namespace duckdb {
|
|
43601
43602
|
|
|
43602
43603
|
bool UUID::FromString(string str, hugeint_t &result) {
|
|
@@ -43681,6 +43682,45 @@ void UUID::ToString(hugeint_t input, char *buf) {
|
|
|
43681
43682
|
byte_to_hex(input.lower & 0xFF, buf, pos);
|
|
43682
43683
|
}
|
|
43683
43684
|
|
|
43685
|
+
//! Generate a random version-4 UUID, drawing its random bits from `engine`.
//! The 16 UUID bytes are packed big-endian into the result: bytes 0-7 form
//! `upper` and bytes 8-15 form `lower`.
hugeint_t UUID::GenerateRandomUUID(RandomEngine &engine) {
	uint8_t bytes[16];
	for (int i = 0; i < 16; i += 4) {
		// split each random word with shifts instead of type-punning the byte buffer,
		// which avoids any alignment/aliasing assumptions about `bytes`
		uint32_t word = engine.NextRandomInteger();
		bytes[i] = uint8_t(word & 0xFF);
		bytes[i + 1] = uint8_t((word >> 8) & 0xFF);
		bytes[i + 2] = uint8_t((word >> 16) & 0xFF);
		bytes[i + 3] = uint8_t((word >> 24) & 0xFF);
	}
	// variant must be 10xxxxxx
	bytes[8] &= 0x3F;
	bytes[8] |= 0x80;
	// version must be 0100xxxx
	bytes[6] &= 0x0F;
	bytes[6] |= 0x40;

	// every byte is used exactly once and in order, so the version nibble ends up
	// in UUID byte 6 and the variant bits in UUID byte 8
	uint64_t upper_bits = 0;
	uint64_t lower_bits = 0;
	for (int i = 0; i < 8; i++) {
		upper_bits = (upper_bits << 8) | bytes[i];
		lower_bits = (lower_bits << 8) | bytes[i + 8];
	}

	hugeint_t result;
	// accumulate in unsigned arithmetic and convert once, rather than shifting
	// a signed value into its sign bit
	result.upper = static_cast<int64_t>(upper_bits);
	result.lower = lower_bits;
	return result;
}
|
|
43718
|
+
|
|
43719
|
+
//! Generate a random UUID using a freshly default-constructed RandomEngine.
//! Convenience overload that forwards to GenerateRandomUUID(RandomEngine &).
hugeint_t UUID::GenerateRandomUUID() {
	RandomEngine local_engine;
	return UUID::GenerateRandomUUID(local_engine);
}
|
|
43723
|
+
|
|
43684
43724
|
} // namespace duckdb
|
|
43685
43725
|
|
|
43686
43726
|
|
|
@@ -69630,6 +69670,22 @@ void BufferedCSVReaderOptions::SetDelimiter(const string &input) {
|
|
|
69630
69670
|
}
|
|
69631
69671
|
}
|
|
69632
69672
|
|
|
69673
|
+
//! Parse `format` as the date/timestamp format for `type` and register it on these options.
//! When `read_format` is true the parsed format is stored in `date_format` (together with
//! the raw specifier string); otherwise it is stored in `write_date_format`.
//! On success `has_format[type]` is set to true.
//! Throws InvalidInputException if the format specifier cannot be parsed.
void BufferedCSVReaderOptions::SetDateFormat(LogicalTypeId type, const string &format, bool read_format) {
	string error;
	if (read_format) {
		auto &date_format = this->date_format[type];
		error = StrTimeFormat::ParseFormatSpecifier(format, date_format);
		date_format.format_specifier = format;
	} else {
		auto &date_format = this->write_date_format[type];
		error = StrTimeFormat::ParseFormatSpecifier(format, date_format);
	}
	if (!error.empty()) {
		// report the option that was actually being set, not always DATEFORMAT
		const char *option_name = type == LogicalTypeId::TIMESTAMP ? "TIMESTAMPFORMAT" : "DATEFORMAT";
		throw InvalidInputException("Could not parse %s: %s", option_name, error.c_str());
	}
	has_format[type] = true;
}
|
|
69688
|
+
|
|
69633
69689
|
void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value &value,
|
|
69634
69690
|
vector<string> &expected_names) {
|
|
69635
69691
|
if (SetBaseOption(loption, value)) {
|
|
@@ -69674,22 +69730,10 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
|
|
|
69674
69730
|
force_not_null = ParseColumnList(value, expected_names, loption);
|
|
69675
69731
|
} else if (loption == "date_format" || loption == "dateformat") {
|
|
69676
69732
|
string format = ParseString(value, loption);
|
|
69677
|
-
|
|
69678
|
-
string error = StrTimeFormat::ParseFormatSpecifier(format, date_format);
|
|
69679
|
-
date_format.format_specifier = format;
|
|
69680
|
-
if (!error.empty()) {
|
|
69681
|
-
throw InvalidInputException("Could not parse DATEFORMAT: %s", error.c_str());
|
|
69682
|
-
}
|
|
69683
|
-
has_format[LogicalTypeId::DATE] = true;
|
|
69733
|
+
SetDateFormat(LogicalTypeId::DATE, format, true);
|
|
69684
69734
|
} else if (loption == "timestamp_format" || loption == "timestampformat") {
|
|
69685
69735
|
string format = ParseString(value, loption);
|
|
69686
|
-
|
|
69687
|
-
string error = StrTimeFormat::ParseFormatSpecifier(format, timestamp_format);
|
|
69688
|
-
timestamp_format.format_specifier = format;
|
|
69689
|
-
if (!error.empty()) {
|
|
69690
|
-
throw InvalidInputException("Could not parse TIMESTAMPFORMAT: %s", error.c_str());
|
|
69691
|
-
}
|
|
69692
|
-
has_format[LogicalTypeId::TIMESTAMP] = true;
|
|
69736
|
+
SetDateFormat(LogicalTypeId::TIMESTAMP, format, true);
|
|
69693
69737
|
} else if (loption == "escape") {
|
|
69694
69738
|
escape = ParseString(value, loption);
|
|
69695
69739
|
has_escape = true;
|
|
@@ -69707,6 +69751,15 @@ void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value
|
|
|
69707
69751
|
|
|
69708
69752
|
if (loption == "force_quote") {
|
|
69709
69753
|
force_quote = ParseColumnList(value, names, loption);
|
|
69754
|
+
} else if (loption == "date_format" || loption == "dateformat") {
|
|
69755
|
+
string format = ParseString(value, loption);
|
|
69756
|
+
SetDateFormat(LogicalTypeId::DATE, format, false);
|
|
69757
|
+
} else if (loption == "timestamp_format" || loption == "timestampformat") {
|
|
69758
|
+
string format = ParseString(value, loption);
|
|
69759
|
+
if (StringUtil::Lower(format) == "iso") {
|
|
69760
|
+
format = "%Y-%m-%dT%H:%M:%S.%fZ";
|
|
69761
|
+
}
|
|
69762
|
+
SetDateFormat(LogicalTypeId::TIMESTAMP, format, false);
|
|
69710
69763
|
} else {
|
|
69711
69764
|
throw BinderException("Unrecognized option CSV writer \"%s\"", loption);
|
|
69712
69765
|
}
|
|
@@ -93245,6 +93298,25 @@ static unique_ptr<FunctionData> StrfTimeBindFunction(ClientContext &context, Sca
|
|
|
93245
93298
|
return make_unique<StrfTimeBindData>(format, format_string);
|
|
93246
93299
|
}
|
|
93247
93300
|
|
|
93301
|
+
//! Format the first `count` DATE values of `input` into VARCHAR values in `result`
//! using this format. Non-finite dates are not formatted; their rows are marked
//! invalid in the validity mask instead.
void StrfTimeFormat::ConvertDateVector(Vector &input, Vector &result, idx_t count) {
	D_ASSERT(input.GetType().id() == LogicalTypeId::DATE);
	D_ASSERT(result.GetType().id() == LogicalTypeId::VARCHAR);
	auto format_date = [&](date_t date_value, ValidityMask &validity, idx_t row_idx) {
		if (!Date::IsFinite(date_value)) {
			validity.SetInvalid(row_idx);
			return string_t();
		}
		// a date has no time part of its own, so a zero time is passed to the formatter
		dtime_t zero_time(0);
		idx_t required_len = GetLength(date_value, zero_time, 0, nullptr);
		string_t formatted = StringVector::EmptyString(result, required_len);
		FormatString(date_value, zero_time, formatted.GetDataWriteable());
		formatted.Finalize();
		return formatted;
	};
	UnaryExecutor::ExecuteWithNulls<date_t, string_t>(input, result, count, format_date);
}
|
|
93319
|
+
|
|
93248
93320
|
template <bool REVERSED>
|
|
93249
93321
|
static void StrfTimeFunctionDate(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
93250
93322
|
auto &func_expr = (BoundFunctionExpression &)state.expr;
|
|
@@ -93255,13 +93327,21 @@ static void StrfTimeFunctionDate(DataChunk &args, ExpressionState &state, Vector
|
|
|
93255
93327
|
ConstantVector::SetNull(result, true);
|
|
93256
93328
|
return;
|
|
93257
93329
|
}
|
|
93258
|
-
|
|
93259
|
-
|
|
93260
|
-
|
|
93261
|
-
|
|
93262
|
-
|
|
93330
|
+
info.format.ConvertDateVector(args.data[REVERSED ? 1 : 0], result, args.size());
|
|
93331
|
+
}
|
|
93332
|
+
|
|
93333
|
+
void StrfTimeFormat::ConvertTimestampVector(Vector &input, Vector &result, idx_t count) {
|
|
93334
|
+
D_ASSERT(input.GetType().id() == LogicalTypeId::TIMESTAMP);
|
|
93335
|
+
D_ASSERT(result.GetType().id() == LogicalTypeId::VARCHAR);
|
|
93336
|
+
UnaryExecutor::ExecuteWithNulls<timestamp_t, string_t>(
|
|
93337
|
+
input, result, count, [&](timestamp_t input, ValidityMask &mask, idx_t idx) {
|
|
93338
|
+
if (Timestamp::IsFinite(input)) {
|
|
93339
|
+
date_t date;
|
|
93340
|
+
dtime_t time;
|
|
93341
|
+
Timestamp::Convert(input, date, time);
|
|
93342
|
+
idx_t len = GetLength(date, time, 0, nullptr);
|
|
93263
93343
|
string_t target = StringVector::EmptyString(result, len);
|
|
93264
|
-
|
|
93344
|
+
FormatString(date, time, target.GetDataWriteable());
|
|
93265
93345
|
target.Finalize();
|
|
93266
93346
|
return target;
|
|
93267
93347
|
} else {
|
|
@@ -93281,23 +93361,7 @@ static void StrfTimeFunctionTimestamp(DataChunk &args, ExpressionState &state, V
|
|
|
93281
93361
|
ConstantVector::SetNull(result, true);
|
|
93282
93362
|
return;
|
|
93283
93363
|
}
|
|
93284
|
-
|
|
93285
|
-
UnaryExecutor::ExecuteWithNulls<timestamp_t, string_t>(
|
|
93286
|
-
args.data[REVERSED ? 1 : 0], result, args.size(), [&](timestamp_t input, ValidityMask &mask, idx_t idx) {
|
|
93287
|
-
if (Timestamp::IsFinite(input)) {
|
|
93288
|
-
date_t date;
|
|
93289
|
-
dtime_t time;
|
|
93290
|
-
Timestamp::Convert(input, date, time);
|
|
93291
|
-
idx_t len = info.format.GetLength(date, time, 0, nullptr);
|
|
93292
|
-
string_t target = StringVector::EmptyString(result, len);
|
|
93293
|
-
info.format.FormatString(date, time, target.GetDataWriteable());
|
|
93294
|
-
target.Finalize();
|
|
93295
|
-
return target;
|
|
93296
|
-
} else {
|
|
93297
|
-
mask.SetInvalid(idx);
|
|
93298
|
-
return string_t();
|
|
93299
|
-
}
|
|
93300
|
-
});
|
|
93364
|
+
info.format.ConvertTimestampVector(args.data[REVERSED ? 1 : 0], result, args.size());
|
|
93301
93365
|
}
|
|
93302
93366
|
|
|
93303
93367
|
void StrfTimeFun::RegisterFunction(BuiltinFunctions &set) {
|
|
@@ -98591,35 +98655,7 @@ static void GenerateUUIDFunction(DataChunk &args, ExpressionState &state, Vector
|
|
|
98591
98655
|
auto result_data = FlatVector::GetData<hugeint_t>(result);
|
|
98592
98656
|
|
|
98593
98657
|
for (idx_t i = 0; i < args.size(); i++) {
|
|
98594
|
-
|
|
98595
|
-
for (int i = 0; i < 16; i += 4) {
|
|
98596
|
-
*reinterpret_cast<uint32_t *>(bytes + i) = lstate.random_engine.NextRandomInteger();
|
|
98597
|
-
}
|
|
98598
|
-
// variant must be 10xxxxxx
|
|
98599
|
-
bytes[8] &= 0xBF;
|
|
98600
|
-
bytes[8] |= 0x80;
|
|
98601
|
-
// version must be 0100xxxx
|
|
98602
|
-
bytes[6] &= 0x4F;
|
|
98603
|
-
bytes[6] |= 0x40;
|
|
98604
|
-
|
|
98605
|
-
result_data[i].upper = 0;
|
|
98606
|
-
result_data[i].upper |= ((int64_t)bytes[0] << 56);
|
|
98607
|
-
result_data[i].upper |= ((int64_t)bytes[1] << 48);
|
|
98608
|
-
result_data[i].upper |= ((int64_t)bytes[3] << 40);
|
|
98609
|
-
result_data[i].upper |= ((int64_t)bytes[4] << 32);
|
|
98610
|
-
result_data[i].upper |= ((int64_t)bytes[5] << 24);
|
|
98611
|
-
result_data[i].upper |= ((int64_t)bytes[6] << 16);
|
|
98612
|
-
result_data[i].upper |= ((int64_t)bytes[7] << 8);
|
|
98613
|
-
result_data[i].upper |= bytes[8];
|
|
98614
|
-
result_data[i].lower = 0;
|
|
98615
|
-
result_data[i].lower |= ((uint64_t)bytes[8] << 56);
|
|
98616
|
-
result_data[i].lower |= ((uint64_t)bytes[9] << 48);
|
|
98617
|
-
result_data[i].lower |= ((uint64_t)bytes[10] << 40);
|
|
98618
|
-
result_data[i].lower |= ((uint64_t)bytes[11] << 32);
|
|
98619
|
-
result_data[i].lower |= ((uint64_t)bytes[12] << 24);
|
|
98620
|
-
result_data[i].lower |= ((uint64_t)bytes[13] << 16);
|
|
98621
|
-
result_data[i].lower |= ((uint64_t)bytes[14] << 8);
|
|
98622
|
-
result_data[i].lower |= bytes[15];
|
|
98658
|
+
result_data[i] = UUID::GenerateRandomUUID(lstate.random_engine);
|
|
98623
98659
|
}
|
|
98624
98660
|
}
|
|
98625
98661
|
|
|
@@ -106893,6 +106929,15 @@ static void WriteCSVSink(ClientContext &context, FunctionData &bind_data, Global
|
|
|
106893
106929
|
if (csv_data.sql_types[col_idx].id() == LogicalTypeId::VARCHAR) {
|
|
106894
106930
|
// VARCHAR, just create a reference
|
|
106895
106931
|
cast_chunk.data[col_idx].Reference(input.data[col_idx]);
|
|
106932
|
+
} else if (options.has_format[LogicalTypeId::DATE] && csv_data.sql_types[col_idx].id() == LogicalTypeId::DATE) {
|
|
106933
|
+
// use the date format to cast the chunk
|
|
106934
|
+
csv_data.options.write_date_format[LogicalTypeId::DATE].ConvertDateVector(
|
|
106935
|
+
input.data[col_idx], cast_chunk.data[col_idx], input.size());
|
|
106936
|
+
} else if (options.has_format[LogicalTypeId::TIMESTAMP] &&
|
|
106937
|
+
csv_data.sql_types[col_idx].id() == LogicalTypeId::TIMESTAMP) {
|
|
106938
|
+
// use the timestamp format to cast the chunk
|
|
106939
|
+
csv_data.options.write_date_format[LogicalTypeId::TIMESTAMP].ConvertTimestampVector(
|
|
106940
|
+
input.data[col_idx], cast_chunk.data[col_idx], input.size());
|
|
106896
106941
|
} else {
|
|
106897
106942
|
// non varchar column, perform the cast
|
|
106898
106943
|
VectorOperations::Cast(input.data[col_idx], cast_chunk.data[col_idx], input.size());
|
|
@@ -118308,6 +118353,7 @@ ExtensionLoadResult ExtensionHelper::LoadExtensionInternal(DuckDB &db, const std
|
|
|
118308
118353
|
|
|
118309
118354
|
|
|
118310
118355
|
|
|
118356
|
+
|
|
118311
118357
|
#ifndef DISABLE_DUCKDB_REMOTE_INSTALL
|
|
118312
118358
|
|
|
118313
118359
|
|
|
@@ -126551,7 +126597,8 @@ void ExtensionHelper::InstallExtension(DatabaseInstance &db, const string &exten
|
|
|
126551
126597
|
return;
|
|
126552
126598
|
}
|
|
126553
126599
|
|
|
126554
|
-
|
|
126600
|
+
auto uuid = UUID::ToString(UUID::GenerateRandomUUID());
|
|
126601
|
+
string temp_path = local_extension_path + ".tmp-" + uuid;
|
|
126555
126602
|
if (fs.FileExists(temp_path)) {
|
|
126556
126603
|
fs.RemoveFile(temp_path);
|
|
126557
126604
|
}
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.4.1-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "b8df3c77b"
|
|
15
|
+
#define DUCKDB_VERSION "v0.4.1-dev93"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -19093,6 +19093,8 @@ public:
|
|
|
19093
19093
|
|
|
19094
19094
|
|
|
19095
19095
|
namespace duckdb {
|
|
19096
|
+
class ClientContext;
|
|
19097
|
+
struct RandomEngine;
|
|
19096
19098
|
|
|
19097
19099
|
//! The UUID class contains static operations for the UUID type
|
|
19098
19100
|
class UUID {
|
|
@@ -19107,6 +19109,10 @@ public:
|
|
|
19107
19109
|
//! Convert a hugeint object to a uuid style string
|
|
19108
19110
|
static void ToString(hugeint_t input, char *buf);
|
|
19109
19111
|
|
|
19112
|
+
//! Generate a random UUID as a hugeint (optionally drawing randomness from the given engine)
|
|
19113
|
+
static hugeint_t GenerateRandomUUID(RandomEngine &engine);
|
|
19114
|
+
static hugeint_t GenerateRandomUUID();
|
|
19115
|
+
|
|
19110
19116
|
//! Convert a hugeint object to a uuid style string
|
|
19111
19117
|
static string ToString(hugeint_t input) {
|
|
19112
19118
|
char buff[STRING_SIZE];
|
|
@@ -22488,6 +22494,9 @@ struct StrfTimeFormat : public StrTimeFormat {
|
|
|
22488
22494
|
|
|
22489
22495
|
DUCKDB_API static string Format(timestamp_t timestamp, const string &format);
|
|
22490
22496
|
|
|
22497
|
+
DUCKDB_API void ConvertDateVector(Vector &input, Vector &result, idx_t count);
|
|
22498
|
+
DUCKDB_API void ConvertTimestampVector(Vector &input, Vector &result, idx_t count);
|
|
22499
|
+
|
|
22491
22500
|
protected:
|
|
22492
22501
|
//! The variable-length specifiers. To determine total string size, these need to be checked.
|
|
22493
22502
|
vector<StrTimeSpecifier> var_length_specifiers;
|
|
@@ -22675,6 +22684,9 @@ struct BufferedCSVReaderOptions {
|
|
|
22675
22684
|
|
|
22676
22685
|
//! The date format to use (if any is specified)
|
|
22677
22686
|
std::map<LogicalTypeId, StrpTimeFormat> date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
|
|
22687
|
+
//! The date format to use for writing (if any is specified)
|
|
22688
|
+
std::map<LogicalTypeId, StrfTimeFormat> write_date_format = {{LogicalTypeId::DATE, {}},
|
|
22689
|
+
{LogicalTypeId::TIMESTAMP, {}}};
|
|
22678
22690
|
//! Whether or not a type format is specified
|
|
22679
22691
|
std::map<LogicalTypeId, bool> has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}};
|
|
22680
22692
|
|
|
@@ -22689,6 +22701,7 @@ struct BufferedCSVReaderOptions {
|
|
|
22689
22701
|
void SetReadOption(const string &loption, const Value &value, vector<string> &expected_names);
|
|
22690
22702
|
|
|
22691
22703
|
void SetWriteOption(const string &loption, const Value &value);
|
|
22704
|
+
void SetDateFormat(LogicalTypeId type, const string &format, bool read_format);
|
|
22692
22705
|
|
|
22693
22706
|
std::string ToString() const;
|
|
22694
22707
|
};
|