duckdb 0.4.1-dev91.0 → 0.4.1-dev93.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +70 -37
- package/src/duckdb.hpp +9 -2
- package/src/parquet-amalgamation.cpp +36834 -36834
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -69670,6 +69670,22 @@ void BufferedCSVReaderOptions::SetDelimiter(const string &input) {
|
|
|
69670
69670
|
}
|
|
69671
69671
|
}
|
|
69672
69672
|
|
|
69673
|
+
void BufferedCSVReaderOptions::SetDateFormat(LogicalTypeId type, const string &format, bool read_format) {
|
|
69674
|
+
string error;
|
|
69675
|
+
if (read_format) {
|
|
69676
|
+
auto &date_format = this->date_format[type];
|
|
69677
|
+
error = StrTimeFormat::ParseFormatSpecifier(format, date_format);
|
|
69678
|
+
date_format.format_specifier = format;
|
|
69679
|
+
} else {
|
|
69680
|
+
auto &date_format = this->write_date_format[type];
|
|
69681
|
+
error = StrTimeFormat::ParseFormatSpecifier(format, date_format);
|
|
69682
|
+
}
|
|
69683
|
+
if (!error.empty()) {
|
|
69684
|
+
throw InvalidInputException("Could not parse DATEFORMAT: %s", error.c_str());
|
|
69685
|
+
}
|
|
69686
|
+
has_format[type] = true;
|
|
69687
|
+
}
|
|
69688
|
+
|
|
69673
69689
|
void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value &value,
|
|
69674
69690
|
vector<string> &expected_names) {
|
|
69675
69691
|
if (SetBaseOption(loption, value)) {
|
|
@@ -69714,22 +69730,10 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
|
|
|
69714
69730
|
force_not_null = ParseColumnList(value, expected_names, loption);
|
|
69715
69731
|
} else if (loption == "date_format" || loption == "dateformat") {
|
|
69716
69732
|
string format = ParseString(value, loption);
|
|
69717
|
-
|
|
69718
|
-
string error = StrTimeFormat::ParseFormatSpecifier(format, date_format);
|
|
69719
|
-
date_format.format_specifier = format;
|
|
69720
|
-
if (!error.empty()) {
|
|
69721
|
-
throw InvalidInputException("Could not parse DATEFORMAT: %s", error.c_str());
|
|
69722
|
-
}
|
|
69723
|
-
has_format[LogicalTypeId::DATE] = true;
|
|
69733
|
+
SetDateFormat(LogicalTypeId::DATE, format, true);
|
|
69724
69734
|
} else if (loption == "timestamp_format" || loption == "timestampformat") {
|
|
69725
69735
|
string format = ParseString(value, loption);
|
|
69726
|
-
|
|
69727
|
-
string error = StrTimeFormat::ParseFormatSpecifier(format, timestamp_format);
|
|
69728
|
-
timestamp_format.format_specifier = format;
|
|
69729
|
-
if (!error.empty()) {
|
|
69730
|
-
throw InvalidInputException("Could not parse TIMESTAMPFORMAT: %s", error.c_str());
|
|
69731
|
-
}
|
|
69732
|
-
has_format[LogicalTypeId::TIMESTAMP] = true;
|
|
69736
|
+
SetDateFormat(LogicalTypeId::TIMESTAMP, format, true);
|
|
69733
69737
|
} else if (loption == "escape") {
|
|
69734
69738
|
escape = ParseString(value, loption);
|
|
69735
69739
|
has_escape = true;
|
|
@@ -69747,6 +69751,15 @@ void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value
|
|
|
69747
69751
|
|
|
69748
69752
|
if (loption == "force_quote") {
|
|
69749
69753
|
force_quote = ParseColumnList(value, names, loption);
|
|
69754
|
+
} else if (loption == "date_format" || loption == "dateformat") {
|
|
69755
|
+
string format = ParseString(value, loption);
|
|
69756
|
+
SetDateFormat(LogicalTypeId::DATE, format, false);
|
|
69757
|
+
} else if (loption == "timestamp_format" || loption == "timestampformat") {
|
|
69758
|
+
string format = ParseString(value, loption);
|
|
69759
|
+
if (StringUtil::Lower(format) == "iso") {
|
|
69760
|
+
format = "%Y-%m-%dT%H:%M:%S.%fZ";
|
|
69761
|
+
}
|
|
69762
|
+
SetDateFormat(LogicalTypeId::TIMESTAMP, format, false);
|
|
69750
69763
|
} else {
|
|
69751
69764
|
throw BinderException("Unrecognized option CSV writer \"%s\"", loption);
|
|
69752
69765
|
}
|
|
@@ -93285,6 +93298,25 @@ static unique_ptr<FunctionData> StrfTimeBindFunction(ClientContext &context, Sca
|
|
|
93285
93298
|
return make_unique<StrfTimeBindData>(format, format_string);
|
|
93286
93299
|
}
|
|
93287
93300
|
|
|
93301
|
+
void StrfTimeFormat::ConvertDateVector(Vector &input, Vector &result, idx_t count) {
|
|
93302
|
+
D_ASSERT(input.GetType().id() == LogicalTypeId::DATE);
|
|
93303
|
+
D_ASSERT(result.GetType().id() == LogicalTypeId::VARCHAR);
|
|
93304
|
+
UnaryExecutor::ExecuteWithNulls<date_t, string_t>(input, result, count,
|
|
93305
|
+
[&](date_t input, ValidityMask &mask, idx_t idx) {
|
|
93306
|
+
if (Date::IsFinite(input)) {
|
|
93307
|
+
dtime_t time(0);
|
|
93308
|
+
idx_t len = GetLength(input, time, 0, nullptr);
|
|
93309
|
+
string_t target = StringVector::EmptyString(result, len);
|
|
93310
|
+
FormatString(input, time, target.GetDataWriteable());
|
|
93311
|
+
target.Finalize();
|
|
93312
|
+
return target;
|
|
93313
|
+
} else {
|
|
93314
|
+
mask.SetInvalid(idx);
|
|
93315
|
+
return string_t();
|
|
93316
|
+
}
|
|
93317
|
+
});
|
|
93318
|
+
}
|
|
93319
|
+
|
|
93288
93320
|
template <bool REVERSED>
|
|
93289
93321
|
static void StrfTimeFunctionDate(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
93290
93322
|
auto &func_expr = (BoundFunctionExpression &)state.expr;
|
|
@@ -93295,13 +93327,21 @@ static void StrfTimeFunctionDate(DataChunk &args, ExpressionState &state, Vector
|
|
|
93295
93327
|
ConstantVector::SetNull(result, true);
|
|
93296
93328
|
return;
|
|
93297
93329
|
}
|
|
93298
|
-
|
|
93299
|
-
|
|
93300
|
-
|
|
93301
|
-
|
|
93302
|
-
|
|
93330
|
+
info.format.ConvertDateVector(args.data[REVERSED ? 1 : 0], result, args.size());
|
|
93331
|
+
}
|
|
93332
|
+
|
|
93333
|
+
void StrfTimeFormat::ConvertTimestampVector(Vector &input, Vector &result, idx_t count) {
|
|
93334
|
+
D_ASSERT(input.GetType().id() == LogicalTypeId::TIMESTAMP);
|
|
93335
|
+
D_ASSERT(result.GetType().id() == LogicalTypeId::VARCHAR);
|
|
93336
|
+
UnaryExecutor::ExecuteWithNulls<timestamp_t, string_t>(
|
|
93337
|
+
input, result, count, [&](timestamp_t input, ValidityMask &mask, idx_t idx) {
|
|
93338
|
+
if (Timestamp::IsFinite(input)) {
|
|
93339
|
+
date_t date;
|
|
93340
|
+
dtime_t time;
|
|
93341
|
+
Timestamp::Convert(input, date, time);
|
|
93342
|
+
idx_t len = GetLength(date, time, 0, nullptr);
|
|
93303
93343
|
string_t target = StringVector::EmptyString(result, len);
|
|
93304
|
-
|
|
93344
|
+
FormatString(date, time, target.GetDataWriteable());
|
|
93305
93345
|
target.Finalize();
|
|
93306
93346
|
return target;
|
|
93307
93347
|
} else {
|
|
@@ -93321,23 +93361,7 @@ static void StrfTimeFunctionTimestamp(DataChunk &args, ExpressionState &state, V
|
|
|
93321
93361
|
ConstantVector::SetNull(result, true);
|
|
93322
93362
|
return;
|
|
93323
93363
|
}
|
|
93324
|
-
|
|
93325
|
-
UnaryExecutor::ExecuteWithNulls<timestamp_t, string_t>(
|
|
93326
|
-
args.data[REVERSED ? 1 : 0], result, args.size(), [&](timestamp_t input, ValidityMask &mask, idx_t idx) {
|
|
93327
|
-
if (Timestamp::IsFinite(input)) {
|
|
93328
|
-
date_t date;
|
|
93329
|
-
dtime_t time;
|
|
93330
|
-
Timestamp::Convert(input, date, time);
|
|
93331
|
-
idx_t len = info.format.GetLength(date, time, 0, nullptr);
|
|
93332
|
-
string_t target = StringVector::EmptyString(result, len);
|
|
93333
|
-
info.format.FormatString(date, time, target.GetDataWriteable());
|
|
93334
|
-
target.Finalize();
|
|
93335
|
-
return target;
|
|
93336
|
-
} else {
|
|
93337
|
-
mask.SetInvalid(idx);
|
|
93338
|
-
return string_t();
|
|
93339
|
-
}
|
|
93340
|
-
});
|
|
93364
|
+
info.format.ConvertTimestampVector(args.data[REVERSED ? 1 : 0], result, args.size());
|
|
93341
93365
|
}
|
|
93342
93366
|
|
|
93343
93367
|
void StrfTimeFun::RegisterFunction(BuiltinFunctions &set) {
|
|
@@ -106905,6 +106929,15 @@ static void WriteCSVSink(ClientContext &context, FunctionData &bind_data, Global
|
|
|
106905
106929
|
if (csv_data.sql_types[col_idx].id() == LogicalTypeId::VARCHAR) {
|
|
106906
106930
|
// VARCHAR, just create a reference
|
|
106907
106931
|
cast_chunk.data[col_idx].Reference(input.data[col_idx]);
|
|
106932
|
+
} else if (options.has_format[LogicalTypeId::DATE] && csv_data.sql_types[col_idx].id() == LogicalTypeId::DATE) {
|
|
106933
|
+
// use the date format to cast the chunk
|
|
106934
|
+
csv_data.options.write_date_format[LogicalTypeId::DATE].ConvertDateVector(
|
|
106935
|
+
input.data[col_idx], cast_chunk.data[col_idx], input.size());
|
|
106936
|
+
} else if (options.has_format[LogicalTypeId::TIMESTAMP] &&
|
|
106937
|
+
csv_data.sql_types[col_idx].id() == LogicalTypeId::TIMESTAMP) {
|
|
106938
|
+
// use the timestamp format to cast the chunk
|
|
106939
|
+
csv_data.options.write_date_format[LogicalTypeId::TIMESTAMP].ConvertTimestampVector(
|
|
106940
|
+
input.data[col_idx], cast_chunk.data[col_idx], input.size());
|
|
106908
106941
|
} else {
|
|
106909
106942
|
// non varchar column, perform the cast
|
|
106910
106943
|
VectorOperations::Cast(input.data[col_idx], cast_chunk.data[col_idx], input.size());
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.4.1-dev91"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "b8df3c77b"
|
|
15
|
+
#define DUCKDB_VERSION "v0.4.1-dev93"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -22494,6 +22494,9 @@ struct StrfTimeFormat : public StrTimeFormat {
|
|
|
22494
22494
|
|
|
22495
22495
|
DUCKDB_API static string Format(timestamp_t timestamp, const string &format);
|
|
22496
22496
|
|
|
22497
|
+
DUCKDB_API void ConvertDateVector(Vector &input, Vector &result, idx_t count);
|
|
22498
|
+
DUCKDB_API void ConvertTimestampVector(Vector &input, Vector &result, idx_t count);
|
|
22499
|
+
|
|
22497
22500
|
protected:
|
|
22498
22501
|
//! The variable-length specifiers. To determine total string size, these need to be checked.
|
|
22499
22502
|
vector<StrTimeSpecifier> var_length_specifiers;
|
|
@@ -22681,6 +22684,9 @@ struct BufferedCSVReaderOptions {
|
|
|
22681
22684
|
|
|
22682
22685
|
//! The date format to use (if any is specified)
|
|
22683
22686
|
std::map<LogicalTypeId, StrpTimeFormat> date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
|
|
22687
|
+
//! The date format to use for writing (if any is specified)
|
|
22688
|
+
std::map<LogicalTypeId, StrfTimeFormat> write_date_format = {{LogicalTypeId::DATE, {}},
|
|
22689
|
+
{LogicalTypeId::TIMESTAMP, {}}};
|
|
22684
22690
|
//! Whether or not a type format is specified
|
|
22685
22691
|
std::map<LogicalTypeId, bool> has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}};
|
|
22686
22692
|
|
|
@@ -22695,6 +22701,7 @@ struct BufferedCSVReaderOptions {
|
|
|
22695
22701
|
void SetReadOption(const string &loption, const Value &value, vector<string> &expected_names);
|
|
22696
22702
|
|
|
22697
22703
|
void SetWriteOption(const string &loption, const Value &value);
|
|
22704
|
+
void SetDateFormat(LogicalTypeId type, const string &format, bool read_format);
|
|
22698
22705
|
|
|
22699
22706
|
std::string ToString() const;
|
|
22700
22707
|
};
|