duckdb 0.7.2-dev1684.0 → 0.7.2-dev1803.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-datefunc.cpp +20 -8
  3. package/src/duckdb/extension/icu/icu-strptime.cpp +117 -29
  4. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -0
  5. package/src/duckdb/src/common/local_file_system.cpp +13 -2
  6. package/src/duckdb/src/common/sort/partition_state.cpp +644 -0
  7. package/src/duckdb/src/common/types.cpp +2 -2
  8. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +77 -849
  9. package/src/duckdb/src/function/scalar/math/numeric.cpp +57 -0
  10. package/src/duckdb/src/function/scalar/math_functions.cpp +1 -0
  11. package/src/duckdb/src/function/scalar/string/hex.cpp +261 -78
  12. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +2 -2
  13. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  14. package/src/duckdb/src/include/duckdb/common/bit_utils.hpp +147 -0
  15. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +1 -0
  16. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +247 -0
  17. package/src/duckdb/src/include/duckdb/common/string_util.hpp +7 -0
  18. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -1
  19. package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +4 -0
  20. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +7 -8
  21. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +79 -0
  22. package/src/duckdb/src/include/duckdb/storage/buffer/temporary_file_information.hpp +12 -0
  23. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +3 -59
  24. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/chimp128.hpp +1 -0
  25. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/chimp_utils.hpp +0 -97
  26. package/src/duckdb/src/include/duckdb/storage/compression/patas/algorithm/patas.hpp +1 -0
  27. package/src/duckdb/src/main/extension/extension_install.cpp +11 -0
  28. package/src/duckdb/src/main/extension/extension_load.cpp +29 -3
  29. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +8 -7
  30. package/src/duckdb/src/storage/buffer/block_handle.cpp +128 -0
  31. package/src/duckdb/src/storage/buffer/block_manager.cpp +81 -0
  32. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +136 -0
  33. package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +32 -0
  34. package/src/duckdb/src/storage/buffer_manager.cpp +7 -358
  35. package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +3 -5
  36. package/src/duckdb/ub_src_common_sort.cpp +2 -0
  37. package/src/duckdb/ub_src_storage_buffer.cpp +8 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev1684.0",
5
+ "version": "0.7.2-dev1803.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -65,24 +65,36 @@ timestamp_t ICUDateFunc::GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros)
65
65
  return timestamp_t(millis * Interval::MICROS_PER_MSEC + micros);
66
66
  }
67
67
 
68
- timestamp_t ICUDateFunc::GetTime(icu::Calendar *calendar, uint64_t micros) {
68
+ bool ICUDateFunc::TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result) {
69
69
  // Extract the new time
70
70
  UErrorCode status = U_ZERO_ERROR;
71
71
  auto millis = int64_t(calendar->getTime(status));
72
72
  if (U_FAILURE(status)) {
73
- throw Exception("Unable to get ICU calendar time.");
73
+ return false;
74
74
  }
75
75
 
76
76
  // UDate is a double, so it can't overflow (it just loses accuracy), but converting back to µs can.
77
- millis = MultiplyOperatorOverflowCheck::Operation<int64_t, int64_t, int64_t>(millis, Interval::MICROS_PER_MSEC);
78
- millis = AddOperatorOverflowCheck::Operation<int64_t, int64_t, int64_t>(millis, micros);
77
+ if (!TryMultiplyOperator::Operation<int64_t, int64_t, int64_t>(millis, Interval::MICROS_PER_MSEC, millis)) {
78
+ return false;
79
+ }
80
+ if (!TryAddOperator::Operation<int64_t, int64_t, int64_t>(millis, micros, millis)) {
81
+ return false;
82
+ }
79
83
 
80
84
  // Now make sure the value is in range
81
- date_t d;
82
- dtime_t t;
83
- Timestamp::Convert(timestamp_t(millis), d, t);
85
+ result = timestamp_t(millis);
86
+ date_t out_date = Timestamp::GetDate(result);
87
+ int64_t days_micros;
88
+ return TryMultiplyOperator::Operation<int64_t, int64_t, int64_t>(out_date.days, Interval::MICROS_PER_DAY,
89
+ days_micros);
90
+ }
84
91
 
85
- return timestamp_t(millis);
92
+ timestamp_t ICUDateFunc::GetTime(icu::Calendar *calendar, uint64_t micros) {
93
+ timestamp_t result;
94
+ if (!TryGetTime(calendar, micros, result)) {
95
+ throw ConversionException("Unable to convert ICU date to timestamp");
96
+ }
97
+ return result;
86
98
  }
87
99
 
88
100
  uint64_t ICUDateFunc::SetTime(icu::Calendar *calendar, timestamp_t date) {
@@ -19,17 +19,31 @@
19
19
  namespace duckdb {
20
20
 
21
21
  struct ICUStrptime : public ICUDateFunc {
22
+ using ParseResult = StrpTimeFormat::ParseResult;
23
+
22
24
  struct ICUStrptimeBindData : public BindData {
23
- ICUStrptimeBindData(ClientContext &context, const StrpTimeFormat &format) : BindData(context), format(format) {
25
+ ICUStrptimeBindData(ClientContext &context, const StrpTimeFormat &format)
26
+ : BindData(context), formats(1, format) {
27
+ }
28
+ ICUStrptimeBindData(ClientContext &context, vector<StrpTimeFormat> formats_p)
29
+ : BindData(context), formats(std::move(formats_p)) {
24
30
  }
25
- ICUStrptimeBindData(const ICUStrptimeBindData &other) : BindData(other), format(other.format) {
31
+ ICUStrptimeBindData(const ICUStrptimeBindData &other) : BindData(other), formats(other.formats) {
26
32
  }
27
33
 
28
- StrpTimeFormat format;
34
+ vector<StrpTimeFormat> formats;
29
35
 
30
36
  bool Equals(const FunctionData &other_p) const override {
31
37
  auto &other = other_p.Cast<ICUStrptimeBindData>();
32
- return format.format_specifier == other.format.format_specifier;
38
+ if (formats.size() != other.formats.size()) {
39
+ return false;
40
+ }
41
+ for (size_t i = 0; i < formats.size(); ++i) {
42
+ if (formats[i].format_specifier != other.formats[i].format_specifier) {
43
+ return false;
44
+ }
45
+ }
46
+ return true;
33
47
  }
34
48
  duckdb::unique_ptr<FunctionData> Copy() const override {
35
49
  return make_uniq<ICUStrptimeBindData>(*this);
@@ -53,13 +67,7 @@ struct ICUStrptime : public ICUDateFunc {
53
67
  }
54
68
  }
55
69
 
56
- static timestamp_t Operation(icu::Calendar *calendar, string_t input, StrpTimeFormat &format) {
57
- StrpTimeFormat::ParseResult parsed;
58
- format.Parse(input, parsed);
59
- if (!parsed.error_message.empty()) {
60
- throw InvalidInputException(parsed.FormatError(input, format.format_specifier));
61
- }
62
-
70
+ static uint64_t ToMicros(icu::Calendar *calendar, const ParseResult &parsed, const StrpTimeFormat &format) {
63
71
  // Set TZ first, if any.
64
72
  // Note that empty TZ names are not allowed,
65
73
  // but unknown names will map to GMT.
@@ -83,18 +91,19 @@ struct ICUStrptime : public ICUDateFunc {
83
91
  calendar->set(UCAL_ZONE_OFFSET, parsed.data[7] * Interval::MSECS_PER_SEC * Interval::SECS_PER_MINUTE);
84
92
  }
85
93
 
86
- return GetTime(calendar, micros);
94
+ return micros;
87
95
  }
88
96
 
89
- static void ICUStrptimeFunction(DataChunk &args, ExpressionState &state, Vector &result) {
97
+ static void Parse(DataChunk &args, ExpressionState &state, Vector &result) {
90
98
  D_ASSERT(args.ColumnCount() == 2);
91
99
  auto &str_arg = args.data[0];
92
100
  auto &fmt_arg = args.data[1];
93
101
 
94
102
  auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
95
103
  auto &info = func_expr.bind_info->Cast<ICUStrptimeBindData>();
96
- CalendarPtr calendar(info.calendar->clone());
97
- auto &format = info.format;
104
+ CalendarPtr calendar_ptr(info.calendar->clone());
105
+ auto calendar = calendar_ptr.get();
106
+ auto &formats = info.formats;
98
107
 
99
108
  D_ASSERT(fmt_arg.GetVectorType() == VectorType::CONSTANT_VECTOR);
100
109
 
@@ -102,12 +111,55 @@ struct ICUStrptime : public ICUDateFunc {
102
111
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
103
112
  ConstantVector::SetNull(result, true);
104
113
  } else {
105
- UnaryExecutor::Execute<string_t, timestamp_t>(
106
- str_arg, result, args.size(), [&](string_t input) { return Operation(calendar.get(), input, format); });
114
+ UnaryExecutor::Execute<string_t, timestamp_t>(str_arg, result, args.size(), [&](string_t input) {
115
+ ParseResult parsed;
116
+ for (auto &format : info.formats) {
117
+ if (format.Parse(input, parsed)) {
118
+ return GetTime(calendar, ToMicros(calendar, parsed, format));
119
+ }
120
+ }
121
+
122
+ throw InvalidInputException(parsed.FormatError(input, info.formats[0].format_specifier));
123
+ });
107
124
  }
108
125
  }
109
126
 
110
- static bind_scalar_function_t bind;
127
+ static void TryParse(DataChunk &args, ExpressionState &state, Vector &result) {
128
+ D_ASSERT(args.ColumnCount() == 2);
129
+ auto &str_arg = args.data[0];
130
+ auto &fmt_arg = args.data[1];
131
+
132
+ auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
133
+ auto &info = func_expr.bind_info->Cast<ICUStrptimeBindData>();
134
+ CalendarPtr calendar_ptr(info.calendar->clone());
135
+ auto calendar = calendar_ptr.get();
136
+ auto &formats = info.formats;
137
+
138
+ D_ASSERT(fmt_arg.GetVectorType() == VectorType::CONSTANT_VECTOR);
139
+
140
+ if (ConstantVector::IsNull(fmt_arg)) {
141
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
142
+ ConstantVector::SetNull(result, true);
143
+ } else {
144
+ UnaryExecutor::ExecuteWithNulls<string_t, timestamp_t>(
145
+ str_arg, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) {
146
+ ParseResult parsed;
147
+ for (auto &format : info.formats) {
148
+ if (format.Parse(input, parsed)) {
149
+ timestamp_t result;
150
+ if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) {
151
+ return result;
152
+ }
153
+ }
154
+ }
155
+
156
+ mask.SetInvalid(idx);
157
+ return timestamp_t();
158
+ });
159
+ }
160
+ }
161
+
162
+ static bind_scalar_function_t bind_strptime;
111
163
 
112
164
  static duckdb::unique_ptr<FunctionData> StrpTimeBindFunction(ClientContext &context, ScalarFunction &bound_function,
113
165
  vector<duckdb::unique_ptr<Expression>> &arguments) {
@@ -117,10 +169,14 @@ struct ICUStrptime : public ICUDateFunc {
117
169
  if (!arguments[1]->IsFoldable()) {
118
170
  throw InvalidInputException("strptime format must be a constant");
119
171
  }
120
- Value options_str = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
172
+ scalar_function_t function = (bound_function.name == "try_strptime") ? TryParse : Parse;
173
+ Value format_value = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
174
+ string format_string;
121
175
  StrpTimeFormat format;
122
- if (!options_str.IsNull()) {
123
- auto format_string = options_str.ToString();
176
+ if (format_value.IsNull()) {
177
+ ;
178
+ } else if (format_value.type().id() == LogicalTypeId::VARCHAR) {
179
+ format_string = format_value.ToString();
124
180
  format.format_specifier = format_string;
125
181
  string error = StrTimeFormat::ParseFormatSpecifier(format_string, format);
126
182
  if (!error.empty()) {
@@ -129,24 +185,46 @@ struct ICUStrptime : public ICUDateFunc {
129
185
 
130
186
  // If we have a time zone, we should use ICU for parsing and return a TSTZ instead.
131
187
  if (format.HasFormatSpecifier(StrTimeSpecifier::TZ_NAME)) {
132
- bound_function.function = ICUStrptimeFunction;
188
+ bound_function.function = function;
133
189
  bound_function.return_type = LogicalType::TIMESTAMP_TZ;
134
190
  return make_uniq<ICUStrptimeBindData>(context, format);
135
191
  }
192
+ } else if (format_value.type() == LogicalType::LIST(LogicalType::VARCHAR)) {
193
+ const auto &children = ListValue::GetChildren(format_value);
194
+ if (children.empty()) {
195
+ throw InvalidInputException("strptime format list must not be empty");
196
+ }
197
+ vector<StrpTimeFormat> formats;
198
+ bool has_tz = true;
199
+ for (const auto &child : children) {
200
+ format_string = child.ToString();
201
+ format.format_specifier = format_string;
202
+ string error = StrTimeFormat::ParseFormatSpecifier(format_string, format);
203
+ if (!error.empty()) {
204
+ throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error);
205
+ }
206
+ // If any format has UTC offsets, then we have to produce TSTZ
207
+ has_tz = has_tz || format.HasFormatSpecifier(StrTimeSpecifier::TZ_NAME);
208
+ formats.emplace_back(format);
209
+ }
210
+ if (has_tz) {
211
+ bound_function.function = function;
212
+ bound_function.return_type = LogicalType::TIMESTAMP_TZ;
213
+ return make_uniq<ICUStrptimeBindData>(context, formats);
214
+ }
136
215
  }
137
216
 
138
217
  // Fall back to faster, non-TZ parsing
139
- bound_function.bind = bind;
140
- return bind(context, bound_function, arguments);
218
+ bound_function.bind = bind_strptime;
219
+ return bind_strptime(context, bound_function, arguments);
141
220
  }
142
221
 
143
- static void AddBinaryTimestampFunction(const string &name, ClientContext &context) {
222
+ static void TailPatch(const string &name, ClientContext &context, const vector<LogicalType> &types) {
144
223
  // Find the old function
145
224
  auto &catalog = Catalog::GetSystemCatalog(context);
146
225
  auto entry = catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, name);
147
226
  D_ASSERT(entry && entry->type == CatalogType::SCALAR_FUNCTION_ENTRY);
148
227
  auto &func = entry->Cast<ScalarFunctionCatalogEntry>();
149
- vector<LogicalType> types {LogicalType::VARCHAR, LogicalType::VARCHAR};
150
228
  string error;
151
229
 
152
230
  FunctionBinder function_binder(context);
@@ -157,10 +235,18 @@ struct ICUStrptime : public ICUDateFunc {
157
235
 
158
236
  // Tail patch the old binder
159
237
  auto &bound_function = func.functions.GetFunctionReferenceByOffset(best_function);
160
- bind = bound_function.bind;
238
+ bind_strptime = bound_function.bind;
161
239
  bound_function.bind = StrpTimeBindFunction;
162
240
  }
163
241
 
242
+ static void AddBinaryTimestampFunction(const string &name, ClientContext &context) {
243
+ vector<LogicalType> types {LogicalType::VARCHAR, LogicalType::VARCHAR};
244
+ TailPatch(name, context, types);
245
+
246
+ types[1] = LogicalType::LIST(LogicalType::VARCHAR);
247
+ TailPatch(name, context, types);
248
+ }
249
+
164
250
  static bool CastFromVarchar(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
165
251
  auto &cast_data = parameters.cast_data->Cast<CastData>();
166
252
  auto info = (BindData *)cast_data.info.get();
@@ -233,7 +319,7 @@ struct ICUStrptime : public ICUDateFunc {
233
319
  }
234
320
  };
235
321
 
236
- bind_scalar_function_t ICUStrptime::bind = nullptr;
322
+ bind_scalar_function_t ICUStrptime::bind_strptime = nullptr;
237
323
 
238
324
  struct ICUStrftime : public ICUDateFunc {
239
325
  static void ParseFormatSpecifier(string_t &format_str, StrfTimeFormat &format) {
@@ -284,7 +370,7 @@ struct ICUStrftime : public ICUDateFunc {
284
370
  auto &fmt_arg = args.data[1];
285
371
 
286
372
  auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
287
- auto &info = (BindData &)*func_expr.bind_info;
373
+ auto &info = func_expr.bind_info->Cast<BindData>();
288
374
  CalendarPtr calendar(info.calendar->clone());
289
375
  const auto tz_name = info.tz_setting.c_str();
290
376
 
@@ -420,6 +506,8 @@ struct ICUStrftime : public ICUDateFunc {
420
506
 
421
507
  void RegisterICUStrptimeFunctions(ClientContext &context) {
422
508
  ICUStrptime::AddBinaryTimestampFunction("strptime", context);
509
+ ICUStrptime::AddBinaryTimestampFunction("try_strptime", context);
510
+
423
511
  ICUStrftime::AddBinaryTimestampFunction("strftime", context);
424
512
 
425
513
  // Add string casts
@@ -49,6 +49,8 @@ struct ICUDateFunc {
49
49
  //! Sets the time zone for the calendar.
50
50
  static void SetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
51
51
  //! Tries to get the timestamp from the calendar, returning false (instead of throwing) if it cannot be computed or is not in range.
52
+ static bool TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result);
53
+ //! Gets the timestamp from the calendar, throwing if it is not in range.
52
54
  static timestamp_t GetTime(icu::Calendar *calendar, uint64_t micros = 0);
53
55
  //! Gets the timestamp from the calendar, assuming it is in range.
54
56
  static timestamp_t GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros = 0);
@@ -977,6 +977,7 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
977
977
  throw IOException("Cannot use multiple \'**\' in one path");
978
978
  }
979
979
 
980
+ bool recursive_search = false;
980
981
  for (idx_t i = absolute_path ? 1 : 0; i < splits.size(); i++) {
981
982
  bool is_last_chunk = i + 1 == splits.size();
982
983
  bool has_glob = HasGlob(splits[i]);
@@ -988,12 +989,22 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
988
989
  if (previous_directories.empty()) {
989
990
  result.push_back(splits[i]);
990
991
  } else {
991
- for (auto &prev_directory : previous_directories) {
992
- result.push_back(JoinPath(prev_directory, splits[i]));
992
+ if (recursive_search && is_last_chunk) {
993
+ for (auto &prev_directory : previous_directories) {
994
+ const string filename = JoinPath(prev_directory, splits[i]);
995
+ if (FileExists(filename) || DirectoryExists(filename)) {
996
+ result.push_back(filename);
997
+ }
998
+ }
999
+ } else {
1000
+ for (auto &prev_directory : previous_directories) {
1001
+ result.push_back(JoinPath(prev_directory, splits[i]));
1002
+ }
993
1003
  }
994
1004
  }
995
1005
  } else {
996
1006
  if (IsCrawl(splits[i])) {
1007
+ recursive_search = true;
997
1008
  if (!is_last_chunk) {
998
1009
  result = previous_directories;
999
1010
  }