duckdb 0.8.2-dev2700.0 → 0.8.2-dev2809.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
  3. package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
  4. package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
  5. package/src/duckdb/src/common/enum_util.cpp +5 -0
  6. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  7. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
  8. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  9. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  10. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  11. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
  12. package/src/duckdb/src/execution/window_executor.cpp +10 -1
  13. package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
  14. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
  15. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  16. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
  17. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
  18. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  19. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
  20. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  21. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  22. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
  23. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
  24. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  25. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
  26. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  27. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  28. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  29. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
  30. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
  31. package/src/duckdb/src/include/duckdb.h +11 -1
  32. package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
  33. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  34. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  35. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  36. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  37. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
  38. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  39. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
  40. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  41. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  42. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  43. package/src/duckdb/src/parallel/executor.cpp +6 -0
  44. package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
  45. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  46. package/src/duckdb/src/planner/operator/logical_get.cpp +4 -0
  47. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.8.2-dev2700.0",
5
+ "version": "0.8.2-dev2809.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb/extension/icu/icu-makedate.cpp CHANGED
@@ -1,4 +1,6 @@
1
+ #include "duckdb/common/operator/add.hpp"
1
2
  #include "duckdb/common/operator/cast_operators.hpp"
3
+ #include "duckdb/common/operator/subtract.hpp"
2
4
  #include "duckdb/common/types/date.hpp"
3
5
  #include "duckdb/common/types/time.hpp"
4
6
  #include "duckdb/common/types/timestamp.hpp"
@@ -66,7 +68,11 @@ struct ICUMakeDate : public ICUDateFunc {
66
68
  struct ICUMakeTimestampTZFunc : public ICUDateFunc {
67
69
  template <typename T>
68
70
  static inline timestamp_t Operation(icu::Calendar *calendar, T yyyy, T mm, T dd, T hr, T mn, double ss) {
69
- const auto year = yyyy + (yyyy < 0);
71
+ const auto year = Cast::Operation<T, int32_t>(AddOperator::Operation<T, T, T>(yyyy, (yyyy < 0)));
72
+ const auto month = Cast::Operation<T, int32_t>(SubtractOperatorOverflowCheck::Operation<T, T, T>(mm, 1));
73
+ const auto day = Cast::Operation<T, int32_t>(dd);
74
+ const auto hour = Cast::Operation<T, int32_t>(hr);
75
+ const auto min = Cast::Operation<T, int32_t>(mn);
70
76
 
71
77
  const auto secs = Cast::Operation<double, int32_t>(ss);
72
78
  ss -= secs;
@@ -74,11 +80,11 @@ struct ICUMakeTimestampTZFunc : public ICUDateFunc {
74
80
  const auto millis = int32_t(ss);
75
81
  int64_t micros = std::round((ss - millis) * Interval::MICROS_PER_MSEC);
76
82
 
77
- calendar->set(UCAL_YEAR, int32_t(year));
78
- calendar->set(UCAL_MONTH, int32_t(mm - 1));
79
- calendar->set(UCAL_DATE, int32_t(dd));
80
- calendar->set(UCAL_HOUR_OF_DAY, int32_t(hr));
81
- calendar->set(UCAL_MINUTE, int32_t(mn));
83
+ calendar->set(UCAL_YEAR, year);
84
+ calendar->set(UCAL_MONTH, month);
85
+ calendar->set(UCAL_DATE, day);
86
+ calendar->set(UCAL_HOUR_OF_DAY, hour);
87
+ calendar->set(UCAL_MINUTE, min);
82
88
  calendar->set(UCAL_SECOND, secs);
83
89
  calendar->set(UCAL_MILLISECOND, millis);
84
90
 
package/src/duckdb/src/common/adbc/adbc.cpp CHANGED
@@ -586,16 +586,14 @@ AdbcStatusCode StatementExecuteQuery(struct AdbcStatement *statement, struct Arr
586
586
 
587
587
  // this is a nop for us
588
588
  AdbcStatusCode StatementPrepare(struct AdbcStatement *statement, struct AdbcError *error) {
589
- auto status = SetErrorMaybe(statement, error, "Missing statement object");
590
- if (status != ADBC_STATUS_OK) {
591
- return status;
589
+ if (!statement) {
590
+ SetError(error, "Missing statement object");
591
+ return ADBC_STATUS_INVALID_ARGUMENT;
592
592
  }
593
-
594
- status = SetErrorMaybe(statement->private_data, error, "Invalid statement object");
595
- if (status != ADBC_STATUS_OK) {
596
- return status;
593
+ if (!error) {
594
+ SetError(error, "Missing error object");
595
+ return ADBC_STATUS_INVALID_ARGUMENT;
597
596
  }
598
-
599
597
  return ADBC_STATUS_OK;
600
598
  }
601
599
 
@@ -687,10 +685,6 @@ AdbcStatusCode QueryInternal(struct AdbcConnection *connection, struct ArrowArra
687
685
  AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth, const char *catalog,
688
686
  const char *db_schema, const char *table_name, const char **table_type,
689
687
  const char *column_name, struct ArrowArrayStream *out, struct AdbcError *error) {
690
- if (depth != 0) {
691
- SetError(error, "Depth parameter not yet supported");
692
- return ADBC_STATUS_NOT_IMPLEMENTED;
693
- }
694
688
  if (catalog != nullptr) {
695
689
  if (strcmp(catalog, "duckdb") == 0) {
696
690
  SetError(error, "catalog must be NULL or 'duckdb'");
@@ -702,16 +696,53 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
702
696
  SetError(error, "Table types parameter not yet supported");
703
697
  return ADBC_STATUS_NOT_IMPLEMENTED;
704
698
  }
699
+ std::string query;
700
+ switch (depth) {
701
+ case ADBC_OBJECT_DEPTH_CATALOGS:
702
+ SetError(error, "ADBC_OBJECT_DEPTH_CATALOGS not yet supported");
703
+ return ADBC_STATUS_NOT_IMPLEMENTED;
704
+ case ADBC_OBJECT_DEPTH_DB_SCHEMAS:
705
+ // Return metadata on catalogs and schemas.
706
+ query = duckdb::StringUtil::Format(R"(
707
+ SELECT table_schema db_schema_name
708
+ FROM information_schema.columns
709
+ WHERE table_schema LIKE '%s' AND table_name LIKE '%s' AND column_name LIKE '%s' ;
710
+ )",
711
+ db_schema ? db_schema : "%", table_name ? table_name : "%",
712
+ column_name ? column_name : "%");
713
+ break;
714
+ case ADBC_OBJECT_DEPTH_TABLES:
715
+ // Return metadata on catalogs, schemas, and tables.
716
+ query = duckdb::StringUtil::Format(R"(
717
+ SELECT table_schema db_schema_name, LIST(table_schema_list) db_schema_tables
718
+ FROM (
719
+ SELECT table_schema, { table_name : table_name} table_schema_list
720
+ FROM information_schema.columns
721
+ WHERE table_schema LIKE '%s' AND table_name LIKE '%s' AND column_name LIKE '%s' GROUP BY table_schema, table_name
722
+ ) GROUP BY table_schema;
723
+ )",
724
+ db_schema ? db_schema : "%", table_name ? table_name : "%",
725
+ column_name ? column_name : "%");
726
+ break;
727
+ case ADBC_OBJECT_DEPTH_COLUMNS:
728
+ // Return metadata on catalogs, schemas, tables, and columns.
729
+ query = duckdb::StringUtil::Format(R"(
730
+ SELECT table_schema db_schema_name, LIST(table_schema_list) db_schema_tables
731
+ FROM (
732
+ SELECT table_schema, { table_name : table_name, table_columns : LIST({column_name : column_name, ordinal_position : ordinal_position + 1, remarks : ''})} table_schema_list
733
+ FROM information_schema.columns
734
+ WHERE table_schema LIKE '%s' AND table_name LIKE '%s' AND column_name LIKE '%s' GROUP BY table_schema, table_name
735
+ ) GROUP BY table_schema;
736
+ )",
737
+ db_schema ? db_schema : "%", table_name ? table_name : "%",
738
+ column_name ? column_name : "%");
739
+ break;
740
+ default:
741
+ SetError(error, "Invalid value of Depth");
742
+ return ADBC_STATUS_INVALID_ARGUMENT;
743
+ }
705
744
 
706
- auto q = duckdb::StringUtil::Format(R"(
707
- SELECT table_schema db_schema_name, LIST(table_schema_list) db_schema_tables FROM (
708
- SELECT table_schema, { table_name : table_name, table_columns : LIST({column_name : column_name, ordinal_position : ordinal_position + 1, remarks : ''})} table_schema_list FROM information_schema.columns WHERE table_schema LIKE '%s' AND table_name LIKE '%s' AND column_name LIKE '%s' GROUP BY table_schema, table_name
709
- ) GROUP BY table_schema;
710
- )",
711
- db_schema ? db_schema : "%", table_name ? table_name : "%",
712
- column_name ? column_name : "%");
713
-
714
- return QueryInternal(connection, out, q.c_str(), error);
745
+ return QueryInternal(connection, out, query.c_str(), error);
715
746
  }
716
747
 
717
748
  AdbcStatusCode ConnectionGetTableTypes(struct AdbcConnection *connection, struct ArrowArrayStream *out,
package/src/duckdb/src/common/adbc/driver_manager.cpp CHANGED
@@ -304,7 +304,12 @@ AdbcStatusCode AdbcConnectionGetObjects(struct AdbcConnection *connection, int d
304
304
  const char *db_schema, const char *table_name, const char **table_types,
305
305
  const char *column_name, struct ArrowArrayStream *stream,
306
306
  struct AdbcError *error) {
307
- if (!connection->private_driver) {
307
+ if (!connection) {
308
+ SetError(error, "connection can't be null");
309
+ return ADBC_STATUS_INVALID_STATE;
310
+ }
311
+ if (!connection->private_data) {
312
+ SetError(error, "connection must be initialized");
308
313
  return ADBC_STATUS_INVALID_STATE;
309
314
  }
310
315
  return connection->private_driver->ConnectionGetObjects(connection, depth, catalog, db_schema, table_name,
@@ -474,7 +479,12 @@ AdbcStatusCode AdbcStatementNew(struct AdbcConnection *connection, struct AdbcSt
474
479
  }
475
480
 
476
481
  AdbcStatusCode AdbcStatementPrepare(struct AdbcStatement *statement, struct AdbcError *error) {
477
- if (!statement->private_driver) {
482
+ auto status = SetErrorMaybe(statement, error, "Missing statement object");
483
+ if (status != ADBC_STATUS_OK) {
484
+ return status;
485
+ }
486
+ status = SetErrorMaybe(statement->private_data, error, "Invalid statement object");
487
+ if (status != ADBC_STATUS_OK) {
478
488
  return ADBC_STATUS_INVALID_STATE;
479
489
  }
480
490
  return statement->private_driver->StatementPrepare(statement, error);
package/src/duckdb/src/common/enum_util.cpp CHANGED
@@ -3872,6 +3872,8 @@ const char* EnumUtil::ToChars<PendingExecutionResult>(PendingExecutionResult val
3872
3872
  return "RESULT_NOT_READY";
3873
3873
  case PendingExecutionResult::EXECUTION_ERROR:
3874
3874
  return "EXECUTION_ERROR";
3875
+ case PendingExecutionResult::NO_TASKS_AVAILABLE:
3876
+ return "NO_TASKS_AVAILABLE";
3875
3877
  default:
3876
3878
  throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
3877
3879
  }
@@ -3888,6 +3890,9 @@ PendingExecutionResult EnumUtil::FromString<PendingExecutionResult>(const char *
3888
3890
  if (StringUtil::Equals(value, "EXECUTION_ERROR")) {
3889
3891
  return PendingExecutionResult::EXECUTION_ERROR;
3890
3892
  }
3893
+ if (StringUtil::Equals(value, "NO_TASKS_AVAILABLE")) {
3894
+ return PendingExecutionResult::NO_TASKS_AVAILABLE;
3895
+ }
3891
3896
  throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
3892
3897
  }
3893
3898
 
package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp CHANGED
@@ -4,6 +4,8 @@
4
4
  #include "duckdb/common/types/row/row_data_collection.hpp"
5
5
  #include "duckdb/storage/buffer_manager.hpp"
6
6
 
7
+ #include <numeric>
8
+
7
9
  namespace duckdb {
8
10
 
9
11
  void RowDataCollectionScanner::AlignHeapBlocks(RowDataCollection &swizzled_block_collection,
@@ -155,6 +157,31 @@ RowDataCollectionScanner::RowDataCollectionScanner(RowDataCollection &rows_p, Ro
155
157
  ValidateUnscannedBlock();
156
158
  }
157
159
 
160
+ RowDataCollectionScanner::RowDataCollectionScanner(RowDataCollection &rows_p, RowDataCollection &heap_p,
161
+ const RowLayout &layout_p, bool external_p, idx_t block_idx,
162
+ bool flush_p)
163
+ : rows(rows_p), heap(heap_p), layout(layout_p), read_state(*this), total_count(rows.count), total_scanned(0),
164
+ external(external_p), flush(flush_p), unswizzling(!layout.AllConstant() && external && !heap.keep_pinned) {
165
+
166
+ if (unswizzling) {
167
+ D_ASSERT(rows.blocks.size() == heap.blocks.size());
168
+ }
169
+
170
+ D_ASSERT(block_idx < rows.blocks.size());
171
+ read_state.block_idx = block_idx;
172
+ read_state.entry_idx = 0;
173
+
174
+ // Pretend that we have scanned up to the start block
175
+ // and will stop at the end
176
+ auto begin = rows.blocks.begin();
177
+ auto end = begin + block_idx;
178
+ total_scanned =
179
+ std::accumulate(begin, end, idx_t(0), [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
180
+ total_count = total_scanned + (*end)->count;
181
+
182
+ ValidateUnscannedBlock();
183
+ }
184
+
158
185
  void RowDataCollectionScanner::SwizzleBlock(RowDataBlock &data_block, RowDataBlock &heap_block) {
159
186
  // Pin the data block and swizzle the pointers within the rows
160
187
  D_ASSERT(!data_block.block->IsSwizzled());
@@ -190,7 +217,7 @@ void RowDataCollectionScanner::ReSwizzle() {
190
217
  }
191
218
 
192
219
  void RowDataCollectionScanner::ValidateUnscannedBlock() const {
193
- if (unswizzling && read_state.block_idx < rows.blocks.size()) {
220
+ if (unswizzling && read_state.block_idx < rows.blocks.size() && Remaining()) {
194
221
  D_ASSERT(rows.blocks[read_state.block_idx]->block->IsSwizzled());
195
222
  }
196
223
  }
@@ -202,6 +229,9 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
202
229
  return;
203
230
  }
204
231
 
232
+ // Only flush blocks we processed.
233
+ const auto flush_block_idx = read_state.block_idx;
234
+
205
235
  const idx_t &row_width = layout.GetRowWidth();
206
236
  // Set up a batch of pointers to scan data from
207
237
  idx_t scanned = 0;
@@ -227,6 +257,8 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
227
257
  }
228
258
  // Update state indices
229
259
  read_state.entry_idx += next;
260
+ scanned += next;
261
+ total_scanned += next;
230
262
  if (read_state.entry_idx == data_block->count) {
231
263
  // Pin completed blocks so we don't lose them
232
264
  pinned_blocks.emplace_back(rows.buffer_manager.Pin(data_block->block));
@@ -238,7 +270,6 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
238
270
  read_state.entry_idx = 0;
239
271
  ValidateUnscannedBlock();
240
272
  }
241
- scanned += next;
242
273
  }
243
274
  D_ASSERT(scanned == count);
244
275
  // Deserialize the payload data
@@ -248,14 +279,13 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
248
279
  }
249
280
  chunk.SetCardinality(count);
250
281
  chunk.Verify();
251
- total_scanned += scanned;
252
282
 
253
283
  // Switch to a new set of pinned blocks
254
284
  read_state.pinned_blocks.swap(pinned_blocks);
255
285
 
256
286
  if (flush) {
257
287
  // Release blocks we have passed.
258
- for (idx_t i = 0; i < read_state.block_idx; ++i) {
288
+ for (idx_t i = flush_block_idx; i < read_state.block_idx; ++i) {
259
289
  rows.blocks[i]->block = nullptr;
260
290
  if (unswizzling) {
261
291
  heap.blocks[i]->block = nullptr;
@@ -263,7 +293,7 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
263
293
  }
264
294
  } else if (unswizzling) {
265
295
  // Reswizzle blocks we have passed so they can be flushed safely.
266
- for (idx_t i = 0; i < read_state.block_idx; ++i) {
296
+ for (idx_t i = flush_block_idx; i < read_state.block_idx; ++i) {
267
297
  auto &data_block = rows.blocks[i];
268
298
  if (data_block->block && !data_block->block->IsSwizzled()) {
269
299
  SwizzleBlock(*data_block, *heap.blocks[i]);