duckdb 0.8.2-dev2700.0 → 0.8.2-dev2842.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
  3. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -1
  4. package/src/duckdb/extension/json/include/json_serializer.hpp +1 -1
  5. package/src/duckdb/extension/json/json_deserializer.cpp +10 -10
  6. package/src/duckdb/extension/json/json_scan.cpp +2 -2
  7. package/src/duckdb/extension/json/json_serializer.cpp +11 -10
  8. package/src/duckdb/extension/json/serialize_json.cpp +44 -44
  9. package/src/duckdb/extension/parquet/parquet_extension.cpp +11 -10
  10. package/src/duckdb/extension/parquet/serialize_parquet.cpp +6 -6
  11. package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
  12. package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
  13. package/src/duckdb/src/common/enum_util.cpp +5 -0
  14. package/src/duckdb/src/common/extra_type_info.cpp +2 -2
  15. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +5 -3
  16. package/src/duckdb/src/common/serializer/binary_serializer.cpp +10 -5
  17. package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
  18. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  19. package/src/duckdb/src/common/types/value.cpp +33 -33
  20. package/src/duckdb/src/common/types/vector.cpp +20 -20
  21. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +2 -2
  22. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +6 -6
  23. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +4 -4
  24. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +4 -4
  25. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
  26. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  27. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  28. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  29. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
  30. package/src/duckdb/src/execution/window_executor.cpp +10 -1
  31. package/src/duckdb/src/function/table/read_csv.cpp +4 -4
  32. package/src/duckdb/src/function/table/table_scan.cpp +14 -14
  33. package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
  34. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  36. package/src/duckdb/src/include/duckdb/common/index_vector.hpp +2 -2
  37. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +7 -3
  38. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +2 -1
  39. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +18 -17
  40. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +10 -9
  41. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +4 -0
  42. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
  43. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
  44. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +10 -10
  45. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
  47. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
  48. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  49. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  50. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
  51. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
  52. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  53. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
  54. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  55. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  56. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  57. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
  58. package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
  59. package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
  60. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
  61. package/src/duckdb/src/include/duckdb.h +11 -1
  62. package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
  63. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  64. package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
  65. package/src/duckdb/src/main/relation.cpp +4 -4
  66. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  67. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  68. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  69. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
  70. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  71. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
  72. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  73. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  74. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  75. package/src/duckdb/src/parallel/executor.cpp +6 -0
  76. package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
  77. package/src/duckdb/src/parser/parser.cpp +18 -3
  78. package/src/duckdb/src/parser/tableref/pivotref.cpp +6 -6
  79. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  80. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -10
  81. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +6 -6
  82. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -24
  83. package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +2 -2
  84. package/src/duckdb/src/planner/operator/logical_get.cpp +26 -22
  85. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +26 -26
  86. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +66 -66
  87. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +78 -78
  88. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +250 -250
  89. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +10 -10
  90. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +206 -206
  91. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +116 -116
  92. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +110 -110
  93. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +48 -48
  94. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +16 -16
  95. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -2
  96. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +10 -10
  97. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +54 -54
  98. package/src/duckdb/src/storage/serialization/serialize_types.cpp +22 -22
  99. package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
  100. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
@@ -33,7 +33,7 @@ namespace duckdb {
33
33
  class WindowGlobalSinkState : public GlobalSinkState {
34
34
  public:
35
35
  WindowGlobalSinkState(const PhysicalWindow &op, ClientContext &context)
36
- : mode(DBConfig::GetConfig(context).options.window_mode) {
36
+ : op(op), mode(DBConfig::GetConfig(context).options.window_mode) {
37
37
 
38
38
  D_ASSERT(op.select_list[0]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
39
39
  auto &wexpr = op.select_list[0]->Cast<BoundWindowExpression>();
@@ -43,6 +43,7 @@ public:
43
43
  wexpr.partitions_stats, op.estimated_cardinality);
44
44
  }
45
45
 
46
+ const PhysicalWindow &op;
46
47
  unique_ptr<PartitionGlobalSinkState> global_partition;
47
48
  WindowAggregationMode mode;
48
49
  };
@@ -172,66 +173,93 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
172
173
  //===--------------------------------------------------------------------===//
173
174
  // Source
174
175
  //===--------------------------------------------------------------------===//
176
+ class WindowPartitionSourceState;
177
+
175
178
  class WindowGlobalSourceState : public GlobalSourceState {
176
179
  public:
177
- explicit WindowGlobalSourceState(WindowGlobalSinkState &gsink) : gsink(*gsink.global_partition), next_bin(0) {
178
- }
180
+ using HashGroupSourcePtr = unique_ptr<WindowPartitionSourceState>;
181
+ using ScannerPtr = unique_ptr<RowDataCollectionScanner>;
182
+ using Task = std::pair<WindowPartitionSourceState *, ScannerPtr>;
183
+
184
+ WindowGlobalSourceState(ClientContext &context_p, WindowGlobalSinkState &gsink_p);
179
185
 
180
- PartitionGlobalSinkState &gsink;
181
- //! The output read position.
182
- atomic<idx_t> next_bin;
186
+ //! Get the next task
187
+ Task NextTask(idx_t hash_bin);
188
+
189
+ //! Context for executing computations
190
+ ClientContext &context;
191
+ //! All the sunk data
192
+ WindowGlobalSinkState &gsink;
193
+ //! The next group to build.
194
+ atomic<idx_t> next_build;
195
+ //! The built groups
196
+ vector<HashGroupSourcePtr> built;
197
+ //! Serialise access to the built hash groups
198
+ mutable mutex built_lock;
199
+ //! The number of unfinished tasks
200
+ atomic<idx_t> tasks_remaining;
183
201
 
184
202
  public:
185
203
  idx_t MaxThreads() override {
186
- // If there is only one partition, we have to process it on one thread.
187
- if (!gsink.grouping_data) {
188
- return 1;
189
- }
204
+ return tasks_remaining;
205
+ }
206
+
207
+ private:
208
+ Task CreateTask(idx_t hash_bin);
209
+ Task StealWork();
210
+ };
190
211
 
191
- // If there is not a lot of data, process serially.
192
- if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
193
- return 1;
212
+ WindowGlobalSourceState::WindowGlobalSourceState(ClientContext &context_p, WindowGlobalSinkState &gsink_p)
213
+ : context(context_p), gsink(gsink_p), next_build(0), tasks_remaining(0) {
214
+ auto &hash_groups = gsink.global_partition->hash_groups;
215
+
216
+ auto &gpart = gsink.global_partition;
217
+ if (hash_groups.empty()) {
218
+ // OVER()
219
+ built.resize(1);
220
+ if (gpart->rows) {
221
+ tasks_remaining += gpart->rows->blocks.size();
194
222
  }
223
+ } else {
224
+ built.resize(hash_groups.size());
225
+ for (auto &hash_group : hash_groups) {
226
+ if (!hash_group) {
227
+ continue;
228
+ }
229
+ auto &global_sort_state = *hash_group->global_sort;
230
+ if (global_sort_state.sorted_blocks.empty()) {
231
+ continue;
232
+ }
195
233
 
196
- return gsink.hash_groups.size();
234
+ D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
235
+ auto &sb = *global_sort_state.sorted_blocks[0];
236
+ auto &sd = *sb.payload_data;
237
+ tasks_remaining += sd.data_blocks.size();
238
+ }
197
239
  }
198
- };
240
+ }
199
241
 
200
- // Per-thread read state
201
- class WindowLocalSourceState : public LocalSourceState {
242
+ // Per-bin evaluation state (build and evaluate)
243
+ class WindowPartitionSourceState {
202
244
  public:
203
245
  using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
204
246
  using ExecutorPtr = unique_ptr<WindowExecutor>;
205
247
  using Executors = vector<ExecutorPtr>;
206
- using LocalStatePtr = unique_ptr<WindowExecutorState>;
207
- using LocalStates = vector<LocalStatePtr>;
208
-
209
- WindowLocalSourceState(const PhysicalWindow &op_p, ExecutionContext &context, WindowGlobalSourceState &gsource)
210
- : context(context.client), op(op_p), gsink(gsource.gsink) {
211
-
212
- vector<LogicalType> output_types;
213
- for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
214
- D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
215
- auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
216
- output_types.emplace_back(wexpr.return_type);
217
- }
218
- output_chunk.Initialize(Allocator::Get(context.client), output_types);
219
248
 
220
- const auto &input_types = gsink.payload_types;
221
- layout.Initialize(input_types);
222
- input_chunk.Initialize(gsink.allocator, input_types);
249
+ WindowPartitionSourceState(ClientContext &context, WindowGlobalSourceState &gsource)
250
+ : context(context), op(gsource.gsink.op), gsource(gsource), read_block_idx(0), unscanned(0) {
251
+ layout.Initialize(gsource.gsink.global_partition->payload_types);
223
252
  }
224
253
 
254
+ unique_ptr<RowDataCollectionScanner> GetScanner() const;
225
255
  void MaterializeSortedData();
226
- void GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin);
227
- void Scan(DataChunk &chunk);
256
+ void BuildPartition(WindowGlobalSinkState &gstate, const idx_t hash_bin);
228
257
 
229
- HashGroupPtr hash_group;
230
258
  ClientContext &context;
231
259
  const PhysicalWindow &op;
260
+ WindowGlobalSourceState &gsource;
232
261
 
233
- PartitionGlobalSinkState &gsink;
234
-
262
+ HashGroupPtr hash_group;
235
263
  //! The generated input chunks
236
264
  unique_ptr<RowDataCollection> rows;
237
265
  unique_ptr<RowDataCollection> heap;
@@ -242,21 +270,21 @@ public:
242
270
  //! The order boundary mask
243
271
  vector<validity_t> order_bits;
244
272
  ValidityMask order_mask;
273
+ //! External paging
274
+ bool external;
245
275
  //! The current execution functions
246
276
  Executors executors;
247
- LocalStates local_states;
248
277
 
249
- //! The read partition
278
+ //! The bin number
250
279
  idx_t hash_bin;
251
- //! The read cursor
252
- unique_ptr<RowDataCollectionScanner> scanner;
253
- //! Buffer for the inputs
254
- DataChunk input_chunk;
255
- //! Buffer for window results
256
- DataChunk output_chunk;
280
+
281
+ //! The next block to read.
282
+ mutable atomic<idx_t> read_block_idx;
283
+ //! The number of remaining unscanned blocks.
284
+ atomic<idx_t> unscanned;
257
285
  };
258
286
 
259
- void WindowLocalSourceState::MaterializeSortedData() {
287
+ void WindowPartitionSourceState::MaterializeSortedData() {
260
288
  auto &global_sort_state = *hash_group->global_sort;
261
289
  if (global_sort_state.sorted_blocks.empty()) {
262
290
  return;
@@ -295,7 +323,21 @@ void WindowLocalSourceState::MaterializeSortedData() {
295
323
  [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
296
324
  }
297
325
 
298
- void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin_p) {
326
+ unique_ptr<RowDataCollectionScanner> WindowPartitionSourceState::GetScanner() const {
327
+ auto &gsink = *gsource.gsink.global_partition;
328
+ if ((gsink.rows && !hash_bin) || hash_bin < gsink.hash_groups.size()) {
329
+ const auto block_idx = read_block_idx++;
330
+ if (block_idx >= rows->blocks.size()) {
331
+ return nullptr;
332
+ }
333
+ // Second pass can flush
334
+ --gsource.tasks_remaining;
335
+ return make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, block_idx, true);
336
+ }
337
+ return nullptr;
338
+ }
339
+
340
+ void WindowPartitionSourceState::BuildPartition(WindowGlobalSinkState &gstate, const idx_t hash_bin_p) {
299
341
  // Get rid of any stale data
300
342
  hash_bin = hash_bin_p;
301
343
 
@@ -305,11 +347,12 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
305
347
  // 3. Multiple partitions (sorting and hashing)
306
348
 
307
349
  // How big is the partition?
350
+ auto &gpart = *gsource.gsink.global_partition;
308
351
  idx_t count = 0;
309
- if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
310
- count = gsink.hash_groups[hash_bin]->count;
311
- } else if (gsink.rows && !hash_bin) {
312
- count = gsink.count;
352
+ if (hash_bin < gpart.hash_groups.size() && gpart.hash_groups[hash_bin]) {
353
+ count = gpart.hash_groups[hash_bin]->count;
354
+ } else if (gpart.rows && !hash_bin) {
355
+ count = gpart.count;
313
356
  } else {
314
357
  return;
315
358
  }
@@ -325,19 +368,20 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
325
368
  order_mask.Initialize(order_bits.data());
326
369
 
327
370
  // Scan the sorted data into new Collections
328
- auto external = gsink.external;
329
- if (gsink.rows && !hash_bin) {
371
+ external = gpart.external;
372
+ if (gpart.rows && !hash_bin) {
330
373
  // Simple mask
331
374
  partition_mask.SetValidUnsafe(0);
332
375
  order_mask.SetValidUnsafe(0);
333
376
  // No partition - align the heap blocks with the row blocks
334
- rows = gsink.rows->CloneEmpty(gsink.rows->keep_pinned);
335
- heap = gsink.strings->CloneEmpty(gsink.strings->keep_pinned);
336
- RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gsink.rows, *gsink.strings, layout);
377
+ rows = gpart.rows->CloneEmpty(gpart.rows->keep_pinned);
378
+ heap = gpart.strings->CloneEmpty(gpart.strings->keep_pinned);
379
+ RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gpart.rows, *gpart.strings, layout);
337
380
  external = true;
338
- } else if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
381
+ } else if (hash_bin < gpart.hash_groups.size()) {
339
382
  // Overwrite the collections with the sorted data
340
- hash_group = std::move(gsink.hash_groups[hash_bin]);
383
+ D_ASSERT(gpart.hash_groups[hash_bin].get());
384
+ hash_group = std::move(gpart.hash_groups[hash_bin]);
341
385
  hash_group->ComputeMasks(partition_mask, order_mask);
342
386
  external = hash_group->global_sort->external;
343
387
  MaterializeSortedData();
@@ -346,7 +390,6 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
346
390
  }
347
391
 
348
392
  // Create the executors for each function
349
- local_states.clear();
350
393
  executors.clear();
351
394
  for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
352
395
  D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
@@ -356,8 +399,9 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
356
399
  }
357
400
 
358
401
  // First pass over the input without flushing
359
- // TODO: Factor out the constructor data as global state
360
- scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
402
+ DataChunk input_chunk;
403
+ input_chunk.Initialize(gpart.allocator, gpart.payload_types);
404
+ auto scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
361
405
  idx_t input_idx = 0;
362
406
  while (true) {
363
407
  input_chunk.Reset();
@@ -376,30 +420,196 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
376
420
  // TODO: Parallelization opportunity
377
421
  for (auto &wexec : executors) {
378
422
  wexec->Finalize();
379
- local_states.emplace_back(wexec->GetExecutorState());
380
423
  }
381
424
 
382
425
  // External scanning assumes all blocks are swizzled.
383
426
  scanner->ReSwizzle();
384
427
 
385
- // Second pass can flush
386
- scanner->Reset(true);
428
+ // Start the block countdown
429
+ unscanned = rows->blocks.size();
430
+ }
431
+
432
+ // Per-thread scan state
433
+ class WindowLocalSourceState : public LocalSourceState {
434
+ public:
435
+ using ReadStatePtr = unique_ptr<WindowExecutorState>;
436
+ using ReadStates = vector<ReadStatePtr>;
437
+
438
+ explicit WindowLocalSourceState(WindowGlobalSourceState &gsource);
439
+ bool NextPartition();
440
+ void Scan(DataChunk &chunk);
441
+
442
+ //! The shared source state
443
+ WindowGlobalSourceState &gsource;
444
+ //! The current bin being processed
445
+ idx_t hash_bin;
446
+ //! The current source being processed
447
+ optional_ptr<WindowPartitionSourceState> partition_source;
448
+ //! The read cursor
449
+ unique_ptr<RowDataCollectionScanner> scanner;
450
+ //! Buffer for the inputs
451
+ DataChunk input_chunk;
452
+ //! Executor read states.
453
+ ReadStates read_states;
454
+ //! Buffer for window results
455
+ DataChunk output_chunk;
456
+ };
457
+
458
+ WindowLocalSourceState::WindowLocalSourceState(WindowGlobalSourceState &gsource)
459
+ : gsource(gsource), hash_bin(gsource.built.size()) {
460
+ auto &gsink = *gsource.gsink.global_partition;
461
+ auto &op = gsource.gsink.op;
462
+
463
+ input_chunk.Initialize(gsink.allocator, gsink.payload_types);
464
+
465
+ vector<LogicalType> output_types;
466
+ for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
467
+ D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
468
+ auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
469
+ output_types.emplace_back(wexpr.return_type);
470
+ }
471
+ output_chunk.Initialize(Allocator::Get(gsource.context), output_types);
472
+ }
473
+
474
+ WindowGlobalSourceState::Task WindowGlobalSourceState::CreateTask(idx_t hash_bin) {
475
+ // Build outside the lock so no one tries to steal before we are done.
476
+ auto partition_source = make_uniq<WindowPartitionSourceState>(context, *this);
477
+ partition_source->BuildPartition(gsink, hash_bin);
478
+ Task result(partition_source.get(), partition_source->GetScanner());
479
+
480
+ // Is there any data to scan?
481
+ if (result.second) {
482
+ lock_guard<mutex> built_guard(built_lock);
483
+ built[hash_bin] = std::move(partition_source);
484
+
485
+ return result;
486
+ }
487
+
488
+ return Task();
489
+ }
490
+
491
+ WindowGlobalSourceState::Task WindowGlobalSourceState::StealWork() {
492
+ for (idx_t hash_bin = 0; hash_bin < built.size(); ++hash_bin) {
493
+ lock_guard<mutex> built_guard(built_lock);
494
+ auto &partition_source = built[hash_bin];
495
+ if (!partition_source) {
496
+ continue;
497
+ }
498
+
499
+ Task result(partition_source.get(), partition_source->GetScanner());
500
+
501
+ // Is there any data to scan?
502
+ if (result.second) {
503
+ return result;
504
+ }
505
+ }
506
+
507
+ // Nothing to steal
508
+ return Task();
509
+ }
510
+
511
+ WindowGlobalSourceState::Task WindowGlobalSourceState::NextTask(idx_t hash_bin) {
512
+ auto &hash_groups = gsink.global_partition->hash_groups;
513
+ const auto bin_count = built.size();
514
+
515
+ // Flush unneeded data
516
+ if (hash_bin < bin_count) {
517
+ // Lock and delete when all blocks have been scanned
518
+ // We do this here instead of in NextScan so the WindowLocalSourceState
519
+ // has a chance to delete its state objects first,
520
+ // which may reference the partition_source
521
+
522
+ // Delete data outside the lock in case it is slow
523
+ HashGroupSourcePtr killed;
524
+ lock_guard<mutex> built_guard(built_lock);
525
+ auto &partition_source = built[hash_bin];
526
+ if (partition_source && !partition_source->unscanned) {
527
+ killed = std::move(partition_source);
528
+ }
529
+ }
530
+
531
+ hash_bin = next_build++;
532
+ if (hash_bin < bin_count) {
533
+ // Find a non-empty hash group.
534
+ for (; hash_bin < hash_groups.size(); hash_bin = next_build++) {
535
+ if (hash_groups[hash_bin]) {
536
+ auto result = CreateTask(hash_bin);
537
+ if (result.second) {
538
+ return result;
539
+ }
540
+ }
541
+ }
542
+
543
+ // OVER() doesn't have a hash_group
544
+ if (hash_groups.empty()) {
545
+ auto result = CreateTask(hash_bin);
546
+ if (result.second) {
547
+ return result;
548
+ }
549
+ }
550
+ }
551
+
552
+ // Work stealing
553
+ while (tasks_remaining) {
554
+ auto result = StealWork();
555
+ if (result.second) {
556
+ return result;
557
+ }
558
+
559
+ // If there is nothing to steal but there are unfinished partitions,
560
+ // yield until any pending builds are done.
561
+ TaskScheduler::GetScheduler(context).YieldThread();
562
+ }
563
+
564
+ return Task();
565
+ }
566
+
567
+ bool WindowLocalSourceState::NextPartition() {
568
+ // Release old states before the source
569
+ scanner.reset();
570
+ read_states.clear();
571
+
572
+ // Get a partition_source that is not finished
573
+ while (!scanner) {
574
+ auto task = gsource.NextTask(hash_bin);
575
+ if (!task.first) {
576
+ return false;
577
+ }
578
+ partition_source = task.first;
579
+ scanner = std::move(task.second);
580
+ hash_bin = partition_source->hash_bin;
581
+ }
582
+
583
+ for (auto &wexec : partition_source->executors) {
584
+ read_states.emplace_back(wexec->GetExecutorState());
585
+ }
586
+
587
+ return true;
387
588
  }
388
589
 
389
590
  void WindowLocalSourceState::Scan(DataChunk &result) {
390
591
  D_ASSERT(scanner);
391
592
  if (!scanner->Remaining()) {
392
- return;
593
+ lock_guard<mutex> built_guard(gsource.built_lock);
594
+ --partition_source->unscanned;
595
+ scanner = partition_source->GetScanner();
596
+
597
+ if (!scanner) {
598
+ partition_source = nullptr;
599
+ read_states.clear();
600
+ return;
601
+ }
393
602
  }
394
603
 
395
604
  const auto position = scanner->Scanned();
396
605
  input_chunk.Reset();
397
606
  scanner->Scan(input_chunk);
398
607
 
608
+ auto &executors = partition_source->executors;
399
609
  output_chunk.Reset();
400
610
  for (idx_t expr_idx = 0; expr_idx < executors.size(); ++expr_idx) {
401
611
  auto &executor = *executors[expr_idx];
402
- auto &lstate = *local_states[expr_idx];
612
+ auto &lstate = *read_states[expr_idx];
403
613
  auto &result = output_chunk.data[expr_idx];
404
614
  executor.Evaluate(position, input_chunk, result, lstate);
405
615
  }
@@ -418,43 +628,25 @@ void WindowLocalSourceState::Scan(DataChunk &result) {
418
628
  }
419
629
 
420
630
  unique_ptr<LocalSourceState> PhysicalWindow::GetLocalSourceState(ExecutionContext &context,
421
- GlobalSourceState &gstate_p) const {
422
- auto &gstate = gstate_p.Cast<WindowGlobalSourceState>();
423
- return make_uniq<WindowLocalSourceState>(*this, context, gstate);
631
+ GlobalSourceState &gsource_p) const {
632
+ auto &gsource = gsource_p.Cast<WindowGlobalSourceState>();
633
+ return make_uniq<WindowLocalSourceState>(gsource);
424
634
  }
425
635
 
426
636
  unique_ptr<GlobalSourceState> PhysicalWindow::GetGlobalSourceState(ClientContext &context) const {
427
637
  auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
428
- return make_uniq<WindowGlobalSourceState>(gsink);
638
+ return make_uniq<WindowGlobalSourceState>(context, gsink);
429
639
  }
430
640
 
431
641
  SourceResultType PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk,
432
642
  OperatorSourceInput &input) const {
433
643
  auto &lsource = input.local_state.Cast<WindowLocalSourceState>();
434
- auto &gsource = input.global_state.Cast<WindowGlobalSourceState>();
435
- auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
436
-
437
- auto &hash_groups = gsink.global_partition->hash_groups;
438
- const auto bin_count = hash_groups.empty() ? 1 : hash_groups.size();
439
-
440
644
  while (chunk.size() == 0) {
441
645
  // Move to the next bin if we are done.
442
- while (!lsource.scanner || !lsource.scanner->Remaining()) {
443
- lsource.scanner.reset();
444
- lsource.rows.reset();
445
- lsource.heap.reset();
446
- lsource.hash_group.reset();
447
- auto hash_bin = gsource.next_bin++;
448
- if (hash_bin >= bin_count) {
646
+ while (!lsource.scanner) {
647
+ if (!lsource.NextPartition()) {
449
648
  return chunk.size() > 0 ? SourceResultType::HAVE_MORE_OUTPUT : SourceResultType::FINISHED;
450
649
  }
451
-
452
- for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_bin++) {
453
- if (hash_groups[hash_bin]) {
454
- break;
455
- }
456
- }
457
- lsource.GeneratePartition(gsink, hash_bin);
458
650
  }
459
651
 
460
652
  lsource.Scan(chunk);
@@ -55,7 +55,7 @@ OperatorResultType PhysicalFilter::ExecuteInternal(ExecutionContext &context, Da
55
55
  string PhysicalFilter::ParamsToString() const {
56
56
  auto result = expression->GetName();
57
57
  result += "\n[INFOSEPARATOR]\n";
58
- result += StringUtil::Format("EC: %llu", estimated_props->GetCardinality<idx_t>());
58
+ result += StringUtil::Format("EC: %llu", estimated_cardinality);
59
59
  return result;
60
60
  }
61
61
 
@@ -31,8 +31,7 @@ string PhysicalComparisonJoin::ParamsToString() const {
31
31
  extra_info += it.left->GetName() + " " + op + " " + it.right->GetName() + "\n";
32
32
  }
33
33
  extra_info += "\n[INFOSEPARATOR]\n";
34
- extra_info += StringUtil::Format("EC: %llu\n", estimated_props->GetCardinality<idx_t>());
35
- extra_info += StringUtil::Format("Cost: %llu", estimated_props->GetCost<idx_t>());
34
+ extra_info += StringUtil::Format("EC: %llu\n", estimated_cardinality);
36
35
  return extra_info;
37
36
  }
38
37
 
@@ -145,7 +145,7 @@ string PhysicalTableScan::ParamsToString() const {
145
145
  result += "File Filters: " + extra_info.file_filters;
146
146
  }
147
147
  result += "\n[INFOSEPARATOR]\n";
148
- result += StringUtil::Format("EC: %llu", estimated_props->GetCardinality<idx_t>());
148
+ result += StringUtil::Format("EC: %llu", estimated_cardinality);
149
149
  return result;
150
150
  }
151
151
 
@@ -225,12 +225,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalOperator &
225
225
  throw InternalException("Physical plan generator - no plan generated");
226
226
  }
227
227
 
228
- if (op.estimated_props) {
229
- plan->estimated_cardinality = op.estimated_props->GetCardinality<idx_t>();
230
- plan->estimated_props = op.estimated_props->Copy();
231
- } else {
232
- plan->estimated_props = make_uniq<EstimatedProperties>();
233
- }
228
+ plan->estimated_cardinality = op.estimated_cardinality;
234
229
 
235
230
  return plan;
236
231
  }
@@ -315,6 +315,7 @@ struct WindowBoundariesState {
315
315
  const bool has_following_range;
316
316
  const bool needs_peer;
317
317
 
318
+ idx_t next_pos = 0;
318
319
  idx_t partition_start = 0;
319
320
  idx_t partition_end = 0;
320
321
  idx_t peer_start = 0;
@@ -339,12 +340,19 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
339
340
  // determine partition and peer group boundaries to ultimately figure out window size
340
341
  const auto is_same_partition = !partition_mask.RowIsValidUnsafe(row_idx);
341
342
  const auto is_peer = !order_mask.RowIsValidUnsafe(row_idx);
343
+ const auto is_jump = (next_pos != row_idx);
342
344
 
343
345
  // when the partition changes, recompute the boundaries
344
- if (!is_same_partition) {
346
+ if (!is_same_partition || is_jump) {
345
347
  partition_start = row_idx;
346
348
  peer_start = row_idx;
347
349
 
350
+ if (is_jump) {
351
+ // Go back as far as the previous partition start
352
+ idx_t n = 1;
353
+ partition_start = FindPrevStart(partition_mask, partition_start, row_idx + 1, n);
354
+ }
355
+
348
356
  // find end of partition
349
357
  partition_end = input_size;
350
358
  if (partition_count) {
@@ -393,6 +401,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
393
401
  partition_end = input_size;
394
402
  peer_end = partition_end;
395
403
  }
404
+ next_pos = row_idx + 1;
396
405
 
397
406
  // determine window boundaries depending on the type of expression
398
407
  window_start = -1;
@@ -1240,14 +1240,14 @@ static unique_ptr<FunctionData> CSVReaderDeserialize(PlanDeserializationState &s
1240
1240
  static void CSVReaderFormatSerialize(FormatSerializer &serializer, const optional_ptr<FunctionData> bind_data_p,
1241
1241
  const TableFunction &function) {
1242
1242
  auto &bind_data = bind_data_p->Cast<ReadCSVData>();
1243
- serializer.WriteProperty("extra_info", function.extra_info);
1244
- serializer.WriteProperty("csv_data", bind_data);
1243
+ serializer.WriteProperty(100, "extra_info", function.extra_info);
1244
+ serializer.WriteProperty(101, "csv_data", bind_data);
1245
1245
  }
1246
1246
 
1247
1247
  static unique_ptr<FunctionData> CSVReaderFormatDeserialize(FormatDeserializer &deserializer, TableFunction &function) {
1248
1248
  unique_ptr<ReadCSVData> result;
1249
- deserializer.ReadProperty("extra_info", function.extra_info);
1250
- deserializer.ReadProperty("csv_data", result);
1249
+ deserializer.ReadProperty(100, "extra_info", function.extra_info);
1250
+ deserializer.ReadProperty(101, "csv_data", result);
1251
1251
  return std::move(result);
1252
1252
  }
1253
1253
 
@@ -452,29 +452,29 @@ static unique_ptr<FunctionData> TableScanDeserialize(PlanDeserializationState &s
452
452
  static void TableScanFormatSerialize(FormatSerializer &serializer, const optional_ptr<FunctionData> bind_data_p,
453
453
  const TableFunction &function) {
454
454
  auto &bind_data = bind_data_p->Cast<TableScanBindData>();
455
- serializer.WriteProperty("catalog", bind_data.table.schema.catalog.GetName());
456
- serializer.WriteProperty("schema", bind_data.table.schema.name);
457
- serializer.WriteProperty("table", bind_data.table.name);
458
- serializer.WriteProperty("is_index_scan", bind_data.is_index_scan);
459
- serializer.WriteProperty("is_create_index", bind_data.is_create_index);
460
- serializer.WriteProperty("result_ids", bind_data.result_ids);
461
- serializer.WriteProperty("result_ids", bind_data.result_ids);
455
+ serializer.WriteProperty(100, "catalog", bind_data.table.schema.catalog.GetName());
456
+ serializer.WriteProperty(101, "schema", bind_data.table.schema.name);
457
+ serializer.WriteProperty(102, "table", bind_data.table.name);
458
+ serializer.WriteProperty(103, "is_index_scan", bind_data.is_index_scan);
459
+ serializer.WriteProperty(104, "is_create_index", bind_data.is_create_index);
460
+ serializer.WriteProperty(105, "result_ids", bind_data.result_ids);
461
+ serializer.WriteProperty(106, "result_ids", bind_data.result_ids);
462
462
  }
463
463
 
464
464
  static unique_ptr<FunctionData> TableScanFormatDeserialize(FormatDeserializer &deserializer, TableFunction &function) {
465
- auto catalog = deserializer.ReadProperty<string>("catalog");
466
- auto schema = deserializer.ReadProperty<string>("schema");
467
- auto table = deserializer.ReadProperty<string>("table");
465
+ auto catalog = deserializer.ReadProperty<string>(100, "catalog");
466
+ auto schema = deserializer.ReadProperty<string>(101, "schema");
467
+ auto table = deserializer.ReadProperty<string>(102, "table");
468
468
  auto &catalog_entry =
469
469
  Catalog::GetEntry<TableCatalogEntry>(deserializer.Get<ClientContext &>(), catalog, schema, table);
470
470
  if (catalog_entry.type != CatalogType::TABLE_ENTRY) {
471
471
  throw SerializationException("Cant find table for %s.%s", schema, table);
472
472
  }
473
473
  auto result = make_uniq<TableScanBindData>(catalog_entry.Cast<DuckTableEntry>());
474
- deserializer.ReadProperty("is_index_scan", result->is_index_scan);
475
- deserializer.ReadProperty("is_create_index", result->is_create_index);
476
- deserializer.ReadProperty("result_ids", result->result_ids);
477
- deserializer.ReadProperty("result_ids", result->result_ids);
474
+ deserializer.ReadProperty(103, "is_index_scan", result->is_index_scan);
475
+ deserializer.ReadProperty(104, "is_create_index", result->is_create_index);
476
+ deserializer.ReadProperty(105, "result_ids", result->result_ids);
477
+ deserializer.ReadProperty(106, "result_ids", result->result_ids);
478
478
  return std::move(result);
479
479
  }
480
480