duckdb 0.7.2-dev2366.0 → 0.7.2-dev2430.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/package.json +1 -1
  2. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  3. package/src/duckdb/src/common/file_buffer.cpp +8 -0
  4. package/src/duckdb/src/common/radix_partitioning.cpp +34 -0
  5. package/src/duckdb/src/common/sort/partition_state.cpp +44 -124
  6. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  7. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +144 -31
  8. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +698 -0
  9. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -1
  10. package/src/duckdb/src/function/scalar/list/list_sort.cpp +30 -56
  11. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  12. package/src/duckdb/src/include/duckdb/common/enums/debug_initialize.hpp +17 -0
  13. package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +8 -0
  14. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  15. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +3 -0
  16. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +3 -0
  17. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +11 -60
  18. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +6 -1
  19. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +93 -0
  20. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +1 -1
  21. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  22. package/src/duckdb/src/include/duckdb/main/config.hpp +7 -2
  23. package/src/duckdb/src/include/duckdb/main/settings.hpp +13 -3
  24. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -2
  25. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +1 -0
  26. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +3 -0
  27. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -1
  28. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +11 -5
  29. package/src/duckdb/src/main/config.cpp +26 -0
  30. package/src/duckdb/src/main/settings/settings.cpp +31 -8
  31. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +2 -5
  32. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +6 -14
  33. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +2 -5
  34. package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -2
  35. package/src/duckdb/src/storage/meta_block_writer.cpp +4 -0
  36. package/src/duckdb/src/storage/partial_block_manager.cpp +11 -4
  37. package/src/duckdb/src/storage/single_file_block_manager.cpp +16 -9
  38. package/src/duckdb/src/storage/standard_buffer_manager.cpp +5 -2
  39. package/src/duckdb/src/storage/storage_manager.cpp +7 -2
  40. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +21 -1
  41. package/src/duckdb/ub_src_execution_operator_join.cpp +2 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.7.2-dev2366.0",
+  "version": "0.7.2-dev2430.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {
package/src/duckdb/src/common/enums/physical_operator_type.cpp CHANGED
@@ -61,6 +61,8 @@ string PhysicalOperatorToString(PhysicalOperatorType type) {
 		return "PIECEWISE_MERGE_JOIN";
 	case PhysicalOperatorType::IE_JOIN:
 		return "IE_JOIN";
+	case PhysicalOperatorType::ASOF_JOIN:
+		return "ASOF_JOIN";
 	case PhysicalOperatorType::CROSS_PRODUCT:
 		return "CROSS_PRODUCT";
 	case PhysicalOperatorType::POSITIONAL_JOIN:
package/src/duckdb/src/common/file_buffer.cpp CHANGED
@@ -97,4 +97,12 @@ void FileBuffer::Clear() {
 	memset(internal_buffer, 0, internal_size);
 }
 
+void FileBuffer::Initialize(DebugInitialize initialize) {
+	if (initialize == DebugInitialize::NO_INITIALIZE) {
+		return;
+	}
+	uint8_t value = initialize == DebugInitialize::DEBUG_ZERO_INITIALIZE ? 0 : 0xFF;
+	memset(internal_buffer, value, internal_size);
+}
+
 } // namespace duckdb
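The new FileBuffer::Initialize hook fills a buffer with a recognizable byte pattern when debug initialization is enabled, which helps flush out reads of uninitialized storage. A minimal standalone sketch of the same fill logic; the third enum member is not shown in this diff, so DEBUG_ONE_INITIALIZE below is an assumption based on the 0xFF branch:

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Mirrors the states added in debug_initialize.hpp; DEBUG_ONE_INITIALIZE is
// assumed from the 0xFF branch in the hunk above, not shown in this diff.
enum class DebugInitialize : uint8_t { NO_INITIALIZE, DEBUG_ZERO_INITIALIZE, DEBUG_ONE_INITIALIZE };

// Fill a buffer the same way FileBuffer::Initialize does: zero bytes for
// DEBUG_ZERO_INITIALIZE, 0xFF bytes otherwise, and leave it untouched when off.
void InitializeBuffer(std::vector<uint8_t> &buffer, DebugInitialize mode) {
	if (mode == DebugInitialize::NO_INITIALIZE) {
		return;
	}
	uint8_t value = (mode == DebugInitialize::DEBUG_ZERO_INITIALIZE) ? 0 : 0xFF;
	std::memset(buffer.data(), value, buffer.size());
}
```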
package/src/duckdb/src/common/radix_partitioning.cpp CHANGED
@@ -60,6 +60,40 @@ idx_t RadixPartitioning::Select(Vector &hashes, const SelectionVector *sel, idx_
 	return RadixBitsSwitch<SelectFunctor, idx_t>(radix_bits, hashes, sel, count, cutoff, true_sel, false_sel);
 }
 
+struct HashsToBinsFunctor {
+	template <idx_t radix_bits>
+	static void Operation(Vector &hashes, Vector &bins, idx_t count) {
+		using CONSTANTS = RadixPartitioningConstants<radix_bits>;
+		UnaryExecutor::Execute<hash_t, hash_t>(hashes, bins, count,
+		                                       [&](hash_t hash) { return CONSTANTS::ApplyMask(hash); });
+	}
+};
+
+void RadixPartitioning::HashesToBins(Vector &hashes, idx_t radix_bits, Vector &bins, idx_t count) {
+	return RadixBitsSwitch<HashsToBinsFunctor, void>(radix_bits, hashes, bins, count);
+}
+
+//===--------------------------------------------------------------------===//
+// Row Data Partitioning
+//===--------------------------------------------------------------------===//
+template <idx_t radix_bits>
+static void InitPartitions(BufferManager &buffer_manager, vector<unique_ptr<RowDataCollection>> &partition_collections,
+                           RowDataBlock *partition_blocks[], vector<BufferHandle> &partition_handles,
+                           data_ptr_t partition_ptrs[], idx_t block_capacity, idx_t row_width) {
+	using CONSTANTS = RadixPartitioningConstants<radix_bits>;
+
+	partition_collections.reserve(CONSTANTS::NUM_PARTITIONS);
+	partition_handles.reserve(CONSTANTS::NUM_PARTITIONS);
+	for (idx_t i = 0; i < CONSTANTS::NUM_PARTITIONS; i++) {
+		partition_collections.push_back(make_uniq<RowDataCollection>(buffer_manager, block_capacity, row_width));
+		partition_blocks[i] = &partition_collections[i]->CreateBlock();
+		partition_handles.push_back(buffer_manager.Pin(partition_blocks[i]->block));
+		if (partition_ptrs) {
+			partition_ptrs[i] = partition_handles[i].Ptr();
+		}
+	}
+}
+
 struct ComputePartitionIndicesFunctor {
 	template <idx_t radix_bits>
 	static void Operation(Vector &hashes, Vector &partition_indices, idx_t count) {
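The new HashesToBins maps every 64-bit hash to one of 2^radix_bits bins via RadixPartitioningConstants::ApplyMask. A self-contained sketch of the idea, assuming the bin comes from the low radix_bits bits of the hash; the exact bit range DuckDB masks is defined in radix_partitioning.hpp and may differ:

```cpp
#include <cstdint>
#include <vector>

using hash_t = uint64_t;

// Map each hash to a bin in [0, 2^radix_bits). Assumption: low bits are used;
// DuckDB's ApplyMask may select a different bit range of the hash.
std::vector<hash_t> HashesToBins(const std::vector<hash_t> &hashes, uint64_t radix_bits) {
	const hash_t num_partitions = hash_t(1) << radix_bits;
	const hash_t mask = num_partitions - 1;
	std::vector<hash_t> bins;
	bins.reserve(hashes.size());
	for (auto hash : hashes) {
		bins.push_back(hash & mask); // always < num_partitions
	}
	return bins;
}
```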
package/src/duckdb/src/common/sort/partition_state.cpp CHANGED
@@ -18,15 +18,24 @@ PartitionGlobalHashGroup::PartitionGlobalHashGroup(BufferManager &buffer_manager
 	global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
 	global_sort->external = external;
 
+	// Set up a comparator for the partition subset
 	partition_layout = global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size());
 }
 
+int PartitionGlobalHashGroup::ComparePartitions(const SBIterator &left, const SBIterator &right) const {
+	int part_cmp = 0;
+	if (partition_layout.all_constant) {
+		part_cmp = FastMemcmp(left.entry_ptr, right.entry_ptr, partition_layout.comparison_size);
+	} else {
+		part_cmp = Comparators::CompareTuple(left.scan, right.scan, left.entry_ptr, right.entry_ptr, partition_layout,
+		                                     left.external);
+	}
+	return part_cmp;
+}
+
 void PartitionGlobalHashGroup::ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask) {
 	D_ASSERT(count > 0);
 
-	// Set up a comparator for the partition subset
-	const auto partition_size = partition_layout.comparison_size;
-
 	SBIterator prev(*global_sort, ExpressionType::COMPARE_LESSTHAN);
 	SBIterator curr(*global_sort, ExpressionType::COMPARE_LESSTHAN);
 
@@ -34,13 +43,8 @@ void PartitionGlobalHashGroup::ComputeMasks(ValidityMask &partition_mask, Validi
 	order_mask.SetValidUnsafe(0);
 	for (++curr; curr.GetIndex() < count; ++curr) {
 		// Compare the partition subset first because if that differs, then so does the full ordering
-		int part_cmp = 0;
-		if (partition_layout.all_constant) {
-			part_cmp = FastMemcmp(prev.entry_ptr, curr.entry_ptr, partition_size);
-		} else {
-			part_cmp = Comparators::CompareTuple(prev.scan, curr.scan, prev.entry_ptr, curr.entry_ptr, partition_layout,
-			                                     prev.external);
-		}
+		const auto part_cmp = ComparePartitions(prev, curr);
+		;
 
 		if (part_cmp) {
 			partition_mask.SetValidUnsafe(curr.GetIndex());
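ComputeMasks now delegates the per-row comparison to the extracted ComparePartitions: when the partition prefix of adjacent sorted rows differs, both the partition and order masks are set at that index; when only the ordering keys differ, just the order mask is set. A simplified standalone version over pre-extracted keys; the real code compares serialized row prefixes, using FastMemcmp for all-constant layouts:

```cpp
#include <cstddef>
#include <vector>

struct Row {
	long long partition_key; // stand-in for the serialized partition prefix
	long long order_key;     // stand-in for the ORDER BY key
};

// Position i is flagged when row i starts a new partition (partition_mask)
// or a new peer group within its partition (order_mask). Row 0 starts both.
void ComputeMasks(const std::vector<Row> &sorted, std::vector<bool> &partition_mask,
                  std::vector<bool> &order_mask) {
	partition_mask.assign(sorted.size(), false);
	order_mask.assign(sorted.size(), false);
	if (sorted.empty()) {
		return;
	}
	partition_mask[0] = order_mask[0] = true;
	for (std::size_t i = 1; i < sorted.size(); ++i) {
		if (sorted[i].partition_key != sorted[i - 1].partition_key) {
			// A new partition implies a new ordering group as well.
			partition_mask[i] = order_mask[i] = true;
		} else if (sorted[i].order_key != sorted[i - 1].order_key) {
			order_mask[i] = true;
		}
	}
}
```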
@@ -52,31 +56,40 @@ void PartitionGlobalHashGroup::ComputeMasks(ValidityMask &partition_mask, Validi
 	}
 }
 
-PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
-                                                   const vector<unique_ptr<Expression>> &partitions_p,
-                                                   const vector<BoundOrderByNode> &orders_p, const Types &payload_types,
-                                                   const vector<unique_ptr<BaseStatistics>> &partitions_stats,
-                                                   idx_t estimated_cardinality)
-    : context(context), buffer_manager(BufferManager::GetBufferManager(context)), allocator(Allocator::Get(context)),
-      payload_types(payload_types), memory_per_thread(0), count(0) {
+void PartitionGlobalSinkState::GenerateOrderings(Orders &partitions, Orders &orders,
+                                                 const vector<unique_ptr<Expression>> &partition_bys,
+                                                 const Orders &order_bys,
+                                                 const vector<unique_ptr<BaseStatistics>> &partition_stats) {
 
 	// we sort by both 1) partition by expression list and 2) order by expressions
-	const auto partition_cols = partitions_p.size();
+	const auto partition_cols = partition_bys.size();
 	for (idx_t prt_idx = 0; prt_idx < partition_cols; prt_idx++) {
-		auto &pexpr = partitions_p[prt_idx];
+		auto &pexpr = partition_bys[prt_idx];
 
-		if (partitions_stats.empty() || !partitions_stats[prt_idx]) {
+		if (partition_stats.empty() || !partition_stats[prt_idx]) {
 			orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, pexpr->Copy(), nullptr);
 		} else {
 			orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, pexpr->Copy(),
-			                    partitions_stats[prt_idx]->ToUnique());
+			                    partition_stats[prt_idx]->ToUnique());
 		}
 		partitions.emplace_back(orders.back().Copy());
 	}
 
-	for (const auto &order : orders_p) {
+	for (const auto &order : order_bys) {
 		orders.emplace_back(order.Copy());
 	}
+}
+
+PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
+                                                   const vector<unique_ptr<Expression>> &partition_bys,
+                                                   const vector<BoundOrderByNode> &order_bys,
+                                                   const Types &payload_types,
+                                                   const vector<unique_ptr<BaseStatistics>> &partition_stats,
+                                                   idx_t estimated_cardinality)
+    : context(context), buffer_manager(BufferManager::GetBufferManager(context)), allocator(Allocator::Get(context)),
+      payload_types(payload_types), memory_per_thread(0), count(0) {
+
+	GenerateOrderings(partitions, orders, partition_bys, order_bys, partition_stats);
 
 	memory_per_thread = PhysicalOperator::GetMaxThreadMemory(context);
 	external = ClientConfig::GetConfig(context).force_external;
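The extracted GenerateOrderings concatenates the PARTITION BY expressions (each forced to ASC NULLS FIRST, carrying column statistics when available) with the ORDER BY clauses, so a single sort both clusters partitions contiguously and orders rows within each one. A toy standalone equivalent with hypothetical integer keys:

```cpp
#include <algorithm>
#include <tuple>
#include <vector>

struct InputRow {
	int partition_key; // stands in for the PARTITION BY expression values
	int order_key;     // stands in for the ORDER BY expression values
};

// One sort over (partition_key, order_key) groups each partition together and
// orders rows inside it, which is why the two orderings are concatenated.
void SortForWindowing(std::vector<InputRow> &rows) {
	std::sort(rows.begin(), rows.end(), [](const InputRow &a, const InputRow &b) {
		return std::tie(a.partition_key, a.order_key) < std::tie(b.partition_key, b.order_key);
	});
}
```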
@@ -337,7 +350,8 @@ void PartitionLocalSinkState::Combine() {
 	gstate.CombineLocalPartition(local_partition, local_append);
 }
 
-PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data)
+PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data,
+                                                     hash_t hash_bin)
     : sink(sink), group_data(std::move(group_data)), stage(PartitionSortStage::INIT), total_tasks(0), tasks_assigned(0),
       tasks_completed(0) {
 
@@ -348,6 +362,8 @@ PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &s
 
 	hash_group = sink.hash_groups[group_idx].get();
 	global_sort = sink.hash_groups[group_idx]->global_sort.get();
+
+	sink.bin_groups[hash_bin] = group_idx;
 }
 
 void PartitionLocalMergeState::Prepare() {
@@ -445,10 +461,13 @@ bool PartitionGlobalMergeState::TryPrepareNextStage() {
 
 PartitionGlobalMergeStates::PartitionGlobalMergeStates(PartitionGlobalSinkState &sink) {
 	// Schedule all the sorts for maximum thread utilisation
-	for (auto &group_data : sink.grouping_data->GetPartitions()) {
+	auto &partitions = sink.grouping_data->GetPartitions();
+	sink.bin_groups.resize(partitions.size(), partitions.size());
+	for (hash_t hash_bin = 0; hash_bin < partitions.size(); ++hash_bin) {
+		auto &group_data = partitions[hash_bin];
 		// Prepare for merge sort phase
 		if (group_data->Count()) {
-			auto state = make_uniq<PartitionGlobalMergeState>(sink, std::move(group_data));
+			auto state = make_uniq<PartitionGlobalMergeState>(sink, std::move(group_data), hash_bin);
 			states.emplace_back(std::move(state));
 		}
 	}
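bin_groups is sized to the number of hash bins and pre-filled with partitions.size(), an out-of-range index that serves as an "empty bin" sentinel; only bins that actually contain data receive a real group index in the PartitionGlobalMergeState constructor above. A small sketch of how a reader of that mapping would presumably use the sentinel:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

using idx_t = uint64_t;

int main() {
	const idx_t num_bins = 8;
	// Pre-fill with num_bins: any entry still equal to num_bins marks an empty bin.
	std::vector<idx_t> bin_groups(num_bins, num_bins);

	// Suppose bins 2 and 5 received data and became sorted groups 0 and 1.
	bin_groups[2] = 0;
	bin_groups[5] = 1;

	for (idx_t bin = 0; bin < num_bins; ++bin) {
		if (bin_groups[bin] == num_bins) {
			continue; // empty bin: nothing was sorted for it
		}
		std::printf("bin %llu -> sorted group %llu\n", (unsigned long long)bin,
		            (unsigned long long)bin_groups[bin]);
	}
	return 0;
}
```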
@@ -542,103 +561,4 @@ void PartitionMergeEvent::Schedule() {
 	SetTasks(std::move(merge_tasks));
 }
 
-PartitionLocalSourceState::PartitionLocalSourceState(PartitionGlobalSinkState &gstate_p) : gstate(gstate_p) {
-	const auto &input_types = gstate.payload_types;
-	layout.Initialize(input_types);
-	input_chunk.Initialize(gstate.allocator, input_types);
-}
-
-void PartitionLocalSourceState::MaterializeSortedData() {
-	auto &global_sort_state = *hash_group->global_sort;
-	if (global_sort_state.sorted_blocks.empty()) {
-		return;
-	}
-
-	// scan the sorted row data
-	D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
-	auto &sb = *global_sort_state.sorted_blocks[0];
-
-	// Free up some memory before allocating more
-	sb.radix_sorting_data.clear();
-	sb.blob_sorting_data = nullptr;
-
-	// Move the sorting row blocks into our RDCs
-	auto &buffer_manager = global_sort_state.buffer_manager;
-	auto &sd = *sb.payload_data;
-
-	// Data blocks are required
-	D_ASSERT(!sd.data_blocks.empty());
-	auto &block = sd.data_blocks[0];
-	rows = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
-	rows->blocks = std::move(sd.data_blocks);
-	rows->count = std::accumulate(rows->blocks.begin(), rows->blocks.end(), idx_t(0),
-	                              [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
-
-	// Heap blocks are optional, but we want both for iteration.
-	if (!sd.heap_blocks.empty()) {
-		auto &block = sd.heap_blocks[0];
-		heap = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
-		heap->blocks = std::move(sd.heap_blocks);
-		hash_group.reset();
-	} else {
-		heap = make_uniq<RowDataCollection>(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
-	}
-	heap->count = std::accumulate(heap->blocks.begin(), heap->blocks.end(), idx_t(0),
-	                              [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
-}
-
-idx_t PartitionLocalSourceState::GeneratePartition(const idx_t hash_bin_p) {
-	// Get rid of any stale data
-	hash_bin = hash_bin_p;
-
-	// There are three types of partitions:
-	// 1. No partition (no sorting)
-	// 2. One partition (sorting, but no hashing)
-	// 3. Multiple partitions (sorting and hashing)
-
-	// How big is the partition?
-	idx_t count = 0;
-	if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
-		count = gstate.hash_groups[hash_bin]->count;
-	} else if (gstate.rows && !hash_bin) {
-		count = gstate.count;
-	} else {
-		return count;
-	}
-
-	// Initialise masks to false
-	const auto bit_count = ValidityMask::ValidityMaskSize(count);
-	partition_bits.clear();
-	partition_bits.resize(bit_count, 0);
-	partition_mask.Initialize(partition_bits.data());
-
-	order_bits.clear();
-	order_bits.resize(bit_count, 0);
-	order_mask.Initialize(order_bits.data());
-
-	// Scan the sorted data into new Collections
-	auto external = gstate.external;
-	if (gstate.rows && !hash_bin) {
-		// Simple mask
-		partition_mask.SetValidUnsafe(0);
-		order_mask.SetValidUnsafe(0);
-		// No partition - align the heap blocks with the row blocks
-		rows = gstate.rows->CloneEmpty(gstate.rows->keep_pinned);
-		heap = gstate.strings->CloneEmpty(gstate.strings->keep_pinned);
-		RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gstate.rows, *gstate.strings, layout);
-		external = true;
-	} else if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
-		// Overwrite the collections with the sorted data
-		hash_group = std::move(gstate.hash_groups[hash_bin]);
-		hash_group->ComputeMasks(partition_mask, order_mask);
-		MaterializeSortedData();
-	} else {
-		return count;
-	}
-
-	scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
-
-	return count;
-}
-
 } // namespace duckdb
package/src/duckdb/src/common/sort/sorted_block.cpp CHANGED
@@ -367,7 +367,7 @@ int SBIterator::ComparisonValue(ExpressionType comparison) {
 }
 
 static idx_t GetBlockCountWithEmptyCheck(const GlobalSortState &gss) {
-	D_ASSERT(gss.sorted_blocks.size() > 0);
+	D_ASSERT(!gss.sorted_blocks.empty());
 	return gss.sorted_blocks[0]->radix_sorting_data.size();
 }
 
package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp CHANGED
@@ -1093,14 +1093,26 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
 //===--------------------------------------------------------------------===//
 class WindowGlobalSourceState : public GlobalSourceState {
 public:
-	explicit WindowGlobalSourceState(WindowGlobalSinkState &gsink) : partition_source(*gsink.global_partition) {
+	explicit WindowGlobalSourceState(WindowGlobalSinkState &gsink) : gsink(*gsink.global_partition), next_bin(0) {
 	}
 
-	PartitionGlobalSourceState partition_source;
+	PartitionGlobalSinkState &gsink;
+	//! The output read position.
+	atomic<idx_t> next_bin;
 
 public:
 	idx_t MaxThreads() override {
-		return partition_source.MaxThreads();
+		// If there is only one partition, we have to process it on one thread.
+		if (!gsink.grouping_data) {
+			return 1;
+		}
+
+		// If there is not a lot of data, process serially.
+		if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
+			return 1;
+		}
+
+		return gsink.hash_groups.size();
 	}
 };
 
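The rewritten MaxThreads caps source parallelism: one thread when there is no grouping data (a single, unhashed partition) or when the input is small, otherwise one thread per hash group. A standalone restatement of the heuristic; DuckDB's STANDARD_ROW_GROUPS_SIZE is 122880 rows at this version:

```cpp
#include <cstdint>

using idx_t = uint64_t;

// Stand-in for DuckDB's STANDARD_ROW_GROUPS_SIZE (122880 rows): below this,
// the scheduling overhead of parallel scanning is not worth paying.
constexpr idx_t kStandardRowGroupSize = 122880;

idx_t MaxWindowSourceThreads(bool has_grouping_data, idx_t total_count, idx_t num_hash_groups) {
	if (!has_grouping_data) {
		return 1; // a single unhashed partition must be scanned by one thread
	}
	if (total_count < kStandardRowGroupSize) {
		return 1; // small input: process serially
	}
	return num_hash_groups; // at most one thread per independently sorted group
}
```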
@@ -1112,7 +1124,7 @@ public:
 	using WindowExecutors = vector<WindowExecutorPtr>;
 
 	WindowLocalSourceState(const PhysicalWindow &op_p, ExecutionContext &context, WindowGlobalSourceState &gsource)
-	    : partition_source(gsource.partition_source.gsink), context(context.client), op(op_p) {
+	    : context(context.client), op(op_p), gsink(gsource.gsink) {
 
 		vector<LogicalType> output_types;
 		for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
@@ -1121,29 +1133,134 @@ public:
 			output_types.emplace_back(wexpr.return_type);
 		}
 		output_chunk.Initialize(Allocator::Get(context.client), output_types);
+
+		const auto &input_types = gsink.payload_types;
+		layout.Initialize(input_types);
+		input_chunk.Initialize(gsink.allocator, input_types);
 	}
 
+	void MaterializeSortedData();
 	void GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin);
 	void Scan(DataChunk &chunk);
 
-	PartitionLocalSourceState partition_source;
+	HashGroupPtr hash_group;
 	ClientContext &context;
 	const PhysicalWindow &op;
 
+	PartitionGlobalSinkState &gsink;
+
+	//! The generated input chunks
+	unique_ptr<RowDataCollection> rows;
+	unique_ptr<RowDataCollection> heap;
+	RowLayout layout;
+	//! The partition boundary mask
+	vector<validity_t> partition_bits;
+	ValidityMask partition_mask;
+	//! The order boundary mask
+	vector<validity_t> order_bits;
+	ValidityMask order_mask;
 	//! The current execution functions
 	WindowExecutors window_execs;
+
+	//! The read partition
+	idx_t hash_bin;
+	//! The read cursor
+	unique_ptr<RowDataCollectionScanner> scanner;
+	//! Buffer for the inputs
+	DataChunk input_chunk;
 	//! Buffer for window results
 	DataChunk output_chunk;
 };
 
+void WindowLocalSourceState::MaterializeSortedData() {
+	auto &global_sort_state = *hash_group->global_sort;
+	if (global_sort_state.sorted_blocks.empty()) {
+		return;
+	}
+
+	// scan the sorted row data
+	D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
+	auto &sb = *global_sort_state.sorted_blocks[0];
+
+	// Free up some memory before allocating more
+	sb.radix_sorting_data.clear();
+	sb.blob_sorting_data = nullptr;
+
+	// Move the sorting row blocks into our RDCs
+	auto &buffer_manager = global_sort_state.buffer_manager;
+	auto &sd = *sb.payload_data;
+
+	// Data blocks are required
+	D_ASSERT(!sd.data_blocks.empty());
+	auto &block = sd.data_blocks[0];
+	rows = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
+	rows->blocks = std::move(sd.data_blocks);
+	rows->count = std::accumulate(rows->blocks.begin(), rows->blocks.end(), idx_t(0),
+	                              [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
+
+	// Heap blocks are optional, but we want both for iteration.
+	if (!sd.heap_blocks.empty()) {
+		auto &block = sd.heap_blocks[0];
+		heap = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
+		heap->blocks = std::move(sd.heap_blocks);
+		hash_group.reset();
+	} else {
+		heap = make_uniq<RowDataCollection>(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
+	}
+	heap->count = std::accumulate(heap->blocks.begin(), heap->blocks.end(), idx_t(0),
+	                              [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
+}
+
 void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin_p) {
-	const auto count = partition_source.GeneratePartition(hash_bin_p);
-	if (!count) {
+	// Get rid of any stale data
+	hash_bin = hash_bin_p;
+
+	// There are three types of partitions:
+	// 1. No partition (no sorting)
+	// 2. One partition (sorting, but no hashing)
+	// 3. Multiple partitions (sorting and hashing)
+
+	// How big is the partition?
+	idx_t count = 0;
+	if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
+		count = gsink.hash_groups[hash_bin]->count;
+	} else if (gsink.rows && !hash_bin) {
+		count = gsink.count;
+	} else {
+		return;
+	}
+
+	// Initialise masks to false
+	const auto bit_count = ValidityMask::ValidityMaskSize(count);
+	partition_bits.clear();
+	partition_bits.resize(bit_count, 0);
+	partition_mask.Initialize(partition_bits.data());
+
+	order_bits.clear();
+	order_bits.resize(bit_count, 0);
+	order_mask.Initialize(order_bits.data());
+
+	// Scan the sorted data into new Collections
+	auto external = gsink.external;
+	if (gsink.rows && !hash_bin) {
+		// Simple mask
+		partition_mask.SetValidUnsafe(0);
+		order_mask.SetValidUnsafe(0);
+		// No partition - align the heap blocks with the row blocks
+		rows = gsink.rows->CloneEmpty(gsink.rows->keep_pinned);
+		heap = gsink.strings->CloneEmpty(gsink.strings->keep_pinned);
+		RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gsink.rows, *gsink.strings, layout);
+		external = true;
+	} else if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
+		// Overwrite the collections with the sorted data
+		hash_group = std::move(gsink.hash_groups[hash_bin]);
+		hash_group->ComputeMasks(partition_mask, order_mask);
+		MaterializeSortedData();
+	} else {
 		return;
 	}
 
 	// Create the executors for each function
-	auto &partition_mask = partition_source.partition_mask;
 	window_execs.clear();
 	for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
 		D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
@@ -1154,19 +1271,20 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
 
 	// First pass over the input without flushing
 	// TODO: Factor out the constructor data as global state
+	scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
 	idx_t input_idx = 0;
 	while (true) {
-		partition_source.input_chunk.Reset();
-		partition_source.scanner->Scan(partition_source.input_chunk);
-		if (partition_source.input_chunk.size() == 0) {
+		input_chunk.Reset();
+		scanner->Scan(input_chunk);
+		if (input_chunk.size() == 0) {
 			break;
 		}
 
 		// TODO: Parallelization opportunity
 		for (auto &wexec : window_execs) {
-			wexec->Sink(partition_source.input_chunk, input_idx, partition_source.scanner->Count());
+			wexec->Sink(input_chunk, input_idx, scanner->Count());
 		}
-		input_idx += partition_source.input_chunk.size();
+		input_idx += input_chunk.size();
 	}
 
 	// TODO: Parallelization opportunity
@@ -1175,25 +1293,22 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
 	}
 
 	// External scanning assumes all blocks are swizzled.
-	partition_source.scanner->ReSwizzle();
+	scanner->ReSwizzle();
 
 	// Second pass can flush
-	partition_source.scanner->Reset(true);
+	scanner->Reset(true);
 }
 
 void WindowLocalSourceState::Scan(DataChunk &result) {
-	D_ASSERT(partition_source.scanner);
-	if (!partition_source.scanner->Remaining()) {
+	D_ASSERT(scanner);
+	if (!scanner->Remaining()) {
 		return;
 	}
 
-	const auto position = partition_source.scanner->Scanned();
-	auto &input_chunk = partition_source.input_chunk;
+	const auto position = scanner->Scanned();
 	input_chunk.Reset();
-	partition_source.scanner->Scan(input_chunk);
+	scanner->Scan(input_chunk);
 
-	auto &partition_mask = partition_source.partition_mask;
-	auto &order_mask = partition_source.order_mask;
 	output_chunk.Reset();
 	for (idx_t expr_idx = 0; expr_idx < window_execs.size(); ++expr_idx) {
 		auto &executor = *window_execs[expr_idx];
@@ -1227,9 +1342,7 @@ unique_ptr<GlobalSourceState> PhysicalWindow::GetGlobalSourceState(ClientContext
 void PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
                              LocalSourceState &lstate_p) const {
 	auto &lsource = lstate_p.Cast<WindowLocalSourceState>();
-	auto &lpsource = lsource.partition_source;
 	auto &gsource = gstate_p.Cast<WindowGlobalSourceState>();
-	auto &gpsource = gsource.partition_source;
 	auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
 
 	auto &hash_groups = gsink.global_partition->hash_groups;
@@ -1237,17 +1350,17 @@ void PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk, Global
 
 	while (chunk.size() == 0) {
 		// Move to the next bin if we are done.
-		while (!lpsource.scanner || !lpsource.scanner->Remaining()) {
-			lpsource.scanner.reset();
-			lpsource.rows.reset();
-			lpsource.heap.reset();
-			lpsource.hash_group.reset();
-			auto hash_bin = gpsource.next_bin++;
+		while (!lsource.scanner || !lsource.scanner->Remaining()) {
+			lsource.scanner.reset();
+			lsource.rows.reset();
+			lsource.heap.reset();
+			lsource.hash_group.reset();
+			auto hash_bin = gsource.next_bin++;
 			if (hash_bin >= bin_count) {
 				return;
 			}
 
-			for (; hash_bin < hash_groups.size(); hash_bin = gpsource.next_bin++) {
+			for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_bin++) {
 				if (hash_groups[hash_bin]) {
 					break;
 				}
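With PartitionGlobalSourceState removed, GetData threads claim output bins directly from the atomic next_bin counter: each fetch-and-increment hands the calling thread one unclaimed bin, and empty bins are skipped until a populated one (or the end) is reached. A self-contained sketch of that lock-free claiming loop:

```cpp
#include <atomic>
#include <cstdint>
#include <optional>
#include <vector>

using idx_t = uint64_t;

std::atomic<idx_t> next_bin{0};

// Each worker calls this until it returns std::nullopt. The atomic post-increment
// guarantees every bin is handed to exactly one thread, with no lock held.
std::optional<idx_t> ClaimNextPopulatedBin(const std::vector<bool> &bin_has_data) {
	for (idx_t bin = next_bin++; bin < bin_has_data.size(); bin = next_bin++) {
		if (bin_has_data[bin]) {
			return bin; // claimed a bin with sorted data to scan
		}
		// empty bin: fall through and claim the next one
	}
	return std::nullopt; // all bins handed out
}
```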