duckdb 0.7.2-dev2366.0 → 0.7.2-dev2430.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/file_buffer.cpp +8 -0
- package/src/duckdb/src/common/radix_partitioning.cpp +34 -0
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -124
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +144 -31
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +698 -0
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -1
- package/src/duckdb/src/function/scalar/list/list_sort.cpp +30 -56
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/debug_initialize.hpp +17 -0
- package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +11 -60
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +6 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +93 -0
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +13 -3
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +11 -5
- package/src/duckdb/src/main/config.cpp +26 -0
- package/src/duckdb/src/main/settings/settings.cpp +31 -8
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +2 -5
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +6 -14
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +2 -5
- package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -2
- package/src/duckdb/src/storage/meta_block_writer.cpp +4 -0
- package/src/duckdb/src/storage/partial_block_manager.cpp +11 -4
- package/src/duckdb/src/storage/single_file_block_manager.cpp +16 -9
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +5 -2
- package/src/duckdb/src/storage/storage_manager.cpp +7 -2
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +21 -1
- package/src/duckdb/ub_src_execution_operator_join.cpp +2 -0
package/package.json
CHANGED
@@ -61,6 +61,8 @@ string PhysicalOperatorToString(PhysicalOperatorType type) {
|
|
61
61
|
return "PIECEWISE_MERGE_JOIN";
|
62
62
|
case PhysicalOperatorType::IE_JOIN:
|
63
63
|
return "IE_JOIN";
|
64
|
+
case PhysicalOperatorType::ASOF_JOIN:
|
65
|
+
return "ASOF_JOIN";
|
64
66
|
case PhysicalOperatorType::CROSS_PRODUCT:
|
65
67
|
return "CROSS_PRODUCT";
|
66
68
|
case PhysicalOperatorType::POSITIONAL_JOIN:
|
@@ -97,4 +97,12 @@ void FileBuffer::Clear() {
|
|
97
97
|
memset(internal_buffer, 0, internal_size);
|
98
98
|
}
|
99
99
|
|
100
|
+
void FileBuffer::Initialize(DebugInitialize initialize) {
|
101
|
+
if (initialize == DebugInitialize::NO_INITIALIZE) {
|
102
|
+
return;
|
103
|
+
}
|
104
|
+
uint8_t value = initialize == DebugInitialize::DEBUG_ZERO_INITIALIZE ? 0 : 0xFF;
|
105
|
+
memset(internal_buffer, value, internal_size);
|
106
|
+
}
|
107
|
+
|
100
108
|
} // namespace duckdb
|
@@ -60,6 +60,40 @@ idx_t RadixPartitioning::Select(Vector &hashes, const SelectionVector *sel, idx_
|
|
60
60
|
return RadixBitsSwitch<SelectFunctor, idx_t>(radix_bits, hashes, sel, count, cutoff, true_sel, false_sel);
|
61
61
|
}
|
62
62
|
|
63
|
+
struct HashsToBinsFunctor {
|
64
|
+
template <idx_t radix_bits>
|
65
|
+
static void Operation(Vector &hashes, Vector &bins, idx_t count) {
|
66
|
+
using CONSTANTS = RadixPartitioningConstants<radix_bits>;
|
67
|
+
UnaryExecutor::Execute<hash_t, hash_t>(hashes, bins, count,
|
68
|
+
[&](hash_t hash) { return CONSTANTS::ApplyMask(hash); });
|
69
|
+
}
|
70
|
+
};
|
71
|
+
|
72
|
+
void RadixPartitioning::HashesToBins(Vector &hashes, idx_t radix_bits, Vector &bins, idx_t count) {
|
73
|
+
return RadixBitsSwitch<HashsToBinsFunctor, void>(radix_bits, hashes, bins, count);
|
74
|
+
}
|
75
|
+
|
76
|
+
//===--------------------------------------------------------------------===//
|
77
|
+
// Row Data Partitioning
|
78
|
+
//===--------------------------------------------------------------------===//
|
79
|
+
template <idx_t radix_bits>
|
80
|
+
static void InitPartitions(BufferManager &buffer_manager, vector<unique_ptr<RowDataCollection>> &partition_collections,
|
81
|
+
RowDataBlock *partition_blocks[], vector<BufferHandle> &partition_handles,
|
82
|
+
data_ptr_t partition_ptrs[], idx_t block_capacity, idx_t row_width) {
|
83
|
+
using CONSTANTS = RadixPartitioningConstants<radix_bits>;
|
84
|
+
|
85
|
+
partition_collections.reserve(CONSTANTS::NUM_PARTITIONS);
|
86
|
+
partition_handles.reserve(CONSTANTS::NUM_PARTITIONS);
|
87
|
+
for (idx_t i = 0; i < CONSTANTS::NUM_PARTITIONS; i++) {
|
88
|
+
partition_collections.push_back(make_uniq<RowDataCollection>(buffer_manager, block_capacity, row_width));
|
89
|
+
partition_blocks[i] = &partition_collections[i]->CreateBlock();
|
90
|
+
partition_handles.push_back(buffer_manager.Pin(partition_blocks[i]->block));
|
91
|
+
if (partition_ptrs) {
|
92
|
+
partition_ptrs[i] = partition_handles[i].Ptr();
|
93
|
+
}
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
63
97
|
struct ComputePartitionIndicesFunctor {
|
64
98
|
template <idx_t radix_bits>
|
65
99
|
static void Operation(Vector &hashes, Vector &partition_indices, idx_t count) {
|
@@ -18,15 +18,24 @@ PartitionGlobalHashGroup::PartitionGlobalHashGroup(BufferManager &buffer_manager
|
|
18
18
|
global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
|
19
19
|
global_sort->external = external;
|
20
20
|
|
21
|
+
// Set up a comparator for the partition subset
|
21
22
|
partition_layout = global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size());
|
22
23
|
}
|
23
24
|
|
25
|
+
int PartitionGlobalHashGroup::ComparePartitions(const SBIterator &left, const SBIterator &right) const {
|
26
|
+
int part_cmp = 0;
|
27
|
+
if (partition_layout.all_constant) {
|
28
|
+
part_cmp = FastMemcmp(left.entry_ptr, right.entry_ptr, partition_layout.comparison_size);
|
29
|
+
} else {
|
30
|
+
part_cmp = Comparators::CompareTuple(left.scan, right.scan, left.entry_ptr, right.entry_ptr, partition_layout,
|
31
|
+
left.external);
|
32
|
+
}
|
33
|
+
return part_cmp;
|
34
|
+
}
|
35
|
+
|
24
36
|
void PartitionGlobalHashGroup::ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask) {
|
25
37
|
D_ASSERT(count > 0);
|
26
38
|
|
27
|
-
// Set up a comparator for the partition subset
|
28
|
-
const auto partition_size = partition_layout.comparison_size;
|
29
|
-
|
30
39
|
SBIterator prev(*global_sort, ExpressionType::COMPARE_LESSTHAN);
|
31
40
|
SBIterator curr(*global_sort, ExpressionType::COMPARE_LESSTHAN);
|
32
41
|
|
@@ -34,13 +43,8 @@ void PartitionGlobalHashGroup::ComputeMasks(ValidityMask &partition_mask, Validi
|
|
34
43
|
order_mask.SetValidUnsafe(0);
|
35
44
|
for (++curr; curr.GetIndex() < count; ++curr) {
|
36
45
|
// Compare the partition subset first because if that differs, then so does the full ordering
|
37
|
-
|
38
|
-
|
39
|
-
part_cmp = FastMemcmp(prev.entry_ptr, curr.entry_ptr, partition_size);
|
40
|
-
} else {
|
41
|
-
part_cmp = Comparators::CompareTuple(prev.scan, curr.scan, prev.entry_ptr, curr.entry_ptr, partition_layout,
|
42
|
-
prev.external);
|
43
|
-
}
|
46
|
+
const auto part_cmp = ComparePartitions(prev, curr);
|
47
|
+
;
|
44
48
|
|
45
49
|
if (part_cmp) {
|
46
50
|
partition_mask.SetValidUnsafe(curr.GetIndex());
|
@@ -52,31 +56,40 @@ void PartitionGlobalHashGroup::ComputeMasks(ValidityMask &partition_mask, Validi
|
|
52
56
|
}
|
53
57
|
}
|
54
58
|
|
55
|
-
PartitionGlobalSinkState::
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
idx_t estimated_cardinality)
|
60
|
-
: context(context), buffer_manager(BufferManager::GetBufferManager(context)), allocator(Allocator::Get(context)),
|
61
|
-
payload_types(payload_types), memory_per_thread(0), count(0) {
|
59
|
+
void PartitionGlobalSinkState::GenerateOrderings(Orders &partitions, Orders &orders,
|
60
|
+
const vector<unique_ptr<Expression>> &partition_bys,
|
61
|
+
const Orders &order_bys,
|
62
|
+
const vector<unique_ptr<BaseStatistics>> &partition_stats) {
|
62
63
|
|
63
64
|
// we sort by both 1) partition by expression list and 2) order by expressions
|
64
|
-
const auto partition_cols =
|
65
|
+
const auto partition_cols = partition_bys.size();
|
65
66
|
for (idx_t prt_idx = 0; prt_idx < partition_cols; prt_idx++) {
|
66
|
-
auto &pexpr =
|
67
|
+
auto &pexpr = partition_bys[prt_idx];
|
67
68
|
|
68
|
-
if (
|
69
|
+
if (partition_stats.empty() || !partition_stats[prt_idx]) {
|
69
70
|
orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, pexpr->Copy(), nullptr);
|
70
71
|
} else {
|
71
72
|
orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, pexpr->Copy(),
|
72
|
-
|
73
|
+
partition_stats[prt_idx]->ToUnique());
|
73
74
|
}
|
74
75
|
partitions.emplace_back(orders.back().Copy());
|
75
76
|
}
|
76
77
|
|
77
|
-
for (const auto &order :
|
78
|
+
for (const auto &order : order_bys) {
|
78
79
|
orders.emplace_back(order.Copy());
|
79
80
|
}
|
81
|
+
}
|
82
|
+
|
83
|
+
PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
|
84
|
+
const vector<unique_ptr<Expression>> &partition_bys,
|
85
|
+
const vector<BoundOrderByNode> &order_bys,
|
86
|
+
const Types &payload_types,
|
87
|
+
const vector<unique_ptr<BaseStatistics>> &partition_stats,
|
88
|
+
idx_t estimated_cardinality)
|
89
|
+
: context(context), buffer_manager(BufferManager::GetBufferManager(context)), allocator(Allocator::Get(context)),
|
90
|
+
payload_types(payload_types), memory_per_thread(0), count(0) {
|
91
|
+
|
92
|
+
GenerateOrderings(partitions, orders, partition_bys, order_bys, partition_stats);
|
80
93
|
|
81
94
|
memory_per_thread = PhysicalOperator::GetMaxThreadMemory(context);
|
82
95
|
external = ClientConfig::GetConfig(context).force_external;
|
@@ -337,7 +350,8 @@ void PartitionLocalSinkState::Combine() {
|
|
337
350
|
gstate.CombineLocalPartition(local_partition, local_append);
|
338
351
|
}
|
339
352
|
|
340
|
-
PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data
|
353
|
+
PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data,
|
354
|
+
hash_t hash_bin)
|
341
355
|
: sink(sink), group_data(std::move(group_data)), stage(PartitionSortStage::INIT), total_tasks(0), tasks_assigned(0),
|
342
356
|
tasks_completed(0) {
|
343
357
|
|
@@ -348,6 +362,8 @@ PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &s
|
|
348
362
|
|
349
363
|
hash_group = sink.hash_groups[group_idx].get();
|
350
364
|
global_sort = sink.hash_groups[group_idx]->global_sort.get();
|
365
|
+
|
366
|
+
sink.bin_groups[hash_bin] = group_idx;
|
351
367
|
}
|
352
368
|
|
353
369
|
void PartitionLocalMergeState::Prepare() {
|
@@ -445,10 +461,13 @@ bool PartitionGlobalMergeState::TryPrepareNextStage() {
|
|
445
461
|
|
446
462
|
PartitionGlobalMergeStates::PartitionGlobalMergeStates(PartitionGlobalSinkState &sink) {
|
447
463
|
// Schedule all the sorts for maximum thread utilisation
|
448
|
-
|
464
|
+
auto &partitions = sink.grouping_data->GetPartitions();
|
465
|
+
sink.bin_groups.resize(partitions.size(), partitions.size());
|
466
|
+
for (hash_t hash_bin = 0; hash_bin < partitions.size(); ++hash_bin) {
|
467
|
+
auto &group_data = partitions[hash_bin];
|
449
468
|
// Prepare for merge sort phase
|
450
469
|
if (group_data->Count()) {
|
451
|
-
auto state = make_uniq<PartitionGlobalMergeState>(sink, std::move(group_data));
|
470
|
+
auto state = make_uniq<PartitionGlobalMergeState>(sink, std::move(group_data), hash_bin);
|
452
471
|
states.emplace_back(std::move(state));
|
453
472
|
}
|
454
473
|
}
|
@@ -542,103 +561,4 @@ void PartitionMergeEvent::Schedule() {
|
|
542
561
|
SetTasks(std::move(merge_tasks));
|
543
562
|
}
|
544
563
|
|
545
|
-
PartitionLocalSourceState::PartitionLocalSourceState(PartitionGlobalSinkState &gstate_p) : gstate(gstate_p) {
|
546
|
-
const auto &input_types = gstate.payload_types;
|
547
|
-
layout.Initialize(input_types);
|
548
|
-
input_chunk.Initialize(gstate.allocator, input_types);
|
549
|
-
}
|
550
|
-
|
551
|
-
void PartitionLocalSourceState::MaterializeSortedData() {
|
552
|
-
auto &global_sort_state = *hash_group->global_sort;
|
553
|
-
if (global_sort_state.sorted_blocks.empty()) {
|
554
|
-
return;
|
555
|
-
}
|
556
|
-
|
557
|
-
// scan the sorted row data
|
558
|
-
D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
|
559
|
-
auto &sb = *global_sort_state.sorted_blocks[0];
|
560
|
-
|
561
|
-
// Free up some memory before allocating more
|
562
|
-
sb.radix_sorting_data.clear();
|
563
|
-
sb.blob_sorting_data = nullptr;
|
564
|
-
|
565
|
-
// Move the sorting row blocks into our RDCs
|
566
|
-
auto &buffer_manager = global_sort_state.buffer_manager;
|
567
|
-
auto &sd = *sb.payload_data;
|
568
|
-
|
569
|
-
// Data blocks are required
|
570
|
-
D_ASSERT(!sd.data_blocks.empty());
|
571
|
-
auto &block = sd.data_blocks[0];
|
572
|
-
rows = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
|
573
|
-
rows->blocks = std::move(sd.data_blocks);
|
574
|
-
rows->count = std::accumulate(rows->blocks.begin(), rows->blocks.end(), idx_t(0),
|
575
|
-
[&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
|
576
|
-
|
577
|
-
// Heap blocks are optional, but we want both for iteration.
|
578
|
-
if (!sd.heap_blocks.empty()) {
|
579
|
-
auto &block = sd.heap_blocks[0];
|
580
|
-
heap = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
|
581
|
-
heap->blocks = std::move(sd.heap_blocks);
|
582
|
-
hash_group.reset();
|
583
|
-
} else {
|
584
|
-
heap = make_uniq<RowDataCollection>(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
|
585
|
-
}
|
586
|
-
heap->count = std::accumulate(heap->blocks.begin(), heap->blocks.end(), idx_t(0),
|
587
|
-
[&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
|
588
|
-
}
|
589
|
-
|
590
|
-
idx_t PartitionLocalSourceState::GeneratePartition(const idx_t hash_bin_p) {
|
591
|
-
// Get rid of any stale data
|
592
|
-
hash_bin = hash_bin_p;
|
593
|
-
|
594
|
-
// There are three types of partitions:
|
595
|
-
// 1. No partition (no sorting)
|
596
|
-
// 2. One partition (sorting, but no hashing)
|
597
|
-
// 3. Multiple partitions (sorting and hashing)
|
598
|
-
|
599
|
-
// How big is the partition?
|
600
|
-
idx_t count = 0;
|
601
|
-
if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
|
602
|
-
count = gstate.hash_groups[hash_bin]->count;
|
603
|
-
} else if (gstate.rows && !hash_bin) {
|
604
|
-
count = gstate.count;
|
605
|
-
} else {
|
606
|
-
return count;
|
607
|
-
}
|
608
|
-
|
609
|
-
// Initialise masks to false
|
610
|
-
const auto bit_count = ValidityMask::ValidityMaskSize(count);
|
611
|
-
partition_bits.clear();
|
612
|
-
partition_bits.resize(bit_count, 0);
|
613
|
-
partition_mask.Initialize(partition_bits.data());
|
614
|
-
|
615
|
-
order_bits.clear();
|
616
|
-
order_bits.resize(bit_count, 0);
|
617
|
-
order_mask.Initialize(order_bits.data());
|
618
|
-
|
619
|
-
// Scan the sorted data into new Collections
|
620
|
-
auto external = gstate.external;
|
621
|
-
if (gstate.rows && !hash_bin) {
|
622
|
-
// Simple mask
|
623
|
-
partition_mask.SetValidUnsafe(0);
|
624
|
-
order_mask.SetValidUnsafe(0);
|
625
|
-
// No partition - align the heap blocks with the row blocks
|
626
|
-
rows = gstate.rows->CloneEmpty(gstate.rows->keep_pinned);
|
627
|
-
heap = gstate.strings->CloneEmpty(gstate.strings->keep_pinned);
|
628
|
-
RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gstate.rows, *gstate.strings, layout);
|
629
|
-
external = true;
|
630
|
-
} else if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
|
631
|
-
// Overwrite the collections with the sorted data
|
632
|
-
hash_group = std::move(gstate.hash_groups[hash_bin]);
|
633
|
-
hash_group->ComputeMasks(partition_mask, order_mask);
|
634
|
-
MaterializeSortedData();
|
635
|
-
} else {
|
636
|
-
return count;
|
637
|
-
}
|
638
|
-
|
639
|
-
scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
|
640
|
-
|
641
|
-
return count;
|
642
|
-
}
|
643
|
-
|
644
564
|
} // namespace duckdb
|
@@ -367,7 +367,7 @@ int SBIterator::ComparisonValue(ExpressionType comparison) {
|
|
367
367
|
}
|
368
368
|
|
369
369
|
static idx_t GetBlockCountWithEmptyCheck(const GlobalSortState &gss) {
|
370
|
-
D_ASSERT(gss.sorted_blocks.
|
370
|
+
D_ASSERT(!gss.sorted_blocks.empty());
|
371
371
|
return gss.sorted_blocks[0]->radix_sorting_data.size();
|
372
372
|
}
|
373
373
|
|
@@ -1093,14 +1093,26 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
|
|
1093
1093
|
//===--------------------------------------------------------------------===//
|
1094
1094
|
class WindowGlobalSourceState : public GlobalSourceState {
|
1095
1095
|
public:
|
1096
|
-
explicit WindowGlobalSourceState(WindowGlobalSinkState &gsink) :
|
1096
|
+
explicit WindowGlobalSourceState(WindowGlobalSinkState &gsink) : gsink(*gsink.global_partition), next_bin(0) {
|
1097
1097
|
}
|
1098
1098
|
|
1099
|
-
|
1099
|
+
PartitionGlobalSinkState &gsink;
|
1100
|
+
//! The output read position.
|
1101
|
+
atomic<idx_t> next_bin;
|
1100
1102
|
|
1101
1103
|
public:
|
1102
1104
|
idx_t MaxThreads() override {
|
1103
|
-
|
1105
|
+
// If there is only one partition, we have to process it on one thread.
|
1106
|
+
if (!gsink.grouping_data) {
|
1107
|
+
return 1;
|
1108
|
+
}
|
1109
|
+
|
1110
|
+
// If there is not a lot of data, process serially.
|
1111
|
+
if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
|
1112
|
+
return 1;
|
1113
|
+
}
|
1114
|
+
|
1115
|
+
return gsink.hash_groups.size();
|
1104
1116
|
}
|
1105
1117
|
};
|
1106
1118
|
|
@@ -1112,7 +1124,7 @@ public:
|
|
1112
1124
|
using WindowExecutors = vector<WindowExecutorPtr>;
|
1113
1125
|
|
1114
1126
|
WindowLocalSourceState(const PhysicalWindow &op_p, ExecutionContext &context, WindowGlobalSourceState &gsource)
|
1115
|
-
:
|
1127
|
+
: context(context.client), op(op_p), gsink(gsource.gsink) {
|
1116
1128
|
|
1117
1129
|
vector<LogicalType> output_types;
|
1118
1130
|
for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
|
@@ -1121,29 +1133,134 @@ public:
|
|
1121
1133
|
output_types.emplace_back(wexpr.return_type);
|
1122
1134
|
}
|
1123
1135
|
output_chunk.Initialize(Allocator::Get(context.client), output_types);
|
1136
|
+
|
1137
|
+
const auto &input_types = gsink.payload_types;
|
1138
|
+
layout.Initialize(input_types);
|
1139
|
+
input_chunk.Initialize(gsink.allocator, input_types);
|
1124
1140
|
}
|
1125
1141
|
|
1142
|
+
void MaterializeSortedData();
|
1126
1143
|
void GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin);
|
1127
1144
|
void Scan(DataChunk &chunk);
|
1128
1145
|
|
1129
|
-
|
1146
|
+
HashGroupPtr hash_group;
|
1130
1147
|
ClientContext &context;
|
1131
1148
|
const PhysicalWindow &op;
|
1132
1149
|
|
1150
|
+
PartitionGlobalSinkState &gsink;
|
1151
|
+
|
1152
|
+
//! The generated input chunks
|
1153
|
+
unique_ptr<RowDataCollection> rows;
|
1154
|
+
unique_ptr<RowDataCollection> heap;
|
1155
|
+
RowLayout layout;
|
1156
|
+
//! The partition boundary mask
|
1157
|
+
vector<validity_t> partition_bits;
|
1158
|
+
ValidityMask partition_mask;
|
1159
|
+
//! The order boundary mask
|
1160
|
+
vector<validity_t> order_bits;
|
1161
|
+
ValidityMask order_mask;
|
1133
1162
|
//! The current execution functions
|
1134
1163
|
WindowExecutors window_execs;
|
1164
|
+
|
1165
|
+
//! The read partition
|
1166
|
+
idx_t hash_bin;
|
1167
|
+
//! The read cursor
|
1168
|
+
unique_ptr<RowDataCollectionScanner> scanner;
|
1169
|
+
//! Buffer for the inputs
|
1170
|
+
DataChunk input_chunk;
|
1135
1171
|
//! Buffer for window results
|
1136
1172
|
DataChunk output_chunk;
|
1137
1173
|
};
|
1138
1174
|
|
1175
|
+
void WindowLocalSourceState::MaterializeSortedData() {
|
1176
|
+
auto &global_sort_state = *hash_group->global_sort;
|
1177
|
+
if (global_sort_state.sorted_blocks.empty()) {
|
1178
|
+
return;
|
1179
|
+
}
|
1180
|
+
|
1181
|
+
// scan the sorted row data
|
1182
|
+
D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
|
1183
|
+
auto &sb = *global_sort_state.sorted_blocks[0];
|
1184
|
+
|
1185
|
+
// Free up some memory before allocating more
|
1186
|
+
sb.radix_sorting_data.clear();
|
1187
|
+
sb.blob_sorting_data = nullptr;
|
1188
|
+
|
1189
|
+
// Move the sorting row blocks into our RDCs
|
1190
|
+
auto &buffer_manager = global_sort_state.buffer_manager;
|
1191
|
+
auto &sd = *sb.payload_data;
|
1192
|
+
|
1193
|
+
// Data blocks are required
|
1194
|
+
D_ASSERT(!sd.data_blocks.empty());
|
1195
|
+
auto &block = sd.data_blocks[0];
|
1196
|
+
rows = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
|
1197
|
+
rows->blocks = std::move(sd.data_blocks);
|
1198
|
+
rows->count = std::accumulate(rows->blocks.begin(), rows->blocks.end(), idx_t(0),
|
1199
|
+
[&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
|
1200
|
+
|
1201
|
+
// Heap blocks are optional, but we want both for iteration.
|
1202
|
+
if (!sd.heap_blocks.empty()) {
|
1203
|
+
auto &block = sd.heap_blocks[0];
|
1204
|
+
heap = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
|
1205
|
+
heap->blocks = std::move(sd.heap_blocks);
|
1206
|
+
hash_group.reset();
|
1207
|
+
} else {
|
1208
|
+
heap = make_uniq<RowDataCollection>(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
|
1209
|
+
}
|
1210
|
+
heap->count = std::accumulate(heap->blocks.begin(), heap->blocks.end(), idx_t(0),
|
1211
|
+
[&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
|
1212
|
+
}
|
1213
|
+
|
1139
1214
|
void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin_p) {
|
1140
|
-
|
1141
|
-
|
1215
|
+
// Get rid of any stale data
|
1216
|
+
hash_bin = hash_bin_p;
|
1217
|
+
|
1218
|
+
// There are three types of partitions:
|
1219
|
+
// 1. No partition (no sorting)
|
1220
|
+
// 2. One partition (sorting, but no hashing)
|
1221
|
+
// 3. Multiple partitions (sorting and hashing)
|
1222
|
+
|
1223
|
+
// How big is the partition?
|
1224
|
+
idx_t count = 0;
|
1225
|
+
if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
|
1226
|
+
count = gsink.hash_groups[hash_bin]->count;
|
1227
|
+
} else if (gsink.rows && !hash_bin) {
|
1228
|
+
count = gsink.count;
|
1229
|
+
} else {
|
1230
|
+
return;
|
1231
|
+
}
|
1232
|
+
|
1233
|
+
// Initialise masks to false
|
1234
|
+
const auto bit_count = ValidityMask::ValidityMaskSize(count);
|
1235
|
+
partition_bits.clear();
|
1236
|
+
partition_bits.resize(bit_count, 0);
|
1237
|
+
partition_mask.Initialize(partition_bits.data());
|
1238
|
+
|
1239
|
+
order_bits.clear();
|
1240
|
+
order_bits.resize(bit_count, 0);
|
1241
|
+
order_mask.Initialize(order_bits.data());
|
1242
|
+
|
1243
|
+
// Scan the sorted data into new Collections
|
1244
|
+
auto external = gsink.external;
|
1245
|
+
if (gsink.rows && !hash_bin) {
|
1246
|
+
// Simple mask
|
1247
|
+
partition_mask.SetValidUnsafe(0);
|
1248
|
+
order_mask.SetValidUnsafe(0);
|
1249
|
+
// No partition - align the heap blocks with the row blocks
|
1250
|
+
rows = gsink.rows->CloneEmpty(gsink.rows->keep_pinned);
|
1251
|
+
heap = gsink.strings->CloneEmpty(gsink.strings->keep_pinned);
|
1252
|
+
RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gsink.rows, *gsink.strings, layout);
|
1253
|
+
external = true;
|
1254
|
+
} else if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
|
1255
|
+
// Overwrite the collections with the sorted data
|
1256
|
+
hash_group = std::move(gsink.hash_groups[hash_bin]);
|
1257
|
+
hash_group->ComputeMasks(partition_mask, order_mask);
|
1258
|
+
MaterializeSortedData();
|
1259
|
+
} else {
|
1142
1260
|
return;
|
1143
1261
|
}
|
1144
1262
|
|
1145
1263
|
// Create the executors for each function
|
1146
|
-
auto &partition_mask = partition_source.partition_mask;
|
1147
1264
|
window_execs.clear();
|
1148
1265
|
for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
|
1149
1266
|
D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
|
@@ -1154,19 +1271,20 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1154
1271
|
|
1155
1272
|
// First pass over the input without flushing
|
1156
1273
|
// TODO: Factor out the constructor data as global state
|
1274
|
+
scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
|
1157
1275
|
idx_t input_idx = 0;
|
1158
1276
|
while (true) {
|
1159
|
-
|
1160
|
-
|
1161
|
-
if (
|
1277
|
+
input_chunk.Reset();
|
1278
|
+
scanner->Scan(input_chunk);
|
1279
|
+
if (input_chunk.size() == 0) {
|
1162
1280
|
break;
|
1163
1281
|
}
|
1164
1282
|
|
1165
1283
|
// TODO: Parallelization opportunity
|
1166
1284
|
for (auto &wexec : window_execs) {
|
1167
|
-
wexec->Sink(
|
1285
|
+
wexec->Sink(input_chunk, input_idx, scanner->Count());
|
1168
1286
|
}
|
1169
|
-
input_idx +=
|
1287
|
+
input_idx += input_chunk.size();
|
1170
1288
|
}
|
1171
1289
|
|
1172
1290
|
// TODO: Parallelization opportunity
|
@@ -1175,25 +1293,22 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1175
1293
|
}
|
1176
1294
|
|
1177
1295
|
// External scanning assumes all blocks are swizzled.
|
1178
|
-
|
1296
|
+
scanner->ReSwizzle();
|
1179
1297
|
|
1180
1298
|
// Second pass can flush
|
1181
|
-
|
1299
|
+
scanner->Reset(true);
|
1182
1300
|
}
|
1183
1301
|
|
1184
1302
|
void WindowLocalSourceState::Scan(DataChunk &result) {
|
1185
|
-
D_ASSERT(
|
1186
|
-
if (!
|
1303
|
+
D_ASSERT(scanner);
|
1304
|
+
if (!scanner->Remaining()) {
|
1187
1305
|
return;
|
1188
1306
|
}
|
1189
1307
|
|
1190
|
-
const auto position =
|
1191
|
-
auto &input_chunk = partition_source.input_chunk;
|
1308
|
+
const auto position = scanner->Scanned();
|
1192
1309
|
input_chunk.Reset();
|
1193
|
-
|
1310
|
+
scanner->Scan(input_chunk);
|
1194
1311
|
|
1195
|
-
auto &partition_mask = partition_source.partition_mask;
|
1196
|
-
auto &order_mask = partition_source.order_mask;
|
1197
1312
|
output_chunk.Reset();
|
1198
1313
|
for (idx_t expr_idx = 0; expr_idx < window_execs.size(); ++expr_idx) {
|
1199
1314
|
auto &executor = *window_execs[expr_idx];
|
@@ -1227,9 +1342,7 @@ unique_ptr<GlobalSourceState> PhysicalWindow::GetGlobalSourceState(ClientContext
|
|
1227
1342
|
void PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
|
1228
1343
|
LocalSourceState &lstate_p) const {
|
1229
1344
|
auto &lsource = lstate_p.Cast<WindowLocalSourceState>();
|
1230
|
-
auto &lpsource = lsource.partition_source;
|
1231
1345
|
auto &gsource = gstate_p.Cast<WindowGlobalSourceState>();
|
1232
|
-
auto &gpsource = gsource.partition_source;
|
1233
1346
|
auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
|
1234
1347
|
|
1235
1348
|
auto &hash_groups = gsink.global_partition->hash_groups;
|
@@ -1237,17 +1350,17 @@ void PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk, Global
|
|
1237
1350
|
|
1238
1351
|
while (chunk.size() == 0) {
|
1239
1352
|
// Move to the next bin if we are done.
|
1240
|
-
while (!
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
auto hash_bin =
|
1353
|
+
while (!lsource.scanner || !lsource.scanner->Remaining()) {
|
1354
|
+
lsource.scanner.reset();
|
1355
|
+
lsource.rows.reset();
|
1356
|
+
lsource.heap.reset();
|
1357
|
+
lsource.hash_group.reset();
|
1358
|
+
auto hash_bin = gsource.next_bin++;
|
1246
1359
|
if (hash_bin >= bin_count) {
|
1247
1360
|
return;
|
1248
1361
|
}
|
1249
1362
|
|
1250
|
-
for (; hash_bin < hash_groups.size(); hash_bin =
|
1363
|
+
for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_bin++) {
|
1251
1364
|
if (hash_groups[hash_bin]) {
|
1252
1365
|
break;
|
1253
1366
|
}
|