duckdb 0.7.2-dev1901.0 → 0.7.2-dev2144.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +22 -4
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -0,0 +1,316 @@
|
|
1
|
+
#include "duckdb/common/types/row/partitioned_tuple_data.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/radix_partitioning.hpp"
|
4
|
+
#include "duckdb/common/types/row/tuple_data_iterator.hpp"
|
5
|
+
#include "duckdb/storage/buffer_manager.hpp"
|
6
|
+
|
7
|
+
namespace duckdb {
|
8
|
+
|
9
|
+
//! Constructs a PartitionedTupleData of the given partitioning type.
//! The layout is not referenced but copied via layout_p.Copy(), and a freshly constructed
//! PartitionTupleDataAllocators object is created for this instance.
PartitionedTupleData::PartitionedTupleData(PartitionedTupleDataType type_p, BufferManager &buffer_manager_p,
                                           const TupleDataLayout &layout_p)
    : type(type_p),
      buffer_manager(buffer_manager_p),
      layout(layout_p.Copy()),
      allocators(make_shared<PartitionTupleDataAllocators>()) {
}
|
14
|
+
|
15
|
+
//! Copy constructor: takes over the type and buffer manager of 'other' and copies its layout.
//! Only these three members are initialized here - the partitions of 'other' are not copied,
//! and 'allocators' is not part of the initializer list.
// NOTE(review): presumably the derived class sets up allocators/partitions after this runs - confirm
PartitionedTupleData::PartitionedTupleData(const PartitionedTupleData &other)
    : type(other.type),
      buffer_manager(other.buffer_manager),
      layout(other.layout.Copy()) {
}
|
18
|
+
|
19
|
+
//! Creates a new PartitionedTupleData of the same concrete type as this one by passing this object
//! to the constructor of that concrete type.
//! Throws a NotImplementedException for every type other than RADIX.
unique_ptr<PartitionedTupleData> PartitionedTupleData::CreateShared() {
	switch (type) {
	case PartitionedTupleDataType::RADIX:
		// The type tag identifies this object as a RadixPartitionedTupleData. A static_cast is used for the
		// downcast so the compiler rejects it if the two classes are unrelated, whereas a C-style cast would
		// silently degrade to a reinterpret_cast in that situation
		return make_uniq<RadixPartitionedTupleData>(static_cast<RadixPartitionedTupleData &>(*this));
	default:
		throw NotImplementedException("CreateShared for this type of PartitionedTupleData");
	}
}
|
27
|
+
|
28
|
+
//! Destructor: performs no work beyond destroying the members
PartitionedTupleData::~PartitionedTupleData() = default;
|
30
|
+
|
31
|
+
//! Returns the partitioning type this object was constructed with
PartitionedTupleDataType PartitionedTupleData::GetType() const {
	return this->type;
}
|
34
|
+
|
35
|
+
//! Prepares 'state' for appending to this partitioned data.
//! Initializes the partition selection vector of the state and then delegates the remaining setup to
//! InitializeAppendStateInternal, forwarding the requested pin properties.
void PartitionedTupleData::InitializeAppendState(PartitionedTupleDataAppendState &state,
                                                 TupleDataPinProperties properties) const {
	state.partition_sel.Initialize();
	// No column id list is built here: nothing in this function consumes one
	InitializeAppendStateInternal(state, properties);
}
|
47
|
+
|
48
|
+
//! Appends all rows of 'input' to the partitions they belong to.
//! If every row maps to the same partition, the chunk is appended to that partition directly;
//! otherwise buffer space is built per partition and the whole chunk is scattered in a single call.
void PartitionedTupleData::Append(PartitionedTupleDataAppendState &state, DataChunk &input) {
	// Step 1: determine the partition index of every row
	ComputePartitionIndices(state, input);

	// Step 2: group the rows by partition (fills state.partition_entries / state.partition_sel)
	BuildPartitionSel(state, input.size());

	// Fast path: a single entry means all rows go to one partition, so the chunk is appended as-is
	if (state.partition_entries.size() == 1) {
		const auto &target_idx = state.partition_entries.begin()->first;
		auto &target_pin_state = *state.partition_pin_states[target_idx];
		partitions[target_idx]->Append(target_pin_state, state.chunk_state, input);
		return;
	}

	TupleDataCollection::ToUnifiedFormat(state.chunk_state, input);

	// Heap sizes are computed for the entire chunk, but only when the layout is not all-constant
	const bool all_constant = layout.AllConstant();
	if (!all_constant) {
		TupleDataCollection::ComputeHeapSizes(state.chunk_state, input, state.partition_sel, input.size());
	}

	// Step 3: build the buffer space of every partition that receives rows
	BuildBufferSpace(state);

	// Step 4: one Scatter call (issued through the first partition) writes the rows of all partitions
	partitions[0]->Scatter(state.chunk_state, input, state.partition_sel, input.size());
}
|
78
|
+
|
79
|
+
//! Appends 'count' rows described by the chunk state 'input' (row locations and heap sizes)
//! to the partitions they belong to, copying the rows into the target partitions.
void PartitionedTupleData::Append(PartitionedTupleDataAppendState &state, TupleDataChunkState &input, idx_t count) {
	// Step 1: determine the partition index of every row from its row location
	ComputePartitionIndices(input.row_locations, count, state.partition_indices);

	// Step 2: group the rows by partition (fills state.partition_entries / state.partition_sel)
	BuildPartitionSel(state, count);

	auto &target_chunk_state = state.chunk_state;

	// Fast path: a single entry means all rows go to one partition
	if (state.partition_entries.size() == 1) {
		const auto &target_idx = state.partition_entries.begin()->first;
		auto &target = *partitions[target_idx];
		auto &target_pin_state = *state.partition_pin_states[target_idx];

		// The heap sizes can be referenced directly and the rows copied in their incoming order
		target_chunk_state.heap_sizes.Reference(input.heap_sizes);
		target.Build(target_pin_state, target_chunk_state, 0, count);
		target.CopyRows(target_chunk_state, input, *FlatVector::IncrementalSelectionVector(), count);
		return;
	}

	// Step 3: reorder the heap sizes with the partition selection vector, then build the buffer space
	target_chunk_state.heap_sizes.Slice(input.heap_sizes, state.partition_sel, count);
	target_chunk_state.heap_sizes.Flatten(count);
	BuildBufferSpace(state);

	// Step 4: one CopyRows call (issued through the first partition) copies the rows of all partitions
	partitions[0]->CopyRows(target_chunk_state, input, state.partition_sel, count);
}
|
107
|
+
|
108
|
+
//! Groups the first 'count' rows by their partition index (read from state.partition_indices).
//! On return, state.partition_entries holds one (offset, length) entry per partition that received rows.
//! Unless everything landed in a single map entry early on, state.partition_sel lists the row indices
//! grouped by partition, and every entry's offset points one past the END of its range in that vector
//! (the offsets are advanced while the selection vector is filled).
//! When the maximum partition index is below MAP_THRESHOLD the counting is done in an array
//! (state.partition_entries_arr) and copied into the map afterwards; otherwise the map is used directly.
void PartitionedTupleData::BuildPartitionSel(PartitionedTupleDataAppendState &state, idx_t count) {
	const auto indices = FlatVector::GetData<idx_t>(state.partition_indices);
	auto &entry_map = state.partition_entries;
	auto &entry_arr = state.partition_entries_arr;
	entry_map.clear();

	const auto max_partition_index = MaxPartitionIndex();
	const auto use_arr = max_partition_index < PartitionedTupleDataAppendState::MAP_THRESHOLD;

	// Phase 1: count the number of rows per partition
	switch (state.partition_indices.GetVectorType()) {
	case VectorType::FLAT_VECTOR:
		if (use_arr) {
			std::fill_n(entry_arr, max_partition_index + 1, list_entry_t(0, 0));
			for (idx_t row = 0; row < count; row++) {
				const auto &target_idx = indices[row];
				entry_arr[target_idx].length++;
			}
		} else {
			for (idx_t row = 0; row < count; row++) {
				const auto &target_idx = indices[row];
				auto found = entry_map.find(target_idx);
				if (found == entry_map.end()) {
					entry_map.emplace(target_idx, list_entry_t(0, 1));
				} else {
					found->second.length++;
				}
			}
		}
		break;
	case VectorType::CONSTANT_VECTOR:
		// Every row shares the one stored partition index
		entry_map[indices[0]] = list_entry_t(0, count);
		break;
	default:
		throw InternalException("Unexpected VectorType in PartitionedTupleData::Append");
	}

	// A single map entry means all rows belong to one partition: no selection vector is needed.
	// (In the array case the map is still empty at this point, so this never triggers there.)
	if (entry_map.size() == 1) {
		return;
	}

	auto &grouped_sel = state.partition_sel;
	idx_t running_offset = 0;
	if (use_arr) {
		// Phase 2: turn the counts into start offsets
		for (idx_t target_idx = 0; target_idx <= max_partition_index; target_idx++) {
			auto &entry = entry_arr[target_idx];
			entry.offset = running_offset;
			running_offset += entry.length;
		}
		// Phase 3: write every row index into the range of its partition, advancing that offset
		for (idx_t row = 0; row < count; row++) {
			const auto &target_idx = indices[row];
			auto &write_pos = entry_arr[target_idx].offset;
			grouped_sel[write_pos++] = row;
		}
		// Publish the non-empty array entries in the map so the callers only have to deal with the map
		for (idx_t target_idx = 0; target_idx <= max_partition_index; target_idx++) {
			const auto &entry = entry_arr[target_idx];
			if (entry.length != 0) {
				entry_map.emplace(target_idx, entry);
			}
		}
	} else {
		// Phase 2: turn the counts into start offsets (in the iteration order of the map)
		for (auto &kv : entry_map) {
			auto &entry = kv.second;
			entry.offset = running_offset;
			running_offset += entry.length;
		}
		// Phase 3: write every row index into the range of its partition, advancing that offset
		for (idx_t row = 0; row < count; row++) {
			const auto &target_idx = indices[row];
			auto &write_pos = entry_map[target_idx].offset;
			grouped_sel[write_pos++] = row;
		}
	}
}
|
188
|
+
|
189
|
+
//! Calls Build on every partition listed in state.partition_entries, passing the partition's pin state
//! and the (offset, length) range of its rows.
void PartitionedTupleData::BuildBufferSpace(PartitionedTupleDataAppendState &state) {
	for (auto &kv : state.partition_entries) {
		const auto &target_idx = kv.first;
		const auto &sel_range = kv.second;

		auto &target = *partitions[target_idx];
		auto &target_pin_state = *state.partition_pin_states[target_idx];

		// BuildPartitionSel advances each offset while filling the selection vector, leaving it at the end of
		// the range; subtracting the length recovers the start of the range
		const auto &range_length = sel_range.length;
		const auto range_begin = sel_range.offset - range_length;

		target.Build(target_pin_state, state.chunk_state, range_begin, range_length);
	}
}
|
206
|
+
|
207
|
+
//! Finalizes the pin state of every partition: for each partition index, FinalizePinState is called
//! on the partition with the pin state stored at the same index in 'state'.
void PartitionedTupleData::FlushAppendState(PartitionedTupleDataAppendState &state) {
	for (idx_t idx = 0; idx < partitions.size(); idx++) {
		auto &pin_state = *state.partition_pin_states[idx];
		partitions[idx]->FinalizePinState(pin_state);
	}
}
|
214
|
+
|
215
|
+
//! Merges the partitions of 'other' into this object. Does nothing if 'other' holds no rows.
//! The merge itself happens while holding this object's lock.
void PartitionedTupleData::Combine(PartitionedTupleData &other) {
	// Nothing to merge (checked before the lock is taken)
	if (other.Count() == 0) {
		return;
	}

	lock_guard<mutex> guard(lock);

	if (!partitions.empty()) {
		// Both sides must have the same number of partitions; merge them pairwise by index
		D_ASSERT(partitions.size() == other.partitions.size());
		for (idx_t idx = 0; idx < other.partitions.size(); idx++) {
			partitions[idx]->Combine(*other.partitions[idx]);
		}
		return;
	}

	// First merge into an object without partitions: simply take over the partitions of 'other'
	partitions = std::move(other.partitions);
}
|
234
|
+
|
235
|
+
//! Moves all rows of 'source' into the partitions of this object and resets 'source' afterwards.
//! The source is scanned chunk by chunk with DESTROY_AFTER_DONE pin properties; 'properties' is used to
//! initialize the append state of this object. An empty source is left untouched.
//! In DEBUG builds it is verified that the summed partition counts equal the row count of the source.
void PartitionedTupleData::Partition(TupleDataCollection &source, TupleDataPinProperties properties) {
	// The do-while loop below appends a chunk before it asks the iterator whether one exists, so an empty
	// source must never reach it. Repartition protects its identical loop with the same Count() check.
	if (source.Count() == 0) {
		return;
	}

#ifdef DEBUG
	const auto count_before = source.Count();
#endif

	PartitionedTupleDataAppendState append_state;
	InitializeAppendState(append_state, properties);

	TupleDataChunkIterator iterator(source, TupleDataPinProperties::DESTROY_AFTER_DONE, true);
	auto &chunk_state = iterator.GetChunkState();
	do {
		Append(append_state, chunk_state, iterator.GetCurrentChunkCount());
	} while (iterator.Next());

	FlushAppendState(append_state);
	source.Reset();

#ifdef DEBUG
	idx_t count_after = 0;
	for (const auto &partition : partitions) {
		count_after += partition->Count();
	}
	D_ASSERT(count_before == count_after);
#endif
}
|
260
|
+
|
261
|
+
//! Moves all rows of this object into 'new_partitioned_data' (which must have the same layout types).
//! The partitions are visited front-to-back, or back-to-front when RepartitionReverseOrder() returns true.
//! Each non-empty partition is scanned with DESTROY_AFTER_DONE pin properties and appended chunk by chunk,
//! followed by a RepartitionFinalizeStates call; every visited partition is reset afterwards.
void PartitionedTupleData::Repartition(PartitionedTupleData &new_partitioned_data) {
	D_ASSERT(layout.GetTypes() == new_partitioned_data.layout.GetTypes());

	PartitionedTupleDataAppendState append_state;
	new_partitioned_data.InitializeAppendState(append_state);

	const auto reverse = RepartitionReverseOrder();
	const idx_t partition_count = partitions.size();

	for (idx_t step = 0; step < partition_count; step++) {
		// Map the loop counter onto the requested visiting order
		const idx_t actual_partition_idx = reverse ? partition_count - 1 - step : step;
		auto &partition = *partitions[actual_partition_idx];

		if (partition.Count() > 0) {
			TupleDataChunkIterator iterator(partition, TupleDataPinProperties::DESTROY_AFTER_DONE, true);
			auto &chunk_state = iterator.GetChunkState();
			do {
				new_partitioned_data.Append(append_state, chunk_state, iterator.GetCurrentChunkCount());
			} while (iterator.Next());

			RepartitionFinalizeStates(*this, new_partitioned_data, append_state, actual_partition_idx);
		}
		// Looked up through the vector again (not through 'partition') after the finalize call above
		partitions[actual_partition_idx]->Reset();
	}

	new_partitioned_data.FlushAppendState(append_state);
}
|
291
|
+
|
292
|
+
//! Returns a mutable reference to the vector holding the partitions
vector<unique_ptr<TupleDataCollection>> &PartitionedTupleData::GetPartitions() {
	return this->partitions;
}
|
295
|
+
|
296
|
+
//! Returns the sum of Count() over all partitions (0 if there are no partitions)
idx_t PartitionedTupleData::Count() const {
	idx_t row_total = 0;
	for (idx_t idx = 0; idx < partitions.size(); idx++) {
		row_total += partitions[idx]->Count();
	}
	return row_total;
}
|
303
|
+
|
304
|
+
//! Returns the sum of SizeInBytes() over all partitions (0 if there are no partitions)
idx_t PartitionedTupleData::SizeInBytes() const {
	idx_t byte_total = 0;
	for (idx_t idx = 0; idx < partitions.size(); idx++) {
		byte_total += partitions[idx]->SizeInBytes();
	}
	return byte_total;
}
|
311
|
+
|
312
|
+
void PartitionedTupleData::CreateAllocator() {
|
313
|
+
allocators->allocators.emplace_back(make_shared<TupleDataAllocator>(buffer_manager, layout));
|
314
|
+
}
|
315
|
+
|
316
|
+
} // namespace duckdb
|
@@ -1,7 +1,7 @@
|
|
1
|
-
#include "duckdb/common/types/row_data_collection_scanner.hpp"
|
1
|
+
#include "duckdb/common/types/row/row_data_collection_scanner.hpp"
|
2
2
|
|
3
3
|
#include "duckdb/common/row_operations/row_operations.hpp"
|
4
|
-
#include "duckdb/common/types/row_data_collection.hpp"
|
4
|
+
#include "duckdb/common/types/row/row_data_collection.hpp"
|
5
5
|
#include "duckdb/storage/buffer_manager.hpp"
|
6
6
|
|
7
7
|
namespace duckdb {
|
@@ -6,7 +6,7 @@
|
|
6
6
|
//
|
7
7
|
//===----------------------------------------------------------------------===//
|
8
8
|
|
9
|
-
#include "duckdb/common/types/row_layout.hpp"
|
9
|
+
#include "duckdb/common/types/row/row_layout.hpp"
|
10
10
|
|
11
11
|
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
|
12
12
|
|