duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/read_csv.cpp +124 -58
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/main/settings/settings.cpp +3 -4
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -0,0 +1,316 @@
|
|
1
|
+
#include "duckdb/common/types/row/partitioned_tuple_data.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/radix_partitioning.hpp"
|
4
|
+
#include "duckdb/common/types/row/tuple_data_iterator.hpp"
|
5
|
+
#include "duckdb/storage/buffer_manager.hpp"
|
6
|
+
|
7
|
+
namespace duckdb {
|
8
|
+
|
9
|
+
// Creates partitioned tuple data of the given partitioning type.
// The layout is stored as a copy (layout_p.Copy()); the buffer manager is kept by reference.
// A new, empty PartitionTupleDataAllocators container is created for this instance.
PartitionedTupleData::PartitionedTupleData(PartitionedTupleDataType type_p, BufferManager &buffer_manager_p,
                                           const TupleDataLayout &layout_p)
    : type(type_p),
      buffer_manager(buffer_manager_p),
      layout(layout_p.Copy()),
      allocators(make_shared<PartitionTupleDataAllocators>()) {
}
|
14
|
+
|
15
|
+
// Copy constructor: takes over the partitioning type and buffer manager of `other` and copies its layout.
// NOTE: `allocators` and `partitions` are not initialized from `other` here.
PartitionedTupleData::PartitionedTupleData(const PartitionedTupleData &other)
    : type(other.type),
      buffer_manager(other.buffer_manager),
      layout(other.layout.Copy()) {
}
|
18
|
+
|
19
|
+
// Creates a new PartitionedTupleData of the same concrete type as this one by invoking the
// concrete type's copy constructor on *this.
// Throws NotImplementedException for partitioning types that have no implementation here.
unique_ptr<PartitionedTupleData> PartitionedTupleData::CreateShared() {
	switch (type) {
	case PartitionedTupleDataType::RADIX:
		// `type` identifies the dynamic type, so a named downcast is used instead of a C-style cast
		return make_uniq<RadixPartitionedTupleData>(static_cast<RadixPartitionedTupleData &>(*this));
	default:
		throw NotImplementedException("CreateShared for this type of PartitionedTupleData");
	}
}
|
27
|
+
|
28
|
+
// Nothing to release explicitly; members clean up after themselves.
PartitionedTupleData::~PartitionedTupleData() = default;
|
30
|
+
|
31
|
+
// Returns the partitioning type this instance was constructed with.
PartitionedTupleDataType PartitionedTupleData::GetType() const {
	return this->type;
}
|
34
|
+
|
35
|
+
// Prepares `state` for appending to this partitioned data: initializes the partition selection
// vector and then delegates the remaining setup to InitializeAppendStateInternal.
// `properties` is forwarded unchanged to InitializeAppendStateInternal.
void PartitionedTupleData::InitializeAppendState(PartitionedTupleDataAppendState &state,
                                                 TupleDataPinProperties properties) const {
	state.partition_sel.Initialize();
	// The previous version also built a local list of all column ids here, but never used it
	InitializeAppendStateInternal(state, properties);
}
|
47
|
+
|
48
|
+
// Appends all rows of `input` to the partitions they belong to.
// Partition indices are computed first and grouped via BuildPartitionSel. If every row maps to
// the same partition, the chunk is appended to that partition directly; otherwise buffer space
// is built per partition and the rows are scattered through state.partition_sel.
void PartitionedTupleData::Append(PartitionedTupleDataAppendState &state, DataChunk &input) {
	// Assign each row a partition index, then group the rows per partition
	ComputePartitionIndices(state, input);
	BuildPartitionSel(state, input.size());

	const auto &entries = state.partition_entries;
	if (entries.size() == 1) {
		// Fast path: a single target partition receives the whole chunk as-is
		const auto &target_index = entries.begin()->first;
		auto &target = *partitions[target_index];
		auto &target_pin_state = *state.partition_pin_states[target_index];
		target.Append(target_pin_state, state.chunk_state, input);
		return;
	}

	TupleDataCollection::ToUnifiedFormat(state.chunk_state, input);

	// Heap sizes are only computed when the layout is not all-constant
	const bool all_constant = layout.AllConstant();
	if (!all_constant) {
		TupleDataCollection::ComputeHeapSizes(state.chunk_state, input, state.partition_sel, input.size());
	}

	// Build the buffer space in every partition that receives rows
	BuildBufferSpace(state);

	// Scatter the whole chunk with a single call, issued through the first partition
	// NOTE(review): presumably valid because BuildBufferSpace already set up the per-partition space - confirm
	partitions[0]->Scatter(state.chunk_state, input, state.partition_sel, input.size());
}
|
78
|
+
|
79
|
+
// Appends `count` rows described by the chunk state `input` to the partitions they belong to.
// Partition indices are derived from input.row_locations. If all rows map to one partition, the
// heap sizes are referenced and the rows copied with an incremental selection vector; otherwise
// the heap sizes are sliced by state.partition_sel and the rows are copied through that selection.
void PartitionedTupleData::Append(PartitionedTupleDataAppendState &state, TupleDataChunkState &input, idx_t count) {
	// Assign each row a partition index, then group the rows per partition
	ComputePartitionIndices(input.row_locations, count, state.partition_indices);
	BuildPartitionSel(state, count);

	auto &entries = state.partition_entries;
	auto &heap_sizes = state.chunk_state.heap_sizes;
	if (entries.size() == 1) {
		// Fast path: a single target partition receives all rows in their original order
		const auto &target_index = entries.begin()->first;
		auto &target = *partitions[target_index];
		auto &target_pin_state = *state.partition_pin_states[target_index];

		heap_sizes.Reference(input.heap_sizes);
		target.Build(target_pin_state, state.chunk_state, 0, count);
		target.CopyRows(state.chunk_state, input, *FlatVector::IncrementalSelectionVector(), count);
		return;
	}

	// Reorder the heap sizes to follow the partition selection, then build the buffer space
	heap_sizes.Slice(input.heap_sizes, state.partition_sel, count);
	heap_sizes.Flatten(count);
	BuildBufferSpace(state);

	// Copy all rows with a single call, issued through the first partition
	partitions[0]->CopyRows(state.chunk_state, input, state.partition_sel, count);
}
|
107
|
+
|
108
|
+
// Groups the first `count` rows by their partition index (read from state.partition_indices).
// On return state.partition_entries holds one entry per partition that receives rows, and, unless
// the early-out below is taken, state.partition_sel lists the row numbers grouped by partition.
// When MaxPartitionIndex() is below MAP_THRESHOLD the counting is done in the fixed array
// state.partition_entries_arr and copied into the map at the end; otherwise the map is used directly.
// Throws InternalException if the partition index vector is neither flat nor constant.
void PartitionedTupleData::BuildPartitionSel(PartitionedTupleDataAppendState &state, idx_t count) {
	const auto row_partitions = FlatVector::GetData<idx_t>(state.partition_indices);
	auto &entry_map = state.partition_entries;
	auto &entry_arr = state.partition_entries_arr;
	entry_map.clear();

	const auto max_index = MaxPartitionIndex();
	const auto count_in_array = max_index < PartitionedTupleDataAppendState::MAP_THRESHOLD;

	// Step 1: count the number of rows per partition
	const auto indices_type = state.partition_indices.GetVectorType();
	if (indices_type == VectorType::CONSTANT_VECTOR) {
		// All rows share the one partition index stored in the constant vector
		entry_map[row_partitions[0]] = list_entry_t(0, count);
	} else if (indices_type == VectorType::FLAT_VECTOR) {
		if (count_in_array) {
			std::fill_n(entry_arr, max_index + 1, list_entry_t(0, 0));
			for (idx_t row = 0; row < count; row++) {
				entry_arr[row_partitions[row]].length++;
			}
		} else {
			for (idx_t row = 0; row < count; row++) {
				const auto &target_index = row_partitions[row];
				auto found = entry_map.find(target_index);
				if (found != entry_map.end()) {
					found->second.length++;
				} else {
					entry_map.emplace(target_index, list_entry_t(0, 1));
				}
			}
		}
	} else {
		throw InternalException("Unexpected VectorType in PartitionedTupleData::Append");
	}

	// Early out: everything belongs to a single partition, no selection vector is built.
	// On the array path the map is still empty here, so this only triggers for the other paths.
	if (entry_map.size() == 1) {
		return;
	}

	auto &grouped_sel = state.partition_sel;
	idx_t running_offset = 0;
	if (count_in_array) {
		// Step 2: turn the counts into start offsets
		for (idx_t p = 0; p <= max_index; p++) {
			auto &entry = entry_arr[p];
			entry.offset = running_offset;
			running_offset += entry.length;
		}
		// Step 3: write each row number into its partition's range of the selection vector.
		// The offset is advanced through the reference, so it ends one past the partition's range.
		for (idx_t row = 0; row < count; row++) {
			auto &write_pos = entry_arr[row_partitions[row]].offset;
			grouped_sel[write_pos++] = row;
		}
		// Publish the non-empty partitions in the map so the rest of the functionality is shared
		for (idx_t p = 0; p <= max_index; p++) {
			const auto &entry = entry_arr[p];
			if (entry.length != 0) {
				entry_map.emplace(p, entry);
			}
		}
	} else {
		// Step 2: turn the counts into start offsets (in map iteration order)
		for (auto &index_and_entry : entry_map) {
			auto &entry = index_and_entry.second;
			entry.offset = running_offset;
			running_offset += entry.length;
		}
		// Step 3: same as above, advancing the offsets stored in the map
		for (idx_t row = 0; row < count; row++) {
			auto &write_pos = entry_map[row_partitions[row]].offset;
			grouped_sel[write_pos++] = row;
		}
	}
}
|
188
|
+
|
189
|
+
// Calls Build on every partition listed in state.partition_entries, passing the partition's pin
// state, the shared chunk state, and the partition's start offset and length.
void PartitionedTupleData::BuildBufferSpace(PartitionedTupleDataAppendState &state) {
	for (auto &index_and_entry : state.partition_entries) {
		const auto &target_index = index_and_entry.first;
		const auto &entry = index_and_entry.second;

		// The start offset is recovered as the stored offset minus the length
		// (BuildPartitionSel advances the stored offset once per row it places)
		const auto &row_count = entry.length;
		const auto first_row = entry.offset - row_count;

		auto &target = *partitions[target_index];
		auto &target_pin_state = *state.partition_pin_states[target_index];
		target.Build(target_pin_state, state.chunk_state, first_row, row_count);
	}
}
|
206
|
+
|
207
|
+
// Finalizes the pin state of every partition, pairing partition i with state.partition_pin_states[i].
void PartitionedTupleData::FlushAppendState(PartitionedTupleDataAppendState &state) {
	idx_t i = 0;
	while (i < partitions.size()) {
		auto &current = *partitions[i];
		auto &current_pin_state = *state.partition_pin_states[i];
		current.FinalizePinState(current_pin_state);
		i++;
	}
}
|
214
|
+
|
215
|
+
// Merges the partitions of `other` into this object. Does nothing if `other` holds no rows.
// The merge itself runs while holding `lock`; the emptiness check on `other` happens before locking.
void PartitionedTupleData::Combine(PartitionedTupleData &other) {
	if (other.Count() == 0) {
		return;
	}

	lock_guard<mutex> guard(lock);

	if (!partitions.empty()) {
		// Partition-wise merge: both sides must have the same number of partitions
		D_ASSERT(partitions.size() == other.partitions.size());
		for (idx_t p = 0; p < other.partitions.size(); p++) {
			partitions[p]->Combine(*other.partitions[p]);
		}
		return;
	}

	// First merge into an empty object: take over the other side's partitions wholesale
	partitions = std::move(other.partitions);
}
|
234
|
+
|
235
|
+
// Moves all rows of `source` into the partitions of this object and resets `source` afterwards.
// `properties` is used for the append state of the target partitions; the source itself is
// iterated with DESTROY_AFTER_DONE.
// In DEBUG builds the total row count over all partitions is asserted to equal the number of
// rows `source` held on entry.
void PartitionedTupleData::Partition(TupleDataCollection &source, TupleDataPinProperties properties) {
	if (source.Count() == 0) {
		// Nothing to move: do not construct an iterator over an empty collection. This mirrors
		// Repartition, which skips iteration for empty partitions but still resets them.
		source.Reset();
		return;
	}

#ifdef DEBUG
	const auto count_before = source.Count();
#endif

	PartitionedTupleDataAppendState append_state;
	InitializeAppendState(append_state, properties);

	TupleDataChunkIterator iterator(source, TupleDataPinProperties::DESTROY_AFTER_DONE, true);
	auto &chunk_state = iterator.GetChunkState();
	do {
		Append(append_state, chunk_state, iterator.GetCurrentChunkCount());
	} while (iterator.Next());

	FlushAppendState(append_state);
	source.Reset();

#ifdef DEBUG
	idx_t count_after = 0;
	for (const auto &partition : partitions) {
		count_after += partition->Count();
	}
	D_ASSERT(count_before == count_after);
#endif
}
|
260
|
+
|
261
|
+
// Moves all rows of this object into `new_partitioned_data`, one source partition at a time.
// Source partitions are visited from last to first when RepartitionReverseOrder() is true and
// from first to last otherwise. After each non-empty partition has been appended,
// RepartitionFinalizeStates is called for it; every visited partition is reset.
void PartitionedTupleData::Repartition(PartitionedTupleData &new_partitioned_data) {
	D_ASSERT(layout.GetTypes() == new_partitioned_data.layout.GetTypes());

	PartitionedTupleDataAppendState append_state;
	new_partitioned_data.InitializeAppendState(append_state);

	const auto reverse = RepartitionReverseOrder();
	const idx_t partition_count = partitions.size();

	for (idx_t step = 0; step < partition_count; step++) {
		// Map the step number onto the partition to process, honoring the iteration direction
		const idx_t current_idx = reverse ? partition_count - 1 - step : step;
		auto &current = *partitions[current_idx];

		if (current.Count() > 0) {
			TupleDataChunkIterator iterator(current, TupleDataPinProperties::DESTROY_AFTER_DONE, true);
			auto &chunk_state = iterator.GetChunkState();
			do {
				new_partitioned_data.Append(append_state, chunk_state, iterator.GetCurrentChunkCount());
			} while (iterator.Next());

			RepartitionFinalizeStates(*this, new_partitioned_data, append_state, current_idx);
		}
		// Reset the source partition whether or not it held any rows
		partitions[current_idx]->Reset();
	}

	new_partitioned_data.FlushAppendState(append_state);
}
|
291
|
+
|
292
|
+
// Returns a mutable reference to the vector of partitions owned by this object.
vector<unique_ptr<TupleDataCollection>> &PartitionedTupleData::GetPartitions() {
	return this->partitions;
}
|
295
|
+
|
296
|
+
// Returns the sum of Count() over all partitions.
idx_t PartitionedTupleData::Count() const {
	idx_t row_total = 0;
	for (idx_t p = 0; p < partitions.size(); p++) {
		row_total += partitions[p]->Count();
	}
	return row_total;
}
|
303
|
+
|
304
|
+
// Returns the sum of SizeInBytes() over all partitions.
idx_t PartitionedTupleData::SizeInBytes() const {
	idx_t byte_total = 0;
	for (idx_t p = 0; p < partitions.size(); p++) {
		byte_total += partitions[p]->SizeInBytes();
	}
	return byte_total;
}
|
311
|
+
|
312
|
+
// Creates a new TupleDataAllocator for this object's buffer manager and layout and appends it
// to the list of allocators.
void PartitionedTupleData::CreateAllocator() {
	auto new_allocator = make_shared<TupleDataAllocator>(buffer_manager, layout);
	allocators->allocators.emplace_back(std::move(new_allocator));
}
|
315
|
+
|
316
|
+
} // namespace duckdb
|
@@ -1,7 +1,7 @@
|
|
1
|
-
#include "duckdb/common/types/row_data_collection_scanner.hpp"
|
1
|
+
#include "duckdb/common/types/row/row_data_collection_scanner.hpp"
|
2
2
|
|
3
3
|
#include "duckdb/common/row_operations/row_operations.hpp"
|
4
|
-
#include "duckdb/common/types/row_data_collection.hpp"
|
4
|
+
#include "duckdb/common/types/row/row_data_collection.hpp"
|
5
5
|
#include "duckdb/storage/buffer_manager.hpp"
|
6
6
|
|
7
7
|
namespace duckdb {
|
@@ -6,7 +6,7 @@
|
|
6
6
|
//
|
7
7
|
//===----------------------------------------------------------------------===//
|
8
8
|
|
9
|
-
#include "duckdb/common/types/row_layout.hpp"
|
9
|
+
#include "duckdb/common/types/row/row_layout.hpp"
|
10
10
|
|
11
11
|
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
|
12
12
|
|