duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/read_csv.cpp +124 -58
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/main/settings/settings.cpp +3 -4
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -1,9 +1,7 @@
|
|
1
1
|
#include "duckdb/common/radix_partitioning.hpp"
|
2
2
|
|
3
|
-
#include "duckdb/common/row_operations/row_operations.hpp"
|
4
|
-
#include "duckdb/common/types/partitioned_column_data.hpp"
|
5
|
-
#include "duckdb/common/types/row_data_collection.hpp"
|
6
|
-
#include "duckdb/common/types/row_layout.hpp"
|
3
|
+
#include "duckdb/common/types/column/partitioned_column_data.hpp"
|
4
|
+
#include "duckdb/common/types/row/row_data_collection.hpp"
|
7
5
|
#include "duckdb/common/types/vector.hpp"
|
8
6
|
#include "duckdb/common/vector_operations/binary_executor.hpp"
|
9
7
|
#include "duckdb/common/vector_operations/unary_executor.hpp"
|
@@ -11,7 +9,7 @@
|
|
11
9
|
namespace duckdb {
|
12
10
|
|
13
11
|
template <class OP, class RETURN_TYPE, typename... ARGS>
|
14
|
-
RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
|
12
|
+
RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&...args) {
|
15
13
|
D_ASSERT(radix_bits <= sizeof(hash_t) * 8);
|
16
14
|
switch (radix_bits) {
|
17
15
|
case 1:
|
@@ -39,64 +37,6 @@ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
|
|
39
37
|
}
|
40
38
|
}
|
41
39
|
|
42
|
-
template <class OP, class RETURN_TYPE, idx_t radix_bits_1, typename... ARGS>
|
43
|
-
RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
|
44
|
-
D_ASSERT(radix_bits_2 <= sizeof(hash_t) * 8);
|
45
|
-
switch (radix_bits_2) {
|
46
|
-
case 1:
|
47
|
-
return OP::template Operation<radix_bits_1, 1>(std::forward<ARGS>(args)...);
|
48
|
-
case 2:
|
49
|
-
return OP::template Operation<radix_bits_1, 2>(std::forward<ARGS>(args)...);
|
50
|
-
case 3:
|
51
|
-
return OP::template Operation<radix_bits_1, 3>(std::forward<ARGS>(args)...);
|
52
|
-
case 4:
|
53
|
-
return OP::template Operation<radix_bits_1, 4>(std::forward<ARGS>(args)...);
|
54
|
-
case 5:
|
55
|
-
return OP::template Operation<radix_bits_1, 5>(std::forward<ARGS>(args)...);
|
56
|
-
case 6:
|
57
|
-
return OP::template Operation<radix_bits_1, 6>(std::forward<ARGS>(args)...);
|
58
|
-
case 7:
|
59
|
-
return OP::template Operation<radix_bits_1, 7>(std::forward<ARGS>(args)...);
|
60
|
-
case 8:
|
61
|
-
return OP::template Operation<radix_bits_1, 8>(std::forward<ARGS>(args)...);
|
62
|
-
case 9:
|
63
|
-
return OP::template Operation<radix_bits_1, 9>(std::forward<ARGS>(args)...);
|
64
|
-
case 10:
|
65
|
-
return OP::template Operation<radix_bits_1, 10>(std::forward<ARGS>(args)...);
|
66
|
-
default:
|
67
|
-
throw InternalException("TODO");
|
68
|
-
}
|
69
|
-
}
|
70
|
-
|
71
|
-
template <class OP, class RETURN_TYPE, typename... ARGS>
|
72
|
-
RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&... args) {
|
73
|
-
D_ASSERT(radix_bits_1 <= sizeof(hash_t) * 8);
|
74
|
-
switch (radix_bits_1) {
|
75
|
-
case 1:
|
76
|
-
return DoubleRadixBitsSwitch2<OP, RETURN_TYPE, 1>(radix_bits_2, std::forward<ARGS>(args)...);
|
77
|
-
case 2:
|
78
|
-
return DoubleRadixBitsSwitch2<OP, RETURN_TYPE, 2>(radix_bits_2, std::forward<ARGS>(args)...);
|
79
|
-
case 3:
|
80
|
-
return DoubleRadixBitsSwitch2<OP, RETURN_TYPE, 3>(radix_bits_2, std::forward<ARGS>(args)...);
|
81
|
-
case 4:
|
82
|
-
return DoubleRadixBitsSwitch2<OP, RETURN_TYPE, 4>(radix_bits_2, std::forward<ARGS>(args)...);
|
83
|
-
case 5:
|
84
|
-
return DoubleRadixBitsSwitch2<OP, RETURN_TYPE, 5>(radix_bits_2, std::forward<ARGS>(args)...);
|
85
|
-
case 6:
|
86
|
-
return DoubleRadixBitsSwitch2<OP, RETURN_TYPE, 6>(radix_bits_2, std::forward<ARGS>(args)...);
|
87
|
-
case 7:
|
88
|
-
return DoubleRadixBitsSwitch2<OP, RETURN_TYPE, 7>(radix_bits_2, std::forward<ARGS>(args)...);
|
89
|
-
case 8:
|
90
|
-
return DoubleRadixBitsSwitch2<OP, RETURN_TYPE, 8>(radix_bits_2, std::forward<ARGS>(args)...);
|
91
|
-
case 9:
|
92
|
-
return DoubleRadixBitsSwitch2<OP, RETURN_TYPE, 9>(radix_bits_2, std::forward<ARGS>(args)...);
|
93
|
-
case 10:
|
94
|
-
return DoubleRadixBitsSwitch2<OP, RETURN_TYPE, 10>(radix_bits_2, std::forward<ARGS>(args)...);
|
95
|
-
default:
|
96
|
-
throw InternalException("TODO");
|
97
|
-
}
|
98
|
-
}
|
99
|
-
|
100
40
|
template <idx_t radix_bits>
|
101
41
|
struct RadixLessThan {
|
102
42
|
static inline bool Operation(hash_t hash, hash_t cutoff) {
|
@@ -120,303 +60,16 @@ idx_t RadixPartitioning::Select(Vector &hashes, const SelectionVector *sel, idx_
|
|
120
60
|
return RadixBitsSwitch<SelectFunctor, idx_t>(radix_bits, hashes, sel, count, cutoff, true_sel, false_sel);
|
121
61
|
}
|
122
62
|
|
123
|
-
|
124
|
-
// Row Data Partitioning
|
125
|
-
//===--------------------------------------------------------------------===//
|
126
|
-
template <idx_t radix_bits>
|
127
|
-
static void InitPartitions(BufferManager &buffer_manager, vector<unique_ptr<RowDataCollection>> &partition_collections,
|
128
|
-
RowDataBlock *partition_blocks[], vector<BufferHandle> &partition_handles,
|
129
|
-
data_ptr_t partition_ptrs[], idx_t block_capacity, idx_t row_width) {
|
130
|
-
using CONSTANTS = RadixPartitioningConstants<radix_bits>;
|
131
|
-
|
132
|
-
partition_collections.reserve(CONSTANTS::NUM_PARTITIONS);
|
133
|
-
partition_handles.reserve(CONSTANTS::NUM_PARTITIONS);
|
134
|
-
for (idx_t i = 0; i < CONSTANTS::NUM_PARTITIONS; i++) {
|
135
|
-
partition_collections.push_back(make_uniq<RowDataCollection>(buffer_manager, block_capacity, row_width));
|
136
|
-
partition_blocks[i] = &partition_collections[i]->CreateBlock();
|
137
|
-
partition_handles.push_back(buffer_manager.Pin(partition_blocks[i]->block));
|
138
|
-
if (partition_ptrs) {
|
139
|
-
partition_ptrs[i] = partition_handles[i].Ptr();
|
140
|
-
}
|
141
|
-
}
|
142
|
-
}
|
143
|
-
|
144
|
-
struct PartitionFunctor {
|
63
|
+
struct ComputePartitionIndicesFunctor {
|
145
64
|
template <idx_t radix_bits>
|
146
|
-
static void Operation(
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
const auto block_capacity = block_collection.block_capacity;
|
153
|
-
const auto row_width = layout.GetRowWidth();
|
154
|
-
const auto has_heap = !layout.AllConstant();
|
155
|
-
|
156
|
-
block_collection.VerifyBlockSizes();
|
157
|
-
string_heap.VerifyBlockSizes();
|
158
|
-
|
159
|
-
// Fixed-size data
|
160
|
-
RowDataBlock *partition_data_blocks[CONSTANTS::NUM_PARTITIONS];
|
161
|
-
vector<BufferHandle> partition_data_handles;
|
162
|
-
data_ptr_t partition_data_ptrs[CONSTANTS::NUM_PARTITIONS];
|
163
|
-
InitPartitions<radix_bits>(buffer_manager, partition_block_collections, partition_data_blocks,
|
164
|
-
partition_data_handles, partition_data_ptrs, block_capacity, row_width);
|
165
|
-
|
166
|
-
// Variable-size data
|
167
|
-
RowDataBlock *partition_heap_blocks[CONSTANTS::NUM_PARTITIONS];
|
168
|
-
vector<BufferHandle> partition_heap_handles;
|
169
|
-
if (has_heap) {
|
170
|
-
InitPartitions<radix_bits>(buffer_manager, partition_string_heaps, partition_heap_blocks,
|
171
|
-
partition_heap_handles, nullptr, (idx_t)Storage::BLOCK_SIZE, 1);
|
172
|
-
}
|
173
|
-
|
174
|
-
// We track the count of the current block for each partition in this array
|
175
|
-
uint32_t block_counts[CONSTANTS::NUM_PARTITIONS];
|
176
|
-
memset(block_counts, 0, sizeof(block_counts));
|
177
|
-
|
178
|
-
// Allocate "SWWCB" temporary buffer
|
179
|
-
auto temp_buf_ptr =
|
180
|
-
unique_ptr<data_t[]>(new data_t[CONSTANTS::TMP_BUF_SIZE * CONSTANTS::NUM_PARTITIONS * row_width]);
|
181
|
-
const auto tmp_buf = temp_buf_ptr.get();
|
182
|
-
|
183
|
-
// Initialize temporary buffer offsets
|
184
|
-
uint32_t pos[CONSTANTS::NUM_PARTITIONS];
|
185
|
-
for (uint32_t idx = 0; idx < CONSTANTS::NUM_PARTITIONS; idx++) {
|
186
|
-
pos[idx] = idx * CONSTANTS::TMP_BUF_SIZE;
|
187
|
-
}
|
188
|
-
|
189
|
-
auto &data_blocks = block_collection.blocks;
|
190
|
-
auto &heap_blocks = string_heap.blocks;
|
191
|
-
for (idx_t block_idx_plus_one = data_blocks.size(); block_idx_plus_one > 0; block_idx_plus_one--) {
|
192
|
-
// We loop through blocks in reverse to save some of that PRECIOUS I/O
|
193
|
-
idx_t block_idx = block_idx_plus_one - 1;
|
194
|
-
|
195
|
-
RowDataBlock *data_block;
|
196
|
-
BufferHandle data_handle;
|
197
|
-
data_ptr_t data_ptr;
|
198
|
-
PinAndSet(buffer_manager, *data_blocks[block_idx], &data_block, data_handle, data_ptr);
|
199
|
-
|
200
|
-
// Pin the heap block (if necessary)
|
201
|
-
RowDataBlock *heap_block;
|
202
|
-
BufferHandle heap_handle;
|
203
|
-
if (has_heap) {
|
204
|
-
heap_block = heap_blocks[block_idx].get();
|
205
|
-
heap_handle = buffer_manager.Pin(heap_block->block);
|
206
|
-
}
|
207
|
-
|
208
|
-
idx_t remaining = data_block->count;
|
209
|
-
while (remaining != 0) {
|
210
|
-
const auto next = MinValue<idx_t>(remaining, STANDARD_VECTOR_SIZE);
|
211
|
-
|
212
|
-
if (has_heap) {
|
213
|
-
// Unswizzle so that the rows that we copy have a pointer to their heap rows
|
214
|
-
RowOperations::UnswizzleHeapPointer(layout, data_ptr, heap_handle.Ptr(), next);
|
215
|
-
}
|
216
|
-
|
217
|
-
for (idx_t i = 0; i < next; i++) {
|
218
|
-
const auto bin = CONSTANTS::ApplyMask(Load<hash_t>(data_ptr + hash_offset));
|
219
|
-
|
220
|
-
// Write entry to bin in temp buf
|
221
|
-
FastMemcpy(tmp_buf + pos[bin] * row_width, data_ptr, row_width);
|
222
|
-
data_ptr += row_width;
|
223
|
-
|
224
|
-
if ((++pos[bin] & (CONSTANTS::TMP_BUF_SIZE - 1)) == 0) {
|
225
|
-
// Temp buf for this bin is full, flush temp buf to partition
|
226
|
-
auto &block_count = block_counts[bin];
|
227
|
-
FlushTempBuf(partition_data_ptrs[bin], row_width, block_count, tmp_buf, pos[bin],
|
228
|
-
CONSTANTS::TMP_BUF_SIZE);
|
229
|
-
D_ASSERT(block_count <= block_capacity);
|
230
|
-
if (block_count + CONSTANTS::TMP_BUF_SIZE > block_capacity) {
|
231
|
-
// The block can't fit the next flush of the temp buf
|
232
|
-
partition_data_blocks[bin]->count = block_count;
|
233
|
-
if (has_heap) {
|
234
|
-
// Write last bit of heap data
|
235
|
-
PartitionHeap(buffer_manager, layout, *partition_string_heaps[bin],
|
236
|
-
*partition_data_blocks[bin], partition_data_ptrs[bin],
|
237
|
-
*partition_heap_blocks[bin], partition_heap_handles[bin]);
|
238
|
-
}
|
239
|
-
// Now we can create new blocks for this partition
|
240
|
-
CreateNewBlock(buffer_manager, has_heap, partition_block_collections, partition_data_blocks,
|
241
|
-
partition_data_handles, partition_data_ptrs, partition_string_heaps,
|
242
|
-
partition_heap_blocks, partition_heap_handles, block_counts, bin);
|
243
|
-
}
|
244
|
-
}
|
245
|
-
}
|
246
|
-
remaining -= next;
|
247
|
-
}
|
248
|
-
|
249
|
-
// We are done with this input block
|
250
|
-
for (idx_t bin = 0; bin < CONSTANTS::NUM_PARTITIONS; bin++) {
|
251
|
-
auto count = pos[bin] & (CONSTANTS::TMP_BUF_SIZE - 1);
|
252
|
-
if (count != 0) {
|
253
|
-
// Clean up the temporary buffer
|
254
|
-
FlushTempBuf(partition_data_ptrs[bin], row_width, block_counts[bin], tmp_buf, pos[bin], count);
|
255
|
-
}
|
256
|
-
D_ASSERT(block_counts[bin] <= block_capacity);
|
257
|
-
partition_data_blocks[bin]->count = block_counts[bin];
|
258
|
-
if (has_heap) {
|
259
|
-
// Write heap data so we can safely unpin the current input heap block
|
260
|
-
PartitionHeap(buffer_manager, layout, *partition_string_heaps[bin], *partition_data_blocks[bin],
|
261
|
-
partition_data_ptrs[bin], *partition_heap_blocks[bin], partition_heap_handles[bin]);
|
262
|
-
}
|
263
|
-
if (block_counts[bin] + CONSTANTS::TMP_BUF_SIZE > block_capacity) {
|
264
|
-
// The block can't fit the next flush of the temp buf
|
265
|
-
CreateNewBlock(buffer_manager, has_heap, partition_block_collections, partition_data_blocks,
|
266
|
-
partition_data_handles, partition_data_ptrs, partition_string_heaps,
|
267
|
-
partition_heap_blocks, partition_heap_handles, block_counts, bin);
|
268
|
-
}
|
269
|
-
}
|
270
|
-
|
271
|
-
// Delete references to the input block we just finished processing to free up memory
|
272
|
-
data_blocks[block_idx] = nullptr;
|
273
|
-
if (has_heap) {
|
274
|
-
heap_blocks[block_idx] = nullptr;
|
275
|
-
}
|
276
|
-
}
|
277
|
-
|
278
|
-
// Update counts
|
279
|
-
for (idx_t bin = 0; bin < CONSTANTS::NUM_PARTITIONS; bin++) {
|
280
|
-
partition_block_collections[bin]->count += block_counts[bin];
|
281
|
-
if (has_heap) {
|
282
|
-
partition_string_heaps[bin]->count += block_counts[bin];
|
283
|
-
}
|
284
|
-
}
|
285
|
-
|
286
|
-
// Input data collections are empty, reset them
|
287
|
-
block_collection.Clear();
|
288
|
-
string_heap.Clear();
|
289
|
-
|
290
|
-
#ifdef DEBUG
|
291
|
-
for (idx_t bin = 0; bin < CONSTANTS::NUM_PARTITIONS; bin++) {
|
292
|
-
auto &p_block_collection = *partition_block_collections[bin];
|
293
|
-
p_block_collection.VerifyBlockSizes();
|
294
|
-
if (!layout.AllConstant()) {
|
295
|
-
partition_string_heaps[bin]->VerifyBlockSizes();
|
296
|
-
}
|
297
|
-
idx_t p_count = 0;
|
298
|
-
for (idx_t b = 0; b < p_block_collection.blocks.size(); b++) {
|
299
|
-
auto &data_block = *p_block_collection.blocks[b];
|
300
|
-
p_count += data_block.count;
|
301
|
-
if (!layout.AllConstant()) {
|
302
|
-
auto &p_string_heap = *partition_string_heaps[bin];
|
303
|
-
D_ASSERT(p_block_collection.blocks.size() == p_string_heap.blocks.size());
|
304
|
-
auto &heap_block = *p_string_heap.blocks[b];
|
305
|
-
D_ASSERT(data_block.count == heap_block.count);
|
306
|
-
}
|
307
|
-
}
|
308
|
-
D_ASSERT(p_count == p_block_collection.count);
|
309
|
-
}
|
310
|
-
#endif
|
311
|
-
}
|
312
|
-
|
313
|
-
static inline void FlushTempBuf(data_ptr_t &data_ptr, const idx_t &row_width, uint32_t &block_count,
|
314
|
-
const data_ptr_t &tmp_buf, uint32_t &pos, const idx_t count) {
|
315
|
-
pos -= count;
|
316
|
-
FastMemcpy(data_ptr, tmp_buf + pos * row_width, count * row_width);
|
317
|
-
data_ptr += count * row_width;
|
318
|
-
block_count += count;
|
319
|
-
}
|
320
|
-
|
321
|
-
static inline void CreateNewBlock(BufferManager &buffer_manager, const bool &has_heap,
|
322
|
-
vector<unique_ptr<RowDataCollection>> &partition_block_collections,
|
323
|
-
RowDataBlock *partition_data_blocks[],
|
324
|
-
vector<BufferHandle> &partition_data_handles, data_ptr_t partition_data_ptrs[],
|
325
|
-
vector<unique_ptr<RowDataCollection>> &partition_string_heaps,
|
326
|
-
RowDataBlock *partition_heap_blocks[],
|
327
|
-
vector<BufferHandle> &partition_heap_handles, uint32_t block_counts[],
|
328
|
-
const idx_t &bin) {
|
329
|
-
D_ASSERT(partition_data_blocks[bin]->count == block_counts[bin]);
|
330
|
-
partition_block_collections[bin]->count += block_counts[bin];
|
331
|
-
PinAndSet(buffer_manager, partition_block_collections[bin]->CreateBlock(), &partition_data_blocks[bin],
|
332
|
-
partition_data_handles[bin], partition_data_ptrs[bin]);
|
333
|
-
|
334
|
-
if (has_heap) {
|
335
|
-
partition_string_heaps[bin]->count += block_counts[bin];
|
336
|
-
|
337
|
-
auto &p_heap_block = *partition_heap_blocks[bin];
|
338
|
-
// Set a new heap block
|
339
|
-
if (p_heap_block.byte_offset != p_heap_block.capacity) {
|
340
|
-
// More data fits on the heap block, just copy (reference) the block
|
341
|
-
partition_string_heaps[bin]->blocks.push_back(partition_heap_blocks[bin]->Copy());
|
342
|
-
partition_string_heaps[bin]->blocks.back()->count = 0;
|
343
|
-
} else {
|
344
|
-
// Heap block is full, create a new one
|
345
|
-
partition_string_heaps[bin]->CreateBlock();
|
346
|
-
}
|
347
|
-
|
348
|
-
partition_heap_blocks[bin] = partition_string_heaps[bin]->blocks.back().get();
|
349
|
-
partition_heap_handles[bin] = buffer_manager.Pin(partition_heap_blocks[bin]->block);
|
350
|
-
}
|
351
|
-
|
352
|
-
block_counts[bin] = 0;
|
353
|
-
}
|
354
|
-
|
355
|
-
static inline void PinAndSet(BufferManager &buffer_manager, RowDataBlock &block, RowDataBlock **block_ptr,
|
356
|
-
BufferHandle &handle, data_ptr_t &ptr) {
|
357
|
-
*block_ptr = &block;
|
358
|
-
handle = buffer_manager.Pin(block.block);
|
359
|
-
ptr = handle.Ptr();
|
360
|
-
}
|
361
|
-
|
362
|
-
static inline void PartitionHeap(BufferManager &buffer_manager, const RowLayout &layout,
|
363
|
-
RowDataCollection &string_heap, RowDataBlock &data_block,
|
364
|
-
const data_ptr_t data_ptr, RowDataBlock &heap_block, BufferHandle &heap_handle) {
|
365
|
-
D_ASSERT(!layout.AllConstant());
|
366
|
-
D_ASSERT(heap_block.block == heap_handle.GetBlockHandle());
|
367
|
-
D_ASSERT(data_block.count >= heap_block.count);
|
368
|
-
const auto count = data_block.count - heap_block.count;
|
369
|
-
if (count == 0) {
|
370
|
-
return;
|
371
|
-
}
|
372
|
-
const auto row_width = layout.GetRowWidth();
|
373
|
-
const auto base_row_ptr = data_ptr - count * row_width;
|
374
|
-
|
375
|
-
// Compute size of remaining heap rows
|
376
|
-
idx_t size = 0;
|
377
|
-
auto row_ptr = base_row_ptr + layout.GetHeapOffset();
|
378
|
-
for (idx_t i = 0; i < count; i++) {
|
379
|
-
size += Load<uint32_t>(Load<data_ptr_t>(row_ptr));
|
380
|
-
row_ptr += row_width;
|
381
|
-
}
|
382
|
-
|
383
|
-
// Resize block if it doesn't fit
|
384
|
-
auto required_size = heap_block.byte_offset + size;
|
385
|
-
if (required_size > heap_block.capacity) {
|
386
|
-
buffer_manager.ReAllocate(heap_block.block, required_size);
|
387
|
-
heap_block.capacity = required_size;
|
388
|
-
}
|
389
|
-
auto heap_ptr = heap_handle.Ptr() + heap_block.byte_offset;
|
390
|
-
|
391
|
-
#ifdef DEBUG
|
392
|
-
if (data_block.count > count) {
|
393
|
-
auto previous_row_heap_offset = Load<idx_t>(base_row_ptr - layout.GetRowWidth() + layout.GetHeapOffset());
|
394
|
-
auto previous_row_heap_ptr = heap_handle.Ptr() + previous_row_heap_offset;
|
395
|
-
auto current_heap_ptr = previous_row_heap_ptr + Load<uint32_t>(previous_row_heap_ptr);
|
396
|
-
D_ASSERT(current_heap_ptr == heap_ptr);
|
397
|
-
}
|
398
|
-
#endif
|
399
|
-
|
400
|
-
// Copy corresponding heap rows, swizzle, and update counts
|
401
|
-
RowOperations::CopyHeapAndSwizzle(layout, base_row_ptr, heap_handle.Ptr(), heap_ptr, count);
|
402
|
-
heap_block.count += count;
|
403
|
-
heap_block.byte_offset += size;
|
404
|
-
D_ASSERT(data_block.count == heap_block.count);
|
405
|
-
D_ASSERT(heap_ptr + size == heap_handle.Ptr() + heap_block.byte_offset);
|
406
|
-
D_ASSERT(heap_ptr <= heap_handle.Ptr() + heap_block.capacity);
|
65
|
+
static void Operation(Vector &hashes, Vector &partition_indices, idx_t count) {
|
66
|
+
UnaryExecutor::Execute<hash_t, hash_t>(hashes, partition_indices, count, [&](hash_t hash) {
|
67
|
+
using CONSTANTS = RadixPartitioningConstants<radix_bits>;
|
68
|
+
return CONSTANTS::ApplyMask(hash);
|
69
|
+
});
|
407
70
|
}
|
408
71
|
};
|
409
72
|
|
410
|
-
void RadixPartitioning::PartitionRowData(BufferManager &buffer_manager, const RowLayout &layout,
|
411
|
-
const idx_t hash_offset, RowDataCollection &block_collection,
|
412
|
-
RowDataCollection &string_heap,
|
413
|
-
vector<unique_ptr<RowDataCollection>> &partition_block_collections,
|
414
|
-
vector<unique_ptr<RowDataCollection>> &partition_string_heaps,
|
415
|
-
idx_t radix_bits) {
|
416
|
-
return RadixBitsSwitch<PartitionFunctor, void>(radix_bits, buffer_manager, layout, hash_offset, block_collection,
|
417
|
-
string_heap, partition_block_collections, partition_string_heaps);
|
418
|
-
}
|
419
|
-
|
420
73
|
//===--------------------------------------------------------------------===//
|
421
74
|
// Column Data Partitioning
|
422
75
|
//===--------------------------------------------------------------------===//
|
@@ -435,7 +88,6 @@ RadixPartitionedColumnData::RadixPartitionedColumnData(ClientContext &context_p,
|
|
435
88
|
|
436
89
|
RadixPartitionedColumnData::RadixPartitionedColumnData(const RadixPartitionedColumnData &other)
|
437
90
|
: PartitionedColumnData(other), radix_bits(other.radix_bits), hash_col_idx(other.hash_col_idx) {
|
438
|
-
|
439
91
|
for (idx_t i = 0; i < RadixPartitioning::NumberOfPartitions(radix_bits); i++) {
|
440
92
|
partitions.emplace_back(CreatePartitionCollection(i));
|
441
93
|
}
|
@@ -446,26 +98,15 @@ RadixPartitionedColumnData::~RadixPartitionedColumnData() {
|
|
446
98
|
|
447
99
|
void RadixPartitionedColumnData::InitializeAppendStateInternal(PartitionedColumnDataAppendState &state) const {
|
448
100
|
const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
|
449
|
-
state.partition_buffers.reserve(num_partitions);
|
450
101
|
state.partition_append_states.reserve(num_partitions);
|
102
|
+
state.partition_buffers.reserve(num_partitions);
|
451
103
|
for (idx_t i = 0; i < num_partitions; i++) {
|
452
|
-
// TODO only initialize the append if partition idx > ...
|
453
104
|
state.partition_append_states.emplace_back(make_uniq<ColumnDataAppendState>());
|
454
105
|
partitions[i]->InitializeAppend(*state.partition_append_states[i]);
|
455
106
|
state.partition_buffers.emplace_back(CreatePartitionBuffer());
|
456
107
|
}
|
457
108
|
}
|
458
109
|
|
459
|
-
struct ComputePartitionIndicesFunctor {
|
460
|
-
template <idx_t radix_bits>
|
461
|
-
static void Operation(Vector &hashes, Vector &partition_indices, idx_t count) {
|
462
|
-
UnaryExecutor::Execute<hash_t, hash_t>(hashes, partition_indices, count, [&](hash_t hash) {
|
463
|
-
using CONSTANTS = RadixPartitioningConstants<radix_bits>;
|
464
|
-
return CONSTANTS::ApplyMask(hash);
|
465
|
-
});
|
466
|
-
}
|
467
|
-
};
|
468
|
-
|
469
110
|
void RadixPartitionedColumnData::ComputePartitionIndices(PartitionedColumnDataAppendState &state, DataChunk &input) {
|
470
111
|
D_ASSERT(partitions.size() == RadixPartitioning::NumberOfPartitions(radix_bits));
|
471
112
|
D_ASSERT(state.partition_buffers.size() == RadixPartitioning::NumberOfPartitions(radix_bits));
|
@@ -473,4 +114,95 @@ void RadixPartitionedColumnData::ComputePartitionIndices(PartitionedColumnDataAp
|
|
473
114
|
input.size());
|
474
115
|
}
|
475
116
|
|
117
|
+
//===--------------------------------------------------------------------===//
|
118
|
+
// Tuple Data Partitioning
|
119
|
+
//===--------------------------------------------------------------------===//
|
120
|
+
RadixPartitionedTupleData::RadixPartitionedTupleData(BufferManager &buffer_manager, const TupleDataLayout &layout_p,
|
121
|
+
idx_t radix_bits_p, idx_t hash_col_idx_p)
|
122
|
+
: PartitionedTupleData(PartitionedTupleDataType::RADIX, buffer_manager, layout_p.Copy()), radix_bits(radix_bits_p),
|
123
|
+
hash_col_idx(hash_col_idx_p) {
|
124
|
+
D_ASSERT(hash_col_idx < layout.GetTypes().size());
|
125
|
+
const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
|
126
|
+
allocators->allocators.reserve(num_partitions);
|
127
|
+
for (idx_t i = 0; i < num_partitions; i++) {
|
128
|
+
CreateAllocator();
|
129
|
+
}
|
130
|
+
D_ASSERT(allocators->allocators.size() == num_partitions);
|
131
|
+
Initialize();
|
132
|
+
}
|
133
|
+
|
134
|
+
RadixPartitionedTupleData::RadixPartitionedTupleData(const RadixPartitionedTupleData &other)
|
135
|
+
: PartitionedTupleData(other), radix_bits(other.radix_bits), hash_col_idx(other.hash_col_idx) {
|
136
|
+
Initialize();
|
137
|
+
}
|
138
|
+
|
139
|
+
RadixPartitionedTupleData::~RadixPartitionedTupleData() {
|
140
|
+
}
|
141
|
+
|
142
|
+
void RadixPartitionedTupleData::Initialize() {
|
143
|
+
for (idx_t i = 0; i < RadixPartitioning::NumberOfPartitions(radix_bits); i++) {
|
144
|
+
partitions.emplace_back(CreatePartitionCollection(i));
|
145
|
+
}
|
146
|
+
}
|
147
|
+
|
148
|
+
void RadixPartitionedTupleData::InitializeAppendStateInternal(PartitionedTupleDataAppendState &state,
|
149
|
+
TupleDataPinProperties properties) const {
|
150
|
+
// Init pin state per partition
|
151
|
+
const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
|
152
|
+
state.partition_pin_states.reserve(num_partitions);
|
153
|
+
for (idx_t i = 0; i < num_partitions; i++) {
|
154
|
+
state.partition_pin_states.emplace_back(make_uniq<TupleDataPinState>());
|
155
|
+
partitions[i]->InitializeAppend(*state.partition_pin_states[i], properties);
|
156
|
+
}
|
157
|
+
|
158
|
+
// Init single chunk state
|
159
|
+
auto column_count = layout.ColumnCount();
|
160
|
+
vector<column_t> column_ids;
|
161
|
+
column_ids.reserve(column_count);
|
162
|
+
for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
|
163
|
+
column_ids.emplace_back(col_idx);
|
164
|
+
}
|
165
|
+
partitions[0]->InitializeAppend(state.chunk_state, std::move(column_ids));
|
166
|
+
}
|
167
|
+
|
168
|
+
void RadixPartitionedTupleData::ComputePartitionIndices(PartitionedTupleDataAppendState &state, DataChunk &input) {
|
169
|
+
D_ASSERT(partitions.size() == RadixPartitioning::NumberOfPartitions(radix_bits));
|
170
|
+
RadixBitsSwitch<ComputePartitionIndicesFunctor, void>(radix_bits, input.data[hash_col_idx], state.partition_indices,
|
171
|
+
input.size());
|
172
|
+
}
|
173
|
+
|
174
|
+
void RadixPartitionedTupleData::ComputePartitionIndices(Vector &row_locations, idx_t count,
|
175
|
+
Vector &partition_indices) const {
|
176
|
+
Vector intermediate(LogicalType::HASH);
|
177
|
+
partitions[0]->Gather(row_locations, *FlatVector::IncrementalSelectionVector(), count, hash_col_idx, intermediate,
|
178
|
+
*FlatVector::IncrementalSelectionVector());
|
179
|
+
RadixBitsSwitch<ComputePartitionIndicesFunctor, void>(radix_bits, intermediate, partition_indices, count);
|
180
|
+
}
|
181
|
+
|
182
|
+
void RadixPartitionedTupleData::RepartitionFinalizeStates(PartitionedTupleData &old_partitioned_data,
|
183
|
+
PartitionedTupleData &new_partitioned_data,
|
184
|
+
PartitionedTupleDataAppendState &state,
|
185
|
+
idx_t finished_partition_idx) const {
|
186
|
+
D_ASSERT(old_partitioned_data.GetType() == PartitionedTupleDataType::RADIX &&
|
187
|
+
new_partitioned_data.GetType() == PartitionedTupleDataType::RADIX);
|
188
|
+
const auto &old_radix_partitions = (RadixPartitionedTupleData &)old_partitioned_data;
|
189
|
+
const auto &new_radix_partitions = (RadixPartitionedTupleData &)new_partitioned_data;
|
190
|
+
const auto old_radix_bits = old_radix_partitions.GetRadixBits();
|
191
|
+
const auto new_radix_bits = new_radix_partitions.GetRadixBits();
|
192
|
+
D_ASSERT(new_radix_bits > old_radix_bits);
|
193
|
+
|
194
|
+
// We take the most significant digits as the partition index
|
195
|
+
// When repartitioning, e.g., partition 0 from "old" goes into the first N partitions in "new"
|
196
|
+
// When partition 0 is done, we can already finalize the append states, unpinning blocks
|
197
|
+
const auto multiplier = RadixPartitioning::NumberOfPartitions(new_radix_bits - old_radix_bits);
|
198
|
+
const auto from_idx = finished_partition_idx * multiplier;
|
199
|
+
const auto to_idx = from_idx + multiplier;
|
200
|
+
auto &partitions = new_partitioned_data.GetPartitions();
|
201
|
+
for (idx_t partition_index = from_idx; partition_index < to_idx; partition_index++) {
|
202
|
+
auto &partition = *partitions[partition_index];
|
203
|
+
auto &partition_pin_state = *state.partition_pin_states[partition_index];
|
204
|
+
partition.FinalizePinState(partition_pin_state);
|
205
|
+
}
|
206
|
+
}
|
207
|
+
|
476
208
|
} // namespace duckdb
|
@@ -5,16 +5,15 @@
|
|
5
5
|
//
|
6
6
|
//
|
7
7
|
//===----------------------------------------------------------------------===//
|
8
|
-
#include "duckdb/common/row_operations/row_operations.hpp"
|
9
|
-
|
10
|
-
#include "duckdb/common/types/row_layout.hpp"
|
11
8
|
#include "duckdb/catalog/catalog_entry/aggregate_function_catalog_entry.hpp"
|
12
|
-
#include "duckdb/
|
9
|
+
#include "duckdb/common/row_operations/row_operations.hpp"
|
10
|
+
#include "duckdb/common/types/row/tuple_data_layout.hpp"
|
13
11
|
#include "duckdb/execution/operator/aggregate/aggregate_object.hpp"
|
14
12
|
|
15
13
|
namespace duckdb {
|
16
14
|
|
17
|
-
void RowOperations::InitializeStates(RowLayout &layout, Vector &addresses, const SelectionVector &sel, idx_t count) {
|
15
|
+
void RowOperations::InitializeStates(TupleDataLayout &layout, Vector &addresses, const SelectionVector &sel,
|
16
|
+
idx_t count) {
|
18
17
|
if (count == 0) {
|
19
18
|
return;
|
20
19
|
}
|
@@ -32,7 +31,7 @@ void RowOperations::InitializeStates(RowLayout &layout, Vector &addresses, const
|
|
32
31
|
}
|
33
32
|
}
|
34
33
|
|
35
|
-
void RowOperations::DestroyStates(RowOperationsState &state, RowLayout &layout, Vector &addresses, idx_t count) {
|
34
|
+
void RowOperations::DestroyStates(RowOperationsState &state, TupleDataLayout &layout, Vector &addresses, idx_t count) {
|
36
35
|
if (count == 0) {
|
37
36
|
return;
|
38
37
|
}
|
@@ -68,7 +67,7 @@ void RowOperations::UpdateFilteredStates(RowOperationsState &state, AggregateFil
|
|
68
67
|
UpdateStates(state, aggr, filtered_addresses, filter_data.filtered_payload, arg_idx, count);
|
69
68
|
}
|
70
69
|
|
71
|
-
void RowOperations::CombineStates(RowOperationsState &state, RowLayout &layout, Vector &sources, Vector &targets,
|
70
|
+
void RowOperations::CombineStates(RowOperationsState &state, TupleDataLayout &layout, Vector &sources, Vector &targets,
|
72
71
|
idx_t count) {
|
73
72
|
if (count == 0) {
|
74
73
|
return;
|
@@ -88,8 +87,8 @@ void RowOperations::CombineStates(RowOperationsState &state, RowLayout &layout,
|
|
88
87
|
}
|
89
88
|
}
|
90
89
|
|
91
|
-
void RowOperations::FinalizeStates(RowOperationsState &state, RowLayout &layout, Vector &addresses, DataChunk &result,
|
92
|
-
idx_t aggr_idx) {
|
90
|
+
void RowOperations::FinalizeStates(RowOperationsState &state, TupleDataLayout &layout, Vector &addresses,
|
91
|
+
DataChunk &result, idx_t aggr_idx) {
|
93
92
|
// Move to the first aggregate state
|
94
93
|
VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), result.size());
|
95
94
|
|
@@ -6,7 +6,7 @@
|
|
6
6
|
//
|
7
7
|
//===----------------------------------------------------------------------===//
|
8
8
|
#include "duckdb/common/row_operations/row_operations.hpp"
|
9
|
-
#include "duckdb/common/types/row_layout.hpp"
|
9
|
+
#include "duckdb/common/types/row/row_layout.hpp"
|
10
10
|
|
11
11
|
namespace duckdb {
|
12
12
|
|
@@ -6,8 +6,9 @@
|
|
6
6
|
#include "duckdb/common/exception.hpp"
|
7
7
|
#include "duckdb/common/operator/constant_operators.hpp"
|
8
8
|
#include "duckdb/common/row_operations/row_operations.hpp"
|
9
|
-
#include "duckdb/common/types/row_data_collection.hpp"
|
10
|
-
#include "duckdb/common/types/row_layout.hpp"
|
9
|
+
#include "duckdb/common/types/row/row_data_collection.hpp"
|
10
|
+
#include "duckdb/common/types/row/row_layout.hpp"
|
11
|
+
#include "duckdb/common/types/row/tuple_data_layout.hpp"
|
11
12
|
|
12
13
|
namespace duckdb {
|
13
14
|
|
@@ -193,7 +194,8 @@ static void TemplatedFullScanLoop(Vector &rows, Vector &col, idx_t count, idx_t
|
|
193
194
|
}
|
194
195
|
}
|
195
196
|
|
196
|
-
void RowOperations::FullScanColumn(const RowLayout &layout, Vector &rows, Vector &col, idx_t count, idx_t col_no) {
|
197
|
+
void RowOperations::FullScanColumn(const TupleDataLayout &layout, Vector &rows, Vector &col, idx_t count,
|
198
|
+
idx_t col_no) {
|
197
199
|
const auto col_offset = layout.GetOffsets()[col_no];
|
198
200
|
col.SetVectorType(VectorType::FLAT_VECTOR);
|
199
201
|
switch (col.GetType().InternalType()) {
|