duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/read_csv.cpp +124 -58
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/main/settings/settings.cpp +3 -4
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -3,9 +3,10 @@
|
|
3
3
|
#include "duckdb/catalog/catalog_entry/aggregate_function_catalog_entry.hpp"
|
4
4
|
#include "duckdb/common/algorithm.hpp"
|
5
5
|
#include "duckdb/common/exception.hpp"
|
6
|
+
#include "duckdb/common/radix_partitioning.hpp"
|
6
7
|
#include "duckdb/common/row_operations/row_operations.hpp"
|
7
8
|
#include "duckdb/common/types/null_value.hpp"
|
8
|
-
#include "duckdb/common/types/
|
9
|
+
#include "duckdb/common/types/row/tuple_data_iterator.hpp"
|
9
10
|
#include "duckdb/common/vector_operations/unary_executor.hpp"
|
10
11
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
11
12
|
#include "duckdb/execution/expression_executor.hpp"
|
@@ -16,7 +17,7 @@
|
|
16
17
|
|
17
18
|
namespace duckdb {
|
18
19
|
|
19
|
-
using ValidityBytes =
|
20
|
+
using ValidityBytes = TupleDataLayout::ValidityBytes;
|
20
21
|
|
21
22
|
GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, Allocator &allocator,
|
22
23
|
vector<LogicalType> group_types, vector<LogicalType> payload_types,
|
@@ -34,7 +35,8 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
|
|
34
35
|
AggregateHTAppendState::AggregateHTAppendState()
|
35
36
|
: ht_offsets(LogicalTypeId::BIGINT), hash_salts(LogicalTypeId::SMALLINT),
|
36
37
|
group_compare_vector(STANDARD_VECTOR_SIZE), no_match_vector(STANDARD_VECTOR_SIZE),
|
37
|
-
empty_vector(STANDARD_VECTOR_SIZE), new_groups(STANDARD_VECTOR_SIZE), addresses(LogicalType::POINTER)
|
38
|
+
empty_vector(STANDARD_VECTOR_SIZE), new_groups(STANDARD_VECTOR_SIZE), addresses(LogicalType::POINTER),
|
39
|
+
chunk_state_initialized(false) {
|
38
40
|
}
|
39
41
|
|
40
42
|
GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, Allocator &allocator,
|
@@ -43,19 +45,19 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
|
|
43
45
|
vector<AggregateObject> aggregate_objects_p,
|
44
46
|
HtEntryType entry_type, idx_t initial_capacity)
|
45
47
|
: BaseAggregateHashTable(context, allocator, aggregate_objects_p, std::move(payload_types_p)),
|
46
|
-
entry_type(entry_type), capacity(0),
|
47
|
-
aggregate_allocator(allocator) {
|
48
|
+
entry_type(entry_type), capacity(0), is_finalized(false),
|
49
|
+
aggregate_allocator(make_shared<ArenaAllocator>(allocator)) {
|
48
50
|
// Append hash column to the end and initialise the row layout
|
49
51
|
group_types_p.emplace_back(LogicalType::HASH);
|
50
52
|
layout.Initialize(std::move(group_types_p), std::move(aggregate_objects_p));
|
53
|
+
tuple_size = layout.GetRowWidth();
|
54
|
+
tuples_per_block = Storage::BLOCK_SIZE / tuple_size;
|
51
55
|
|
52
56
|
// HT layout
|
53
57
|
hash_offset = layout.GetOffsets()[layout.ColumnCount() - 1];
|
58
|
+
data_collection = make_uniq<TupleDataCollection>(buffer_manager, layout);
|
59
|
+
data_collection->InitializeAppend(td_pin_state, TupleDataPinProperties::KEEP_EVERYTHING_PINNED);
|
54
60
|
|
55
|
-
tuple_size = layout.GetRowWidth();
|
56
|
-
|
57
|
-
D_ASSERT(tuple_size <= Storage::BLOCK_SIZE);
|
58
|
-
tuples_per_block = Storage::BLOCK_SIZE / tuple_size;
|
59
61
|
hashes_hdl = buffer_manager.Allocate(Storage::BLOCK_SIZE);
|
60
62
|
hashes_hdl_ptr = hashes_hdl.Ptr();
|
61
63
|
|
@@ -75,44 +77,18 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
|
|
75
77
|
}
|
76
78
|
|
77
79
|
predicates.resize(layout.ColumnCount() - 1, ExpressionType::COMPARE_EQUAL);
|
78
|
-
string_heap = make_uniq<RowDataCollection>(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
|
79
80
|
}
|
80
81
|
|
81
82
|
GroupedAggregateHashTable::~GroupedAggregateHashTable() {
|
82
83
|
Destroy();
|
83
84
|
}
|
84
85
|
|
85
|
-
|
86
|
-
|
87
|
-
if (entries == 0) {
|
86
|
+
void GroupedAggregateHashTable::Destroy() {
|
87
|
+
if (data_collection->Count() == 0) {
|
88
88
|
return;
|
89
89
|
}
|
90
|
-
idx_t apply_entries = entries;
|
91
|
-
idx_t page_nr = 0;
|
92
|
-
idx_t page_offset = 0;
|
93
|
-
|
94
|
-
for (auto &payload_chunk_ptr : payload_hds_ptrs) {
|
95
|
-
auto this_entries = MinValue(tuples_per_block, apply_entries);
|
96
|
-
page_offset = 0;
|
97
|
-
for (data_ptr_t ptr = payload_chunk_ptr, end = payload_chunk_ptr + this_entries * tuple_size; ptr < end;
|
98
|
-
ptr += tuple_size) {
|
99
|
-
fun(page_nr, page_offset++, ptr);
|
100
|
-
}
|
101
|
-
apply_entries -= this_entries;
|
102
|
-
page_nr++;
|
103
|
-
}
|
104
|
-
D_ASSERT(apply_entries == 0);
|
105
|
-
}
|
106
90
|
|
107
|
-
|
108
|
-
auto pin = buffer_manager.Allocate(Storage::BLOCK_SIZE);
|
109
|
-
payload_hds.push_back(std::move(pin));
|
110
|
-
payload_hds_ptrs.push_back(payload_hds.back().Ptr());
|
111
|
-
payload_page_offset = 0;
|
112
|
-
}
|
113
|
-
|
114
|
-
void GroupedAggregateHashTable::Destroy() {
|
115
|
-
// check if there is a destructor
|
91
|
+
// Check if there is an aggregate with a destructor
|
116
92
|
bool has_destructor = false;
|
117
93
|
for (auto &aggr : layout.GetAggregates()) {
|
118
94
|
if (aggr.function.destructor) {
|
@@ -122,32 +98,25 @@ void GroupedAggregateHashTable::Destroy() {
|
|
122
98
|
if (!has_destructor) {
|
123
99
|
return;
|
124
100
|
}
|
125
|
-
// there are aggregates with destructors: loop over the hash table
|
126
|
-
// and call the destructor method for each of the aggregates
|
127
|
-
data_ptr_t data_pointers[STANDARD_VECTOR_SIZE];
|
128
|
-
Vector state_vector(LogicalType::POINTER, (data_ptr_t)data_pointers);
|
129
|
-
idx_t count = 0;
|
130
101
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
RowOperations::DestroyStates(state, layout, state_vector, count);
|
102
|
+
// There are aggregates with destructors: Call the destructor for each of the aggregates
|
103
|
+
RowOperationsState state(aggregate_allocator->GetAllocator());
|
104
|
+
TupleDataChunkIterator iterator(*data_collection, TupleDataPinProperties::DESTROY_AFTER_DONE, false);
|
105
|
+
auto &row_locations = iterator.GetChunkState().row_locations;
|
106
|
+
do {
|
107
|
+
RowOperations::DestroyStates(state, layout, row_locations, iterator.GetCurrentChunkCount());
|
108
|
+
} while (iterator.Next());
|
109
|
+
data_collection->Reset();
|
140
110
|
}
|
141
111
|
|
142
112
|
template <class ENTRY>
|
143
113
|
void GroupedAggregateHashTable::VerifyInternal() {
|
144
114
|
auto hashes_ptr = (ENTRY *)hashes_hdl_ptr;
|
145
|
-
D_ASSERT(payload_hds.size() == payload_hds_ptrs.size());
|
146
115
|
idx_t count = 0;
|
147
116
|
for (idx_t i = 0; i < capacity; i++) {
|
148
117
|
if (hashes_ptr[i].page_nr > 0) {
|
149
118
|
D_ASSERT(hashes_ptr[i].page_offset < tuples_per_block);
|
150
|
-
D_ASSERT(hashes_ptr[i].page_nr <=
|
119
|
+
D_ASSERT(hashes_ptr[i].page_nr <= payload_hds_ptrs.size());
|
151
120
|
auto ptr = payload_hds_ptrs[hashes_ptr[i].page_nr - 1] + ((hashes_ptr[i].page_offset) * tuple_size);
|
152
121
|
auto hash = Load<hash_t>(ptr + hash_offset);
|
153
122
|
D_ASSERT((hashes_ptr[i].salt) == (hash >> hash_prefix_shift));
|
@@ -156,7 +125,7 @@ void GroupedAggregateHashTable::VerifyInternal() {
|
|
156
125
|
}
|
157
126
|
}
|
158
127
|
(void)count;
|
159
|
-
D_ASSERT(count ==
|
128
|
+
D_ASSERT(count == Count());
|
160
129
|
}
|
161
130
|
|
162
131
|
idx_t GroupedAggregateHashTable::InitialCapacity() {
|
@@ -202,47 +171,65 @@ void GroupedAggregateHashTable::Verify() {
|
|
202
171
|
|
203
172
|
template <class ENTRY>
|
204
173
|
void GroupedAggregateHashTable::Resize(idx_t size) {
|
205
|
-
Verify();
|
206
|
-
|
207
174
|
D_ASSERT(!is_finalized);
|
175
|
+
D_ASSERT(size >= STANDARD_VECTOR_SIZE);
|
176
|
+
D_ASSERT(IsPowerOfTwo(size));
|
208
177
|
|
209
|
-
if (size
|
178
|
+
if (size < capacity) {
|
210
179
|
throw InternalException("Cannot downsize a hash table!");
|
211
180
|
}
|
212
|
-
|
213
|
-
|
214
|
-
// size needs to be a power of 2
|
215
|
-
D_ASSERT((size & (size - 1)) == 0);
|
216
|
-
bitmask = size - 1;
|
181
|
+
capacity = size;
|
217
182
|
|
218
|
-
|
183
|
+
bitmask = capacity - 1;
|
184
|
+
const auto byte_size = capacity * sizeof(ENTRY);
|
219
185
|
if (byte_size > (idx_t)Storage::BLOCK_SIZE) {
|
220
186
|
hashes_hdl = buffer_manager.Allocate(byte_size);
|
221
187
|
hashes_hdl_ptr = hashes_hdl.Ptr();
|
222
188
|
}
|
223
189
|
memset(hashes_hdl_ptr, 0, byte_size);
|
224
|
-
capacity = size;
|
225
190
|
|
226
|
-
|
191
|
+
if (Count() != 0) {
|
192
|
+
D_ASSERT(!payload_hds_ptrs.empty());
|
193
|
+
auto hashes_arr = (ENTRY *)hashes_hdl_ptr;
|
194
|
+
|
195
|
+
idx_t block_id = 0;
|
196
|
+
auto block_pointer = payload_hds_ptrs[block_id];
|
197
|
+
auto block_end = block_pointer + tuples_per_block * tuple_size;
|
198
|
+
|
199
|
+
TupleDataChunkIterator iterator(*data_collection, TupleDataPinProperties::ALREADY_PINNED, false);
|
200
|
+
const auto row_locations = iterator.GetRowLocations();
|
201
|
+
do {
|
202
|
+
for (idx_t i = 0; i < iterator.GetCurrentChunkCount(); i++) {
|
203
|
+
const auto &row_location = row_locations[i];
|
204
|
+
if (row_location > block_end || row_location < block_pointer) {
|
205
|
+
block_id++;
|
206
|
+
D_ASSERT(block_id < payload_hds_ptrs.size());
|
207
|
+
block_pointer = payload_hds_ptrs[block_id];
|
208
|
+
block_end = block_pointer + tuples_per_block * tuple_size;
|
209
|
+
}
|
210
|
+
D_ASSERT(row_location >= block_pointer && row_location < block_end);
|
211
|
+
D_ASSERT((row_location - block_pointer) % tuple_size == 0);
|
212
|
+
|
213
|
+
const auto hash = Load<hash_t>(row_location + hash_offset);
|
214
|
+
D_ASSERT((hash & bitmask) == (hash % capacity));
|
215
|
+
D_ASSERT(hash >> hash_prefix_shift <= NumericLimits<uint16_t>::Maximum());
|
216
|
+
|
217
|
+
auto entry_idx = (idx_t)hash & bitmask;
|
218
|
+
while (hashes_arr[entry_idx].page_nr > 0) {
|
219
|
+
entry_idx++;
|
220
|
+
if (entry_idx >= capacity) {
|
221
|
+
entry_idx = 0;
|
222
|
+
}
|
223
|
+
}
|
227
224
|
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
entry_idx++;
|
234
|
-
if (entry_idx >= capacity) {
|
235
|
-
entry_idx = 0;
|
225
|
+
auto &ht_entry = hashes_arr[entry_idx];
|
226
|
+
D_ASSERT(!ht_entry.page_nr);
|
227
|
+
ht_entry.salt = hash >> hash_prefix_shift;
|
228
|
+
ht_entry.page_nr = block_id + 1;
|
229
|
+
ht_entry.page_offset = (row_location - block_pointer) / tuple_size;
|
236
230
|
}
|
237
|
-
}
|
238
|
-
|
239
|
-
D_ASSERT(!hashes_arr[entry_idx].page_nr);
|
240
|
-
D_ASSERT(hash >> hash_prefix_shift <= NumericLimits<uint16_t>::Maximum());
|
241
|
-
|
242
|
-
hashes_arr[entry_idx].salt = hash >> hash_prefix_shift;
|
243
|
-
hashes_arr[entry_idx].page_nr = page_nr + 1;
|
244
|
-
hashes_arr[entry_idx].page_offset = page_offset;
|
245
|
-
});
|
231
|
+
} while (iterator.Next());
|
232
|
+
}
|
246
233
|
|
247
234
|
Verify();
|
248
235
|
}
|
@@ -272,26 +259,25 @@ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChu
|
|
272
259
|
idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes,
|
273
260
|
DataChunk &payload, const vector<idx_t> &filter) {
|
274
261
|
D_ASSERT(!is_finalized);
|
275
|
-
|
276
262
|
if (groups.size() == 0) {
|
277
263
|
return 0;
|
278
264
|
}
|
279
265
|
|
266
|
+
#ifdef DEBUG
|
280
267
|
D_ASSERT(groups.ColumnCount() + 1 == layout.ColumnCount());
|
281
268
|
for (idx_t i = 0; i < groups.ColumnCount(); i++) {
|
282
269
|
D_ASSERT(groups.GetTypes()[i] == layout.GetTypes()[i]);
|
283
270
|
}
|
271
|
+
#endif
|
284
272
|
|
285
273
|
auto new_group_count = FindOrCreateGroups(state, groups, group_hashes, state.addresses, state.new_groups);
|
286
274
|
VectorOperations::AddInPlace(state.addresses, layout.GetAggrOffset(), payload.size());
|
287
275
|
|
288
|
-
//
|
289
|
-
// update the aggregates
|
290
|
-
idx_t payload_idx = 0;
|
291
|
-
|
276
|
+
// Now every cell has an entry, update the aggregates
|
292
277
|
auto &aggregates = layout.GetAggregates();
|
293
278
|
idx_t filter_idx = 0;
|
294
|
-
|
279
|
+
idx_t payload_idx = 0;
|
280
|
+
RowOperationsState row_state(aggregate_allocator->GetAllocator());
|
295
281
|
for (idx_t i = 0; i < aggregates.size(); i++) {
|
296
282
|
auto &aggr = aggregates[i];
|
297
283
|
if (filter_idx >= filter.size() || i < filter[filter_idx]) {
|
@@ -309,7 +295,7 @@ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChu
|
|
309
295
|
RowOperations::UpdateStates(row_state, aggr, state.addresses, payload, payload_idx, payload.size());
|
310
296
|
}
|
311
297
|
|
312
|
-
//
|
298
|
+
// Move to the next aggregate
|
313
299
|
payload_idx += aggr.child_count;
|
314
300
|
VectorOperations::AddInPlace(state.addresses, aggr.payload_size, payload.size());
|
315
301
|
filter_idx++;
|
@@ -336,7 +322,7 @@ void GroupedAggregateHashTable::FetchAggregates(DataChunk &groups, DataChunk &re
|
|
336
322
|
Vector addresses(LogicalType::POINTER);
|
337
323
|
FindOrCreateGroups(append_state, groups, addresses);
|
338
324
|
// now fetch the aggregates
|
339
|
-
RowOperationsState row_state(aggregate_allocator
|
325
|
+
RowOperationsState row_state(aggregate_allocator->GetAllocator());
|
340
326
|
RowOperations::FinalizeStates(row_state, layout, addresses, result, 0);
|
341
327
|
}
|
342
328
|
|
@@ -346,42 +332,39 @@ idx_t GroupedAggregateHashTable::ResizeThreshold() {
|
|
346
332
|
|
347
333
|
template <class ENTRY>
|
348
334
|
idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(AggregateHTAppendState &state, DataChunk &groups,
|
349
|
-
Vector &
|
335
|
+
Vector &group_hashes_v, Vector &addresses_v,
|
350
336
|
SelectionVector &new_groups_out) {
|
351
337
|
D_ASSERT(!is_finalized);
|
338
|
+
D_ASSERT(groups.ColumnCount() + 1 == layout.ColumnCount());
|
339
|
+
D_ASSERT(group_hashes_v.GetType() == LogicalType::HASH);
|
340
|
+
D_ASSERT(state.ht_offsets.GetVectorType() == VectorType::FLAT_VECTOR);
|
341
|
+
D_ASSERT(state.ht_offsets.GetType() == LogicalType::BIGINT);
|
342
|
+
D_ASSERT(addresses_v.GetType() == LogicalType::POINTER);
|
343
|
+
D_ASSERT(state.hash_salts.GetType() == LogicalType::SMALLINT);
|
352
344
|
|
353
|
-
if (
|
345
|
+
if (Count() + groups.size() > MaxCapacity()) {
|
354
346
|
throw InternalException("Hash table capacity reached");
|
355
347
|
}
|
356
348
|
|
357
|
-
//
|
358
|
-
if (capacity -
|
349
|
+
// Resize at 50% capacity, also need to fit the entire vector
|
350
|
+
if (capacity - Count() <= groups.size() || Count() > ResizeThreshold()) {
|
351
|
+
Verify();
|
359
352
|
Resize<ENTRY>(capacity * 2);
|
360
353
|
}
|
354
|
+
D_ASSERT(capacity - Count() >= groups.size()); // we need to be able to fit at least one vector of data
|
361
355
|
|
362
|
-
|
363
|
-
|
364
|
-
// we need to be able to fit at least one vector of data
|
365
|
-
D_ASSERT(capacity - entries >= groups.size());
|
366
|
-
D_ASSERT(group_hashes.GetType() == LogicalType::HASH);
|
356
|
+
group_hashes_v.Flatten(groups.size());
|
357
|
+
auto group_hashes = FlatVector::GetData<hash_t>(group_hashes_v);
|
367
358
|
|
368
|
-
|
369
|
-
auto
|
359
|
+
addresses_v.Flatten(groups.size());
|
360
|
+
auto addresses = FlatVector::GetData<data_ptr_t>(addresses_v);
|
370
361
|
|
371
|
-
|
372
|
-
D_ASSERT(state.ht_offsets.GetType() == LogicalType::BIGINT);
|
373
|
-
|
374
|
-
D_ASSERT(addresses.GetType() == LogicalType::POINTER);
|
375
|
-
addresses.Flatten(groups.size());
|
376
|
-
auto addresses_ptr = FlatVector::GetData<data_ptr_t>(addresses);
|
377
|
-
|
378
|
-
// compute the entry in the table based on the hash using a modulo
|
362
|
+
// Compute the entry in the table based on the hash using a modulo,
|
379
363
|
// and precompute the hash salts for faster comparison below
|
380
|
-
D_ASSERT(state.hash_salts.GetType() == LogicalType::SMALLINT);
|
381
364
|
auto ht_offsets_ptr = FlatVector::GetData<uint64_t>(state.ht_offsets);
|
382
365
|
auto hash_salts_ptr = FlatVector::GetData<uint16_t>(state.hash_salts);
|
383
366
|
for (idx_t r = 0; r < groups.size(); r++) {
|
384
|
-
auto element =
|
367
|
+
auto element = group_hashes[r];
|
385
368
|
D_ASSERT((element & bitmask) == (element % capacity));
|
386
369
|
ht_offsets_ptr[r] = element & bitmask;
|
387
370
|
hash_salts_ptr[r] = element >> hash_prefix_shift;
|
@@ -389,9 +372,7 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(AggregateHTAppendSta
|
|
389
372
|
// we start out with all entries [0, 1, 2, ..., groups.size()]
|
390
373
|
const SelectionVector *sel_vector = FlatVector::IncrementalSelectionVector();
|
391
374
|
|
392
|
-
|
393
|
-
|
394
|
-
// make a chunk that references the groups and the hashes
|
375
|
+
// Make a chunk that references the groups and the hashes and convert to unified format
|
395
376
|
if (state.group_chunk.ColumnCount() == 0) {
|
396
377
|
state.group_chunk.InitializeEmpty(layout.GetTypes());
|
397
378
|
}
|
@@ -399,81 +380,101 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(AggregateHTAppendSta
|
|
399
380
|
for (idx_t grp_idx = 0; grp_idx < groups.ColumnCount(); grp_idx++) {
|
400
381
|
state.group_chunk.data[grp_idx].Reference(groups.data[grp_idx]);
|
401
382
|
}
|
402
|
-
state.group_chunk.data[groups.ColumnCount()].Reference(
|
383
|
+
state.group_chunk.data[groups.ColumnCount()].Reference(group_hashes_v);
|
403
384
|
state.group_chunk.SetCardinality(groups);
|
404
385
|
|
405
386
|
// convert all vectors to unified format
|
387
|
+
if (!state.chunk_state_initialized) {
|
388
|
+
data_collection->InitializeAppend(state.chunk_state);
|
389
|
+
state.chunk_state_initialized = true;
|
390
|
+
}
|
391
|
+
TupleDataCollection::ToUnifiedFormat(state.chunk_state, state.group_chunk);
|
406
392
|
if (!state.group_data) {
|
407
393
|
state.group_data = unique_ptr<UnifiedVectorFormat[]>(new UnifiedVectorFormat[state.group_chunk.ColumnCount()]);
|
408
394
|
}
|
409
|
-
|
410
|
-
state.group_chunk.data[col_idx].ToUnifiedFormat(state.group_chunk.size(), state.group_data[col_idx]);
|
411
|
-
}
|
395
|
+
TupleDataCollection::GetVectorData(state.chunk_state, state.group_data.get());
|
412
396
|
|
413
397
|
idx_t new_group_count = 0;
|
398
|
+
idx_t remaining_entries = groups.size();
|
414
399
|
while (remaining_entries > 0) {
|
415
400
|
idx_t new_entry_count = 0;
|
416
401
|
idx_t need_compare_count = 0;
|
417
402
|
idx_t no_match_count = 0;
|
418
403
|
|
419
|
-
//
|
404
|
+
// For each remaining entry, figure out whether or not it belongs to a full or empty group
|
420
405
|
for (idx_t i = 0; i < remaining_entries; i++) {
|
421
406
|
const idx_t index = sel_vector->get_index(i);
|
422
|
-
|
423
|
-
if (
|
424
|
-
|
425
|
-
|
426
|
-
NewBlock();
|
427
|
-
}
|
428
|
-
|
429
|
-
auto entry_payload_ptr = payload_hds_ptrs.back() + (payload_page_offset * tuple_size);
|
407
|
+
auto &ht_entry = *(((ENTRY *)this->hashes_hdl_ptr) + ht_offsets_ptr[index]);
|
408
|
+
if (ht_entry.page_nr == 0) { // Cell is unoccupied (we use page number 0 as a "unused marker")
|
409
|
+
D_ASSERT(group_hashes[index] >> hash_prefix_shift <= NumericLimits<uint16_t>::Maximum());
|
410
|
+
D_ASSERT(payload_hds_ptrs.size() < NumericLimits<uint32_t>::Maximum());
|
430
411
|
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
D_ASSERT(payload_page_offset + 1 < NumericLimits<uint16_t>::Maximum());
|
412
|
+
// Set page nr to 1 for now to mark it as occupied (will be corrected later) and set the salt
|
413
|
+
ht_entry.page_nr = 1;
|
414
|
+
ht_entry.salt = group_hashes[index] >> hash_prefix_shift;
|
435
415
|
|
436
|
-
|
437
|
-
|
438
|
-
// page numbers start at one so we can use 0 as empty flag
|
439
|
-
// GetPtr undoes this
|
440
|
-
ht_entry_ptr->page_nr = payload_hds.size();
|
441
|
-
ht_entry_ptr->page_offset = payload_page_offset++;
|
442
|
-
|
443
|
-
// update selection lists for outer loops
|
416
|
+
// Update selection lists for outer loops
|
444
417
|
state.empty_vector.set_index(new_entry_count++, index);
|
445
418
|
new_groups_out.set_index(new_group_count++, index);
|
446
|
-
|
447
|
-
|
448
|
-
addresses_ptr[index] = entry_payload_ptr;
|
449
|
-
|
450
|
-
} else {
|
451
|
-
// cell is occupied: add to check list
|
452
|
-
// only need to check if hash salt in ptr == prefix of hash in payload
|
453
|
-
if (ht_entry_ptr->salt == hash_salts_ptr[index]) {
|
419
|
+
} else { // Cell is occupied: Compare salts
|
420
|
+
if (ht_entry.salt == hash_salts_ptr[index]) {
|
454
421
|
state.group_compare_vector.set_index(need_compare_count++, index);
|
455
|
-
|
456
|
-
auto page_ptr = payload_hds_ptrs[ht_entry_ptr->page_nr - 1];
|
457
|
-
auto page_offset = ht_entry_ptr->page_offset * tuple_size;
|
458
|
-
addresses_ptr[index] = page_ptr + page_offset;
|
459
|
-
|
460
422
|
} else {
|
461
423
|
state.no_match_vector.set_index(no_match_count++, index);
|
462
424
|
}
|
463
425
|
}
|
464
426
|
}
|
465
427
|
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
428
|
+
if (new_entry_count != 0) {
|
429
|
+
// Append everything that belongs to an empty group
|
430
|
+
data_collection->AppendUnified(td_pin_state, state.chunk_state, state.group_chunk, state.empty_vector,
|
431
|
+
new_entry_count);
|
432
|
+
RowOperations::InitializeStates(layout, state.chunk_state.row_locations,
|
433
|
+
*FlatVector::IncrementalSelectionVector(), new_entry_count);
|
434
|
+
|
435
|
+
// Get the pointers to the (possibly) newly created blocks of the data collection
|
436
|
+
idx_t block_id = payload_hds_ptrs.empty() ? 0 : payload_hds_ptrs.size() - 1;
|
437
|
+
UpdateBlockPointers();
|
438
|
+
auto block_pointer = payload_hds_ptrs[block_id];
|
439
|
+
auto block_end = block_pointer + tuples_per_block * tuple_size;
|
440
|
+
|
441
|
+
// Set the page nrs/offsets in the 1st part of the HT now that the data has been appended
|
442
|
+
const auto row_locations = FlatVector::GetData<data_ptr_t>(state.chunk_state.row_locations);
|
443
|
+
for (idx_t new_entry_idx = 0; new_entry_idx < new_entry_count; new_entry_idx++) {
|
444
|
+
const auto &row_location = row_locations[new_entry_idx];
|
445
|
+
if (row_location > block_end || row_location < block_pointer) {
|
446
|
+
block_id++;
|
447
|
+
D_ASSERT(block_id < payload_hds_ptrs.size());
|
448
|
+
block_pointer = payload_hds_ptrs[block_id];
|
449
|
+
block_end = block_pointer + tuples_per_block * tuple_size;
|
450
|
+
}
|
451
|
+
D_ASSERT(row_location >= block_pointer && row_location < block_end);
|
452
|
+
D_ASSERT((row_location - block_pointer) % tuple_size == 0);
|
453
|
+
const auto index = state.empty_vector.get_index(new_entry_idx);
|
454
|
+
auto &ht_entry = *(((ENTRY *)this->hashes_hdl_ptr) + ht_offsets_ptr[index]);
|
455
|
+
ht_entry.page_nr = block_id + 1;
|
456
|
+
ht_entry.page_offset = (row_location - block_pointer) / tuple_size;
|
457
|
+
addresses[index] = row_location;
|
458
|
+
}
|
459
|
+
}
|
460
|
+
|
461
|
+
if (need_compare_count != 0) {
|
462
|
+
// Get the pointers to the rows that need to be compared
|
463
|
+
for (idx_t need_compare_idx = 0; need_compare_idx < need_compare_count; need_compare_idx++) {
|
464
|
+
const auto index = state.group_compare_vector.get_index(need_compare_idx);
|
465
|
+
const auto &ht_entry = *(((ENTRY *)this->hashes_hdl_ptr) + ht_offsets_ptr[index]);
|
466
|
+
auto page_ptr = payload_hds_ptrs[ht_entry.page_nr - 1];
|
467
|
+
auto page_offset = ht_entry.page_offset * tuple_size;
|
468
|
+
addresses[index] = page_ptr + page_offset;
|
469
|
+
}
|
470
470
|
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
471
|
+
// Perform group comparisons
|
472
|
+
RowOperations::Match(state.group_chunk, state.group_data.get(), layout, addresses_v, predicates,
|
473
|
+
state.group_compare_vector, need_compare_count, &state.no_match_vector,
|
474
|
+
no_match_count);
|
475
|
+
}
|
475
476
|
|
476
|
-
// each of the entries that do not match
|
477
|
+
// Linear probing: each of the entries that do not match move to the next entry in the HT
|
477
478
|
for (idx_t i = 0; i < no_match_count; i++) {
|
478
479
|
idx_t index = state.no_match_vector.get_index(i);
|
479
480
|
ht_offsets_ptr[index]++;
|
@@ -488,6 +489,17 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(AggregateHTAppendSta
|
|
488
489
|
return new_group_count;
|
489
490
|
}
|
490
491
|
|
492
|
+
void GroupedAggregateHashTable::UpdateBlockPointers() {
|
493
|
+
for (const auto &id_and_handle : td_pin_state.row_handles) {
|
494
|
+
const auto &id = id_and_handle.first;
|
495
|
+
const auto &handle = id_and_handle.second;
|
496
|
+
if (payload_hds_ptrs.empty() || id > payload_hds_ptrs.size() - 1) {
|
497
|
+
payload_hds_ptrs.resize(id + 1);
|
498
|
+
}
|
499
|
+
payload_hds_ptrs[id] = handle.Ptr();
|
500
|
+
}
|
501
|
+
}
|
502
|
+
|
491
503
|
// this is to support distinct aggregations where we need to record whether we
|
492
504
|
// have already seen a value for a group
|
493
505
|
idx_t GroupedAggregateHashTable::FindOrCreateGroups(AggregateHTAppendState &state, DataChunk &groups,
|
@@ -517,37 +529,44 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroups(AggregateHTAppendState &stat
|
|
517
529
|
}
|
518
530
|
|
519
531
|
struct FlushMoveState {
|
520
|
-
FlushMoveState(
|
521
|
-
:
|
532
|
+
explicit FlushMoveState(TupleDataCollection &collection_p)
|
533
|
+
: collection(collection_p), hashes(LogicalType::HASH), group_addresses(LogicalType::POINTER),
|
522
534
|
new_groups_sel(STANDARD_VECTOR_SIZE) {
|
523
|
-
|
524
|
-
|
535
|
+
const auto &layout = collection.GetLayout();
|
536
|
+
vector<column_t> column_ids;
|
537
|
+
column_ids.reserve(layout.ColumnCount() - 1);
|
538
|
+
for (idx_t col_idx = 0; col_idx < layout.ColumnCount() - 1; col_idx++) {
|
539
|
+
column_ids.emplace_back(col_idx);
|
540
|
+
}
|
541
|
+
// FIXME DESTROY_AFTER_DONE if we make it possible to pass a selection vector to RowOperations::DestroyStates?
|
542
|
+
collection.InitializeScan(scan_state, column_ids, TupleDataPinProperties::UNPIN_AFTER_DONE);
|
543
|
+
collection.InitializeScanChunk(scan_state, groups);
|
544
|
+
hash_col_idx = layout.ColumnCount() - 1;
|
525
545
|
}
|
526
546
|
|
547
|
+
bool Scan();
|
548
|
+
|
549
|
+
TupleDataCollection &collection;
|
550
|
+
TupleDataScanState scan_state;
|
527
551
|
DataChunk groups;
|
528
|
-
|
552
|
+
|
553
|
+
idx_t hash_col_idx;
|
554
|
+
Vector hashes;
|
555
|
+
|
556
|
+
AggregateHTAppendState append_state;
|
529
557
|
Vector group_addresses;
|
530
558
|
SelectionVector new_groups_sel;
|
531
559
|
};
|
532
560
|
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
state.groups.Reset();
|
539
|
-
state.groups.SetCardinality(count);
|
540
|
-
for (idx_t col_no = 0; col_no < state.groups.ColumnCount(); col_no++) {
|
541
|
-
auto &column = state.groups.data[col_no];
|
542
|
-
RowOperations::Gather(source_addresses, *FlatVector::IncrementalSelectionVector(), column,
|
543
|
-
*FlatVector::IncrementalSelectionVector(), count, layout, col_no);
|
561
|
+
bool FlushMoveState::Scan() {
|
562
|
+
if (collection.Scan(scan_state, groups)) {
|
563
|
+
collection.Gather(scan_state.chunk_state.row_locations, *FlatVector::IncrementalSelectionVector(),
|
564
|
+
groups.size(), hash_col_idx, hashes, *FlatVector::IncrementalSelectionVector());
|
565
|
+
return true;
|
544
566
|
}
|
545
567
|
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
RowOperationsState row_state(aggregate_allocator.GetAllocator());
|
550
|
-
RowOperations::CombineStates(row_state, layout, source_addresses, state.group_addresses, count);
|
568
|
+
collection.FinalizePinState(scan_state.pin_state);
|
569
|
+
return false;
|
551
570
|
}
|
552
571
|
|
553
572
|
void GroupedAggregateHashTable::Combine(GroupedAggregateHashTable &other) {
|
@@ -556,127 +575,67 @@ void GroupedAggregateHashTable::Combine(GroupedAggregateHashTable &other) {
|
|
556
575
|
D_ASSERT(other.layout.GetAggrWidth() == layout.GetAggrWidth());
|
557
576
|
D_ASSERT(other.layout.GetDataWidth() == layout.GetDataWidth());
|
558
577
|
D_ASSERT(other.layout.GetRowWidth() == layout.GetRowWidth());
|
559
|
-
D_ASSERT(other.tuples_per_block == tuples_per_block);
|
560
578
|
|
561
|
-
if (other.
|
579
|
+
if (other.Count() == 0) {
|
562
580
|
return;
|
563
581
|
}
|
564
582
|
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
FlushMoveState state(allocator, layout);
|
574
|
-
other.PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) {
|
575
|
-
auto hash = Load<hash_t>(ptr + hash_offset);
|
583
|
+
FlushMoveState state(*other.data_collection);
|
584
|
+
RowOperationsState row_state(aggregate_allocator->GetAllocator());
|
585
|
+
while (state.Scan()) {
|
586
|
+
FindOrCreateGroups(state.append_state, state.groups, state.hashes, state.group_addresses, state.new_groups_sel);
|
587
|
+
RowOperations::CombineStates(row_state, layout, state.scan_state.chunk_state.row_locations,
|
588
|
+
state.group_addresses, state.groups.size());
|
589
|
+
}
|
576
590
|
|
577
|
-
hashes_ptr[group_idx] = hash;
|
578
|
-
addresses_ptr[group_idx] = ptr;
|
579
|
-
group_idx++;
|
580
|
-
if (group_idx == STANDARD_VECTOR_SIZE) {
|
581
|
-
FlushMove(state, addresses, hashes, group_idx);
|
582
|
-
group_idx = 0;
|
583
|
-
}
|
584
|
-
});
|
585
|
-
FlushMove(state, addresses, hashes, group_idx);
|
586
|
-
string_heap->Merge(*other.string_heap);
|
587
591
|
Verify();
|
588
592
|
}
|
589
593
|
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) {
|
609
|
-
auto hash = Load<hash_t>(ptr + hash_offset);
|
610
|
-
|
611
|
-
idx_t partition = (hash & mask) >> shift;
|
612
|
-
D_ASSERT(partition < partition_hts.size());
|
613
|
-
|
614
|
-
auto &info = partition_info[partition];
|
615
|
-
|
616
|
-
info.hashes_ptr[info.group_count] = hash;
|
617
|
-
info.addresses_ptr[info.group_count] = ptr;
|
618
|
-
info.group_count++;
|
619
|
-
if (info.group_count == STANDARD_VECTOR_SIZE) {
|
620
|
-
D_ASSERT(partition_hts[partition]);
|
621
|
-
partition_hts[partition]->FlushMove(state, info.addresses, info.hashes, info.group_count);
|
622
|
-
info.group_count = 0;
|
623
|
-
}
|
624
|
-
});
|
625
|
-
|
626
|
-
idx_t info_idx = 0;
|
627
|
-
idx_t total_count = 0;
|
628
|
-
for (auto &partition_entry : partition_hts) {
|
629
|
-
auto &info = partition_info[info_idx++];
|
630
|
-
partition_entry->FlushMove(state, info.addresses, info.hashes, info.group_count);
|
631
|
-
|
632
|
-
partition_entry->string_heap->Merge(*string_heap);
|
633
|
-
partition_entry->Verify();
|
634
|
-
total_count += partition_entry->Size();
|
594
|
+
void GroupedAggregateHashTable::Partition(vector<GroupedAggregateHashTable *> &partition_hts, idx_t radix_bits) {
|
595
|
+
const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
|
596
|
+
D_ASSERT(partition_hts.size() == num_partitions);
|
597
|
+
|
598
|
+
// Partition the data
|
599
|
+
auto partitioned_data =
|
600
|
+
make_uniq<RadixPartitionedTupleData>(buffer_manager, layout, radix_bits, layout.ColumnCount() - 1);
|
601
|
+
partitioned_data->Partition(*data_collection, TupleDataPinProperties::KEEP_EVERYTHING_PINNED);
|
602
|
+
D_ASSERT(partitioned_data->GetPartitions().size() == num_partitions);
|
603
|
+
|
604
|
+
// Move the partitioned data collections to the partitioned hash tables and initialize the 1st part of the HT
|
605
|
+
auto &partitions = partitioned_data->GetPartitions();
|
606
|
+
for (idx_t partition_idx = 0; partition_idx < num_partitions; partition_idx++) {
|
607
|
+
auto &partition_ht = *partition_hts[partition_idx];
|
608
|
+
partition_ht.data_collection = std::move(partitions[partition_idx]);
|
609
|
+
partition_ht.aggregate_allocator = aggregate_allocator;
|
610
|
+
partition_ht.InitializeFirstPart();
|
611
|
+
partition_ht.Verify();
|
635
612
|
}
|
636
|
-
(void)total_count;
|
637
|
-
D_ASSERT(total_count == entries);
|
638
613
|
}
|
639
614
|
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
auto chunk_idx = scan_state.scan_position / tuples_per_block;
|
653
|
-
auto chunk_offset = (scan_state.scan_position % tuples_per_block) * tuple_size;
|
654
|
-
D_ASSERT(chunk_offset + tuple_size <= Storage::BLOCK_SIZE);
|
655
|
-
|
656
|
-
auto read_ptr = payload_hds_ptrs[chunk_idx++];
|
657
|
-
for (idx_t i = 0; i < this_n; i++) {
|
658
|
-
data_pointers[i] = read_ptr + chunk_offset;
|
659
|
-
chunk_offset += tuple_size;
|
660
|
-
if (chunk_offset >= tuples_per_block * tuple_size) {
|
661
|
-
read_ptr = payload_hds_ptrs[chunk_idx++];
|
662
|
-
chunk_offset = 0;
|
663
|
-
}
|
664
|
-
}
|
665
|
-
scan_state.scan_position += this_n;
|
615
|
+
void GroupedAggregateHashTable::InitializeFirstPart() {
|
616
|
+
data_collection->GetBlockPointers(payload_hds_ptrs);
|
617
|
+
auto size = MaxValue<idx_t>(NextPowerOfTwo(Count() * 2L), capacity);
|
618
|
+
switch (entry_type) {
|
619
|
+
case HtEntryType::HT_WIDTH_64:
|
620
|
+
Resize<aggr_ht_entry_64>(size);
|
621
|
+
break;
|
622
|
+
case HtEntryType::HT_WIDTH_32:
|
623
|
+
Resize<aggr_ht_entry_32>(size);
|
624
|
+
break;
|
625
|
+
default:
|
626
|
+
throw InternalException("Unknown HT entry width");
|
666
627
|
}
|
628
|
+
}
|
667
629
|
|
668
|
-
|
669
|
-
|
630
|
+
idx_t GroupedAggregateHashTable::Scan(TupleDataParallelScanState &gstate, TupleDataLocalScanState &lstate,
|
631
|
+
DataChunk &result) {
|
632
|
+
data_collection->Scan(gstate, lstate, result);
|
633
|
+
|
634
|
+
RowOperationsState row_state(aggregate_allocator->GetAllocator());
|
670
635
|
const auto group_cols = layout.ColumnCount() - 1;
|
671
|
-
|
672
|
-
auto &column = result.data[col_no];
|
673
|
-
RowOperations::Gather(addresses, *FlatVector::IncrementalSelectionVector(), column,
|
674
|
-
*FlatVector::IncrementalSelectionVector(), result.size(), layout, col_no);
|
675
|
-
}
|
636
|
+
RowOperations::FinalizeStates(row_state, layout, lstate.chunk_state.row_locations, result, group_cols);
|
676
637
|
|
677
|
-
|
678
|
-
RowOperations::FinalizeStates(row_state, layout, addresses, result, group_cols);
|
679
|
-
return this_n;
|
638
|
+
return result.size();
|
680
639
|
}
|
681
640
|
|
682
641
|
void GroupedAggregateHashTable::Finalize() {
|
@@ -684,8 +643,11 @@ void GroupedAggregateHashTable::Finalize() {
|
|
684
643
|
return;
|
685
644
|
}
|
686
645
|
|
687
|
-
//
|
646
|
+
// Early release hashes (not needed for partition/scan) and data collection (will be pinned again when scanning)
|
688
647
|
hashes_hdl.Destroy();
|
648
|
+
data_collection->FinalizePinState(td_pin_state);
|
649
|
+
data_collection->Unpin();
|
650
|
+
|
689
651
|
is_finalized = true;
|
690
652
|
}
|
691
653
|
|