duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
- package/src/duckdb/extension/json/json_scan.cpp +1 -1
- package/src/duckdb/extension/json/json_serializer.cpp +26 -69
- package/src/duckdb/src/common/enum_util.cpp +119 -7
- package/src/duckdb/src/common/extra_type_info.cpp +7 -3
- package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
- package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
- package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/value.cpp +63 -42
- package/src/duckdb/src/common/types/vector.cpp +33 -67
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
- package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
- package/src/duckdb/src/execution/window_executor.cpp +6 -5
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
- package/src/duckdb/src/function/table/read_csv.cpp +150 -136
- package/src/duckdb/src/function/table/table_scan.cpp +0 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +12 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +2 -2
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
- package/src/duckdb/src/planner/logical_operator.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
- package/src/duckdb/src/storage/table/row_group.cpp +68 -1
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_src_execution.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -1,49 +1,28 @@
|
|
1
1
|
#include "duckdb/execution/radix_partitioned_hashtable.hpp"
|
2
2
|
|
3
3
|
#include "duckdb/common/radix_partitioning.hpp"
|
4
|
+
#include "duckdb/common/row_operations/row_operations.hpp"
|
4
5
|
#include "duckdb/common/types/row/tuple_data_collection.hpp"
|
6
|
+
#include "duckdb/common/types/row/tuple_data_iterator.hpp"
|
7
|
+
#include "duckdb/execution/aggregate_hashtable.hpp"
|
5
8
|
#include "duckdb/execution/executor.hpp"
|
6
9
|
#include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp"
|
10
|
+
#include "duckdb/main/config.hpp"
|
7
11
|
#include "duckdb/parallel/event.hpp"
|
8
|
-
#include "duckdb/parallel/task_scheduler.hpp"
|
9
12
|
#include "duckdb/planner/expression/bound_reference_expression.hpp"
|
10
13
|
|
11
14
|
namespace duckdb {
|
12
15
|
|
13
|
-
// compute the GROUPING values
|
14
|
-
// for each parameter to the GROUPING clause, we check if the hash table groups on this particular group
|
15
|
-
// if it does, we return 0, otherwise we return 1
|
16
|
-
// we then use bitshifts to combine these values
|
17
|
-
void RadixPartitionedHashTable::SetGroupingValues() {
|
18
|
-
auto &grouping_functions = op.GetGroupingFunctions();
|
19
|
-
for (auto &grouping : grouping_functions) {
|
20
|
-
int64_t grouping_value = 0;
|
21
|
-
D_ASSERT(grouping.size() < sizeof(int64_t) * 8);
|
22
|
-
for (idx_t i = 0; i < grouping.size(); i++) {
|
23
|
-
if (grouping_set.find(grouping[i]) == grouping_set.end()) {
|
24
|
-
// we don't group on this value!
|
25
|
-
grouping_value += (int64_t)1 << (grouping.size() - (i + 1));
|
26
|
-
}
|
27
|
-
}
|
28
|
-
grouping_values.push_back(Value::BIGINT(grouping_value));
|
29
|
-
}
|
30
|
-
}
|
31
|
-
|
32
16
|
RadixPartitionedHashTable::RadixPartitionedHashTable(GroupingSet &grouping_set_p, const GroupedAggregateData &op_p)
|
33
17
|
: grouping_set(grouping_set_p), op(op_p) {
|
34
|
-
|
35
18
|
auto groups_count = op.GroupCount();
|
36
19
|
for (idx_t i = 0; i < groups_count; i++) {
|
37
20
|
if (grouping_set.find(i) == grouping_set.end()) {
|
38
21
|
null_groups.push_back(i);
|
39
22
|
}
|
40
23
|
}
|
41
|
-
|
42
|
-
// 10000 seems like a good compromise here
|
43
|
-
radix_limit = 10000;
|
44
|
-
|
45
24
|
if (grouping_set.empty()) {
|
46
|
-
//
|
25
|
+
// Fake a single group with a constant value for aggregation without groups
|
47
26
|
group_types.emplace_back(LogicalType::TINYINT);
|
48
27
|
}
|
49
28
|
for (auto &entry : grouping_set) {
|
@@ -51,79 +30,279 @@ RadixPartitionedHashTable::RadixPartitionedHashTable(GroupingSet &grouping_set_p
|
|
51
30
|
group_types.push_back(op.group_types[entry]);
|
52
31
|
}
|
53
32
|
SetGroupingValues();
|
33
|
+
|
34
|
+
auto group_types_copy = group_types;
|
35
|
+
group_types_copy.emplace_back(LogicalType::HASH);
|
36
|
+
layout.Initialize(std::move(group_types_copy), AggregateObject::CreateAggregateObjects(op.bindings));
|
37
|
+
}
|
38
|
+
|
39
|
+
void RadixPartitionedHashTable::SetGroupingValues() {
|
40
|
+
// Compute the GROUPING values:
|
41
|
+
// For each parameter to the GROUPING clause, we check if the hash table groups on this particular group
|
42
|
+
// If it does, we return 0, otherwise we return 1
|
43
|
+
// We then use bitshifts to combine these values
|
44
|
+
auto &grouping_functions = op.GetGroupingFunctions();
|
45
|
+
for (auto &grouping : grouping_functions) {
|
46
|
+
int64_t grouping_value = 0;
|
47
|
+
D_ASSERT(grouping.size() < sizeof(int64_t) * 8);
|
48
|
+
for (idx_t i = 0; i < grouping.size(); i++) {
|
49
|
+
if (grouping_set.find(grouping[i]) == grouping_set.end()) {
|
50
|
+
// We don't group on this value!
|
51
|
+
grouping_value += (int64_t)1 << (grouping.size() - (i + 1));
|
52
|
+
}
|
53
|
+
}
|
54
|
+
grouping_values.push_back(Value::BIGINT(grouping_value));
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
const TupleDataLayout &RadixPartitionedHashTable::GetLayout() const {
|
59
|
+
return layout;
|
60
|
+
}
|
61
|
+
|
62
|
+
unique_ptr<GroupedAggregateHashTable> RadixPartitionedHashTable::CreateHT(ClientContext &context, const idx_t capacity,
|
63
|
+
const idx_t radix_bits) const {
|
64
|
+
return make_uniq<GroupedAggregateHashTable>(context, BufferAllocator::Get(context), group_types, op.payload_types,
|
65
|
+
op.bindings, capacity, radix_bits);
|
54
66
|
}
|
55
67
|
|
56
68
|
//===--------------------------------------------------------------------===//
|
57
69
|
// Sink
|
58
70
|
//===--------------------------------------------------------------------===//
|
59
|
-
|
60
|
-
|
71
|
+
struct AggregatePartition {
|
72
|
+
explicit AggregatePartition(unique_ptr<TupleDataCollection> data_p) : data(std::move(data_p)), finalized(false) {
|
73
|
+
}
|
74
|
+
unique_ptr<TupleDataCollection> data;
|
75
|
+
atomic<bool> finalized;
|
76
|
+
};
|
61
77
|
|
78
|
+
class RadixHTGlobalSinkState;
|
79
|
+
|
80
|
+
struct RadixHTConfig {
|
62
81
|
public:
|
63
|
-
explicit
|
64
|
-
: is_empty(true), multi_scan(true), partitioned(false),
|
65
|
-
partition_info(make_uniq<RadixPartitionInfo>(
|
66
|
-
MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads()))) {
|
67
|
-
}
|
82
|
+
explicit RadixHTConfig(ClientContext &context, RadixHTGlobalSinkState &sink);
|
68
83
|
|
69
|
-
|
70
|
-
|
84
|
+
void SetRadixBits(idx_t radix_bits_p);
|
85
|
+
bool SetRadixBitsToExternal();
|
86
|
+
idx_t GetRadixBits() const;
|
71
87
|
|
72
|
-
|
73
|
-
bool
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
//!
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
88
|
+
private:
|
89
|
+
void SetRadixBitsInternal(const idx_t radix_bits_p, bool external);
|
90
|
+
static idx_t InitialSinkRadixBits(ClientContext &context);
|
91
|
+
static idx_t MaximumSinkRadixBits(ClientContext &context);
|
92
|
+
static idx_t ExternalRadixBits(const idx_t &maximum_sink_radix_bits_p);
|
93
|
+
static idx_t SinkCapacity(ClientContext &context);
|
94
|
+
|
95
|
+
private:
|
96
|
+
//! Assume (1 << 15) = 32KB L1 cache per core, divided by two because hyperthreading
|
97
|
+
static constexpr const idx_t L1_CACHE_SIZE = 32768 / 2;
|
98
|
+
//! Assume (1 << 20) = 1MB L2 cache per core, divided by two because hyperthreading
|
99
|
+
static constexpr const idx_t L2_CACHE_SIZE = 1048576 / 2;
|
100
|
+
//! Assume (1 << 20) + (1 << 19) = 1.5MB L3 cache per core (shared), divided by two because hyperthreading
|
101
|
+
static constexpr const idx_t L3_CACHE_SIZE = 1572864 / 2;
|
102
|
+
|
103
|
+
//! Sink radix bits to initialize with
|
104
|
+
static constexpr const idx_t MAXIMUM_INITIAL_SINK_RADIX_BITS = 3;
|
105
|
+
//! Maximum Sink radix bits (independent of threads)
|
106
|
+
static constexpr const idx_t MAXIMUM_FINAL_SINK_RADIX_BITS = 7;
|
107
|
+
//! By how many radix bits to increment if we go external
|
108
|
+
static constexpr const idx_t EXTERNAL_RADIX_BITS_INCREMENT = 3;
|
109
|
+
|
110
|
+
//! The global sink state
|
111
|
+
RadixHTGlobalSinkState &sink;
|
112
|
+
//! Current thread-global sink radix bits
|
113
|
+
atomic<idx_t> sink_radix_bits;
|
114
|
+
//! Maximum Sink radix bits (set based on number of threads)
|
115
|
+
const idx_t maximum_sink_radix_bits;
|
116
|
+
//! Radix bits if we go external
|
117
|
+
const idx_t external_radix_bits;
|
118
|
+
|
119
|
+
public:
|
120
|
+
//! Capacity of HTs during the Sink
|
121
|
+
const idx_t sink_capacity;
|
122
|
+
|
123
|
+
//! If we fill this many blocks per partition, we trigger a repartition
|
124
|
+
static constexpr const double BLOCK_FILL_FACTOR = 1.8;
|
125
|
+
//! By how many bits to repartition if a repartition is triggered
|
126
|
+
static constexpr const idx_t REPARTITION_RADIX_BITS = 2;
|
94
127
|
};
|
95
128
|
|
96
|
-
class
|
129
|
+
class RadixHTGlobalSinkState : public GlobalSinkState {
|
130
|
+
public:
|
131
|
+
RadixHTGlobalSinkState(ClientContext &context, const RadixPartitionedHashTable &radix_ht);
|
132
|
+
|
133
|
+
//! Destroys aggregate states (if multi-scan)
|
134
|
+
~RadixHTGlobalSinkState() override;
|
135
|
+
void Destroy();
|
136
|
+
|
97
137
|
public:
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
138
|
+
//! The radix HT
|
139
|
+
const RadixPartitionedHashTable &radix_ht;
|
140
|
+
//! Config for partitioning
|
141
|
+
RadixHTConfig config;
|
142
|
+
|
143
|
+
//! Whether we've called Finalize
|
144
|
+
bool finalized;
|
145
|
+
//! Whether we are doing an external aggregation
|
146
|
+
atomic<bool> external;
|
147
|
+
//! Threads that have called Sink
|
148
|
+
atomic<idx_t> active_threads;
|
149
|
+
//! If any thread has called combine
|
150
|
+
atomic<bool> any_combined;
|
151
|
+
|
152
|
+
//! Lock for uncombined_data/stored_allocators
|
153
|
+
mutex lock;
|
154
|
+
//! Uncombined partitioned data that will be put into the AggregatePartitions
|
155
|
+
unique_ptr<PartitionedTupleData> uncombined_data;
|
156
|
+
//! Allocators used during the Sink/Finalize
|
157
|
+
vector<shared_ptr<ArenaAllocator>> stored_allocators;
|
158
|
+
|
159
|
+
//! Partitions that are finalized during GetData
|
160
|
+
vector<unique_ptr<AggregatePartition>> partitions;
|
161
|
+
|
162
|
+
//! For synchronizing finalize tasks
|
163
|
+
atomic<idx_t> finalize_idx;
|
164
|
+
|
165
|
+
//! Pin properties when scanning
|
166
|
+
TupleDataPinProperties scan_pin_properties;
|
167
|
+
//! Total count before combining
|
168
|
+
idx_t count_before_combining;
|
169
|
+
};
|
170
|
+
|
171
|
+
RadixHTGlobalSinkState::RadixHTGlobalSinkState(ClientContext &context, const RadixPartitionedHashTable &radix_ht_p)
|
172
|
+
: radix_ht(radix_ht_p), config(context, *this), finalized(false), external(false), active_threads(0),
|
173
|
+
any_combined(false), finalize_idx(0), scan_pin_properties(TupleDataPinProperties::DESTROY_AFTER_DONE),
|
174
|
+
count_before_combining(0) {
|
175
|
+
}
|
176
|
+
|
177
|
+
RadixHTGlobalSinkState::~RadixHTGlobalSinkState() {
|
178
|
+
Destroy();
|
179
|
+
}
|
180
|
+
|
181
|
+
// LCOV_EXCL_START
|
182
|
+
void RadixHTGlobalSinkState::Destroy() {
|
183
|
+
if (scan_pin_properties == TupleDataPinProperties::DESTROY_AFTER_DONE || count_before_combining == 0 ||
|
184
|
+
partitions.empty()) {
|
185
|
+
// Already destroyed / empty
|
186
|
+
return;
|
187
|
+
}
|
188
|
+
|
189
|
+
TupleDataLayout layout = partitions[0]->data->GetLayout().Copy();
|
190
|
+
if (!layout.HasDestructor()) {
|
191
|
+
return; // No destructors, exit
|
192
|
+
}
|
193
|
+
|
194
|
+
// There are aggregates with destructors: Call the destructor for each of the aggregates
|
195
|
+
RowOperationsState row_state(*stored_allocators.back());
|
196
|
+
for (auto &partition : partitions) {
|
197
|
+
auto &data_collection = *partition->data;
|
198
|
+
if (data_collection.Count() == 0) {
|
199
|
+
continue;
|
103
200
|
}
|
201
|
+
TupleDataChunkIterator iterator(data_collection, TupleDataPinProperties::DESTROY_AFTER_DONE, false);
|
202
|
+
auto &row_locations = iterator.GetChunkState().row_locations;
|
203
|
+
do {
|
204
|
+
RowOperations::DestroyStates(row_state, layout, row_locations, iterator.GetCurrentChunkCount());
|
205
|
+
} while (iterator.Next());
|
206
|
+
data_collection.Reset();
|
104
207
|
}
|
208
|
+
}
|
209
|
+
// LCOV_EXCL_STOP
|
105
210
|
|
211
|
+
RadixHTConfig::RadixHTConfig(ClientContext &context, RadixHTGlobalSinkState &sink_p)
|
212
|
+
: sink(sink_p), sink_radix_bits(InitialSinkRadixBits(context)),
|
213
|
+
maximum_sink_radix_bits(MaximumSinkRadixBits(context)),
|
214
|
+
external_radix_bits(ExternalRadixBits(maximum_sink_radix_bits)), sink_capacity(SinkCapacity(context)) {
|
215
|
+
}
|
216
|
+
|
217
|
+
void RadixHTConfig::SetRadixBits(idx_t radix_bits_p) {
|
218
|
+
SetRadixBitsInternal(MinValue(radix_bits_p, maximum_sink_radix_bits), false);
|
219
|
+
}
|
220
|
+
|
221
|
+
bool RadixHTConfig::SetRadixBitsToExternal() {
|
222
|
+
SetRadixBitsInternal(external_radix_bits, true);
|
223
|
+
return sink.external;
|
224
|
+
}
|
225
|
+
|
226
|
+
idx_t RadixHTConfig::GetRadixBits() const {
|
227
|
+
return sink_radix_bits;
|
228
|
+
}
|
229
|
+
|
230
|
+
void RadixHTConfig::SetRadixBitsInternal(const idx_t radix_bits_p, bool external) {
|
231
|
+
if (sink_radix_bits >= radix_bits_p || sink.any_combined) {
|
232
|
+
return;
|
233
|
+
}
|
234
|
+
|
235
|
+
lock_guard<mutex> guard(sink.lock);
|
236
|
+
if (sink_radix_bits >= radix_bits_p || sink.any_combined) {
|
237
|
+
return;
|
238
|
+
}
|
239
|
+
|
240
|
+
if (external) {
|
241
|
+
sink.external = true;
|
242
|
+
}
|
243
|
+
sink_radix_bits = radix_bits_p;
|
244
|
+
return;
|
245
|
+
}
|
246
|
+
|
247
|
+
idx_t RadixHTConfig::InitialSinkRadixBits(ClientContext &context) {
|
248
|
+
const idx_t active_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
249
|
+
return MinValue(RadixPartitioning::RadixBits(NextPowerOfTwo(active_threads)), MAXIMUM_INITIAL_SINK_RADIX_BITS);
|
250
|
+
}
|
251
|
+
|
252
|
+
idx_t RadixHTConfig::MaximumSinkRadixBits(ClientContext &context) {
|
253
|
+
const idx_t active_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
254
|
+
return MinValue(RadixPartitioning::RadixBits(NextPowerOfTwo(active_threads)), MAXIMUM_FINAL_SINK_RADIX_BITS);
|
255
|
+
}
|
256
|
+
|
257
|
+
idx_t RadixHTConfig::ExternalRadixBits(const idx_t &maximum_sink_radix_bits_p) {
|
258
|
+
return MinValue(maximum_sink_radix_bits_p + EXTERNAL_RADIX_BITS_INCREMENT, MAXIMUM_FINAL_SINK_RADIX_BITS);
|
259
|
+
}
|
260
|
+
|
261
|
+
idx_t RadixHTConfig::SinkCapacity(ClientContext &context) {
|
262
|
+
// Get active and maximum number of threads
|
263
|
+
const idx_t active_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
264
|
+
const auto max_threads = DBConfig::GetSystemMaxThreads(FileSystem::GetFileSystem(context));
|
265
|
+
|
266
|
+
// Compute cache size per active thread (assuming cache is shared)
|
267
|
+
const auto total_shared_cache_size = max_threads * L3_CACHE_SIZE;
|
268
|
+
const auto cache_per_active_thread = L1_CACHE_SIZE + L2_CACHE_SIZE + total_shared_cache_size / active_threads;
|
269
|
+
|
270
|
+
// Divide cache per active thread by entry size, round up to next power of two, to get capacity
|
271
|
+
const auto size_per_entry = sizeof(aggr_ht_entry_t) * GroupedAggregateHashTable::LOAD_FACTOR;
|
272
|
+
const auto capacity = NextPowerOfTwo(cache_per_active_thread / size_per_entry);
|
273
|
+
|
274
|
+
// Capacity must be at least the minimum capacity
|
275
|
+
return MaxValue<idx_t>(capacity, GroupedAggregateHashTable::InitialCapacity());
|
276
|
+
}
|
277
|
+
|
278
|
+
class RadixHTLocalSinkState : public LocalSinkState {
|
279
|
+
public:
|
280
|
+
RadixHTLocalSinkState(ClientContext &context, const RadixPartitionedHashTable &radix_ht);
|
281
|
+
|
282
|
+
public:
|
283
|
+
//! Thread-local HT that is re-used after abandoning
|
284
|
+
unique_ptr<GroupedAggregateHashTable> ht;
|
285
|
+
//! Chunk with group columns
|
106
286
|
DataChunk group_chunk;
|
107
|
-
//! The aggregate HT
|
108
|
-
unique_ptr<PartitionableHashTable> ht;
|
109
|
-
//! The total number of groups found by this thread
|
110
|
-
idx_t total_groups;
|
111
287
|
|
112
|
-
//!
|
113
|
-
|
288
|
+
//! Data that is abandoned ends up here (only if we're doing external aggregation)
|
289
|
+
unique_ptr<PartitionedTupleData> abandoned_data;
|
114
290
|
};
|
115
291
|
|
116
|
-
|
117
|
-
|
118
|
-
|
292
|
+
RadixHTLocalSinkState::RadixHTLocalSinkState(ClientContext &, const RadixPartitionedHashTable &radix_ht) {
|
293
|
+
// If there are no groups we create a fake group so everything has the same group
|
294
|
+
group_chunk.InitializeEmpty(radix_ht.group_types);
|
295
|
+
if (radix_ht.grouping_set.empty()) {
|
296
|
+
group_chunk.data[0].Reference(Value::TINYINT(42));
|
297
|
+
}
|
119
298
|
}
|
120
299
|
|
121
300
|
unique_ptr<GlobalSinkState> RadixPartitionedHashTable::GetGlobalSinkState(ClientContext &context) const {
|
122
|
-
return make_uniq<
|
301
|
+
return make_uniq<RadixHTGlobalSinkState>(context, *this);
|
123
302
|
}
|
124
303
|
|
125
304
|
unique_ptr<LocalSinkState> RadixPartitionedHashTable::GetLocalSinkState(ExecutionContext &context) const {
|
126
|
-
return make_uniq<
|
305
|
+
return make_uniq<RadixHTLocalSinkState>(context.client, *this);
|
127
306
|
}
|
128
307
|
|
129
308
|
void RadixPartitionedHashTable::PopulateGroupChunk(DataChunk &group_chunk, DataChunk &input_chunk) const {
|
@@ -141,507 +320,448 @@ void RadixPartitionedHashTable::PopulateGroupChunk(DataChunk &group_chunk, DataC
|
|
141
320
|
group_chunk.Verify();
|
142
321
|
}
|
143
322
|
|
144
|
-
|
145
|
-
|
146
|
-
auto &
|
147
|
-
auto &
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
323
|
+
bool MaybeRepartition(ClientContext &context, RadixHTGlobalSinkState &gstate, RadixHTLocalSinkState &lstate) {
|
324
|
+
auto &config = gstate.config;
|
325
|
+
auto &ht = *lstate.ht;
|
326
|
+
auto &partitioned_data = ht.GetPartitionedData();
|
327
|
+
|
328
|
+
// Check if we're approaching the memory limit
|
329
|
+
const idx_t n_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
330
|
+
const idx_t limit = BufferManager::GetBufferManager(context).GetMaxMemory();
|
331
|
+
const idx_t thread_limit = 0.6 * limit / n_threads;
|
332
|
+
if (ht.GetPartitionedData()->SizeInBytes() > thread_limit || context.config.force_external) {
|
333
|
+
if (gstate.config.SetRadixBitsToExternal()) {
|
334
|
+
// We're approaching the memory limit, unpin the data
|
335
|
+
if (!lstate.abandoned_data) {
|
336
|
+
lstate.abandoned_data = make_uniq<RadixPartitionedTupleData>(
|
337
|
+
BufferManager::GetBufferManager(context), gstate.radix_ht.GetLayout(), config.GetRadixBits(),
|
338
|
+
gstate.radix_ht.GetLayout().ColumnCount() - 1);
|
339
|
+
}
|
152
340
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
if (gstate.finalized_hts.empty()) {
|
159
|
-
// Create a finalized ht in the global state, that we can populate
|
160
|
-
gstate.finalized_hts.push_back(make_shared<GroupedAggregateHashTable>(
|
161
|
-
context.client, BufferAllocator::Get(context.client), group_types, op.payload_types, op.bindings,
|
162
|
-
HtEntryType::HT_WIDTH_64));
|
341
|
+
ht.UnpinData();
|
342
|
+
partitioned_data->Repartition(*lstate.abandoned_data);
|
343
|
+
ht.SetRadixBits(gstate.config.GetRadixBits());
|
344
|
+
ht.InitializePartitionedData();
|
345
|
+
return true;
|
163
346
|
}
|
164
|
-
D_ASSERT(gstate.finalized_hts.size() == 1);
|
165
|
-
D_ASSERT(gstate.finalized_hts[0]);
|
166
|
-
llstate.total_groups +=
|
167
|
-
gstate.finalized_hts[0]->AddChunk(gstate.append_state, group_chunk, payload_input, filter);
|
168
|
-
return;
|
169
347
|
}
|
170
348
|
|
171
|
-
|
172
|
-
|
173
|
-
|
349
|
+
const auto partition_count = partitioned_data->PartitionCount();
|
350
|
+
const auto current_radix_bits = RadixPartitioning::RadixBits(partition_count);
|
351
|
+
D_ASSERT(current_radix_bits <= config.GetRadixBits());
|
174
352
|
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
gstate.partitioned = true;
|
181
|
-
}
|
353
|
+
const auto row_size_per_partition =
|
354
|
+
partitioned_data->Count() * partitioned_data->GetLayout().GetRowWidth() / partition_count;
|
355
|
+
if (row_size_per_partition > config.BLOCK_FILL_FACTOR * Storage::BLOCK_SIZE) {
|
356
|
+
// We crossed our block filling threshold, try to increment radix bits
|
357
|
+
config.SetRadixBits(current_radix_bits + config.REPARTITION_RADIX_BITS);
|
182
358
|
}
|
183
359
|
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
gstate.partitioned = true;
|
360
|
+
const auto global_radix_bits = config.GetRadixBits();
|
361
|
+
if (current_radix_bits == global_radix_bits) {
|
362
|
+
return false; // We're already on the right number of radix bits
|
188
363
|
}
|
364
|
+
|
365
|
+
// We're out-of-sync with the global radix bits, repartition
|
366
|
+
ht.UnpinData();
|
367
|
+
auto old_partitioned_data = std::move(partitioned_data);
|
368
|
+
ht.SetRadixBits(global_radix_bits);
|
369
|
+
ht.InitializePartitionedData();
|
370
|
+
old_partitioned_data->Repartition(*ht.GetPartitionedData());
|
371
|
+
return true;
|
189
372
|
}
|
190
373
|
|
191
|
-
void RadixPartitionedHashTable::
|
192
|
-
|
193
|
-
auto &
|
194
|
-
auto &
|
195
|
-
|
374
|
+
void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input,
|
375
|
+
DataChunk &payload_input, const unsafe_vector<idx_t> &filter) const {
|
376
|
+
auto &gstate = input.global_state.Cast<RadixHTGlobalSinkState>();
|
377
|
+
auto &lstate = input.local_state.Cast<RadixHTLocalSinkState>();
|
378
|
+
if (!lstate.ht) {
|
379
|
+
lstate.ht = CreateHT(context.client, gstate.config.sink_capacity, gstate.config.GetRadixBits());
|
380
|
+
gstate.active_threads++;
|
381
|
+
}
|
196
382
|
|
197
|
-
|
198
|
-
|
383
|
+
auto &group_chunk = lstate.group_chunk;
|
384
|
+
PopulateGroupChunk(group_chunk, chunk);
|
199
385
|
|
200
|
-
|
201
|
-
|
202
|
-
return;
|
203
|
-
}
|
386
|
+
auto &ht = *lstate.ht;
|
387
|
+
ht.AddChunk(group_chunk, payload_input, filter);
|
204
388
|
|
205
|
-
if (
|
206
|
-
return; //
|
389
|
+
if (ht.Count() + STANDARD_VECTOR_SIZE < ht.ResizeThreshold()) {
|
390
|
+
return; // We can fit another chunk
|
207
391
|
}
|
208
392
|
|
209
|
-
if (
|
210
|
-
|
393
|
+
if (gstate.active_threads > 2) {
|
394
|
+
// 'Reset' the HT without taking its data, we can just keep appending to the same collection
|
395
|
+
// This only works because we never resize the HT
|
396
|
+
ht.ClearPointerTable();
|
397
|
+
ht.ResetCount();
|
398
|
+
// We don't do this when running with 1 or 2 threads, it only makes sense when there's many threads
|
211
399
|
}
|
212
400
|
|
213
|
-
//
|
214
|
-
|
401
|
+
// Check if we need to repartition
|
402
|
+
auto repartitioned = MaybeRepartition(context.client, gstate, lstate);
|
215
403
|
|
216
|
-
|
217
|
-
|
218
|
-
|
404
|
+
if (repartitioned && ht.Count() != 0) {
|
405
|
+
// We repartitioned, but we didn't clear the pointer table / reset the count because we're on 1 or 2 threads
|
406
|
+
ht.ClearPointerTable();
|
407
|
+
ht.ResetCount();
|
219
408
|
}
|
220
|
-
|
221
|
-
|
409
|
+
|
410
|
+
// TODO: combine early and often
|
222
411
|
}
|
223
412
|
|
224
|
-
void RadixPartitionedHashTable::
|
225
|
-
|
226
|
-
auto &
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
context, allocator, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64);
|
413
|
+
void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkState &gstate_p,
|
414
|
+
LocalSinkState &lstate_p) const {
|
415
|
+
auto &gstate = gstate_p.Cast<RadixHTGlobalSinkState>();
|
416
|
+
auto &lstate = lstate_p.Cast<RadixHTLocalSinkState>();
|
417
|
+
if (!lstate.ht) {
|
418
|
+
return;
|
231
419
|
}
|
232
|
-
}
|
233
420
|
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
gstate.is_finalized = true;
|
421
|
+
// Set any_combined, then check one last time whether we need to repartition
|
422
|
+
gstate.any_combined = true;
|
423
|
+
MaybeRepartition(context.client, gstate, lstate);
|
238
424
|
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
D_ASSERT(gstate.
|
244
|
-
|
425
|
+
auto &ht = *lstate.ht;
|
426
|
+
ht.UnpinData();
|
427
|
+
|
428
|
+
if (lstate.abandoned_data) {
|
429
|
+
D_ASSERT(gstate.external);
|
430
|
+
D_ASSERT(lstate.abandoned_data->PartitionCount() == lstate.ht->GetPartitionedData()->PartitionCount());
|
431
|
+
D_ASSERT(lstate.abandoned_data->PartitionCount() ==
|
432
|
+
RadixPartitioning::NumberOfPartitions(gstate.config.GetRadixBits()));
|
433
|
+
lstate.abandoned_data->Combine(*lstate.ht->GetPartitionedData());
|
434
|
+
} else {
|
435
|
+
lstate.abandoned_data = std::move(ht.GetPartitionedData());
|
245
436
|
}
|
246
437
|
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
for (auto &pht : gstate.intermediate_hts) {
|
253
|
-
if (!pht->IsPartitioned()) {
|
254
|
-
pht->Partition(true);
|
255
|
-
}
|
256
|
-
}
|
257
|
-
// schedule additional tasks to combine the partial HTs
|
258
|
-
InitializeFinalizedHTs(context, gstate_p);
|
259
|
-
gstate.is_partitioned = true;
|
260
|
-
return true;
|
261
|
-
} else { // in the non-partitioned case we immediately combine all the unpartitioned hts created by the threads.
|
262
|
-
// TODO possible optimization, if total count < limit for 32 bit ht, use that one
|
263
|
-
// create this ht here so finalize needs no lock on gstate
|
264
|
-
|
265
|
-
gstate.finalized_hts.push_back(make_shared<GroupedAggregateHashTable>(
|
266
|
-
context, allocator, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64));
|
267
|
-
for (auto &pht : gstate.intermediate_hts) {
|
268
|
-
auto unpartitioned = pht->GetUnpartitioned();
|
269
|
-
for (auto &unpartitioned_ht : unpartitioned) {
|
270
|
-
D_ASSERT(unpartitioned_ht);
|
271
|
-
gstate.finalized_hts[0]->Combine(*unpartitioned_ht);
|
272
|
-
unpartitioned_ht.reset();
|
273
|
-
}
|
274
|
-
unpartitioned.clear();
|
275
|
-
}
|
276
|
-
D_ASSERT(gstate.finalized_hts[0]);
|
277
|
-
gstate.finalized_hts[0]->Finalize();
|
278
|
-
return false;
|
438
|
+
lock_guard<mutex> guard(gstate.lock);
|
439
|
+
if (gstate.uncombined_data) {
|
440
|
+
gstate.uncombined_data->Combine(*lstate.abandoned_data);
|
441
|
+
} else {
|
442
|
+
gstate.uncombined_data = std::move(lstate.abandoned_data);
|
279
443
|
}
|
444
|
+
gstate.stored_allocators.emplace_back(ht.GetAggregateAllocator());
|
280
445
|
}
|
281
446
|
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
static void FinalizeHT(RadixHTGlobalState &gstate, idx_t radix) {
|
292
|
-
D_ASSERT(gstate.partition_info->n_partitions <= gstate.finalized_hts.size());
|
293
|
-
D_ASSERT(gstate.finalized_hts[radix]);
|
294
|
-
|
295
|
-
idx_t pht_idx_from = 0;
|
296
|
-
idx_t pht_idx_to = gstate.intermediate_hts.size();
|
297
|
-
if (gstate.repartitioned) {
|
298
|
-
const auto num_partitions_before = gstate.repartition_tasks.size();
|
299
|
-
const auto multiplier = gstate.partition_info->n_partitions / num_partitions_before;
|
300
|
-
const auto radix_before = radix / multiplier;
|
301
|
-
pht_idx_from = radix_before * gstate.repartition_tasks_per_partition;
|
302
|
-
pht_idx_to = pht_idx_from + gstate.repartition_tasks_per_partition;
|
303
|
-
}
|
447
|
+
void RadixPartitionedHashTable::Finalize(ClientContext &, GlobalSinkState &gstate_p) const {
|
448
|
+
auto &gstate = gstate_p.Cast<RadixHTGlobalSinkState>();
|
449
|
+
|
450
|
+
if (gstate.uncombined_data) {
|
451
|
+
auto &uncombined_data = *gstate.uncombined_data;
|
452
|
+
gstate.count_before_combining = uncombined_data.Count();
|
453
|
+
|
454
|
+
// If true there is no need to combine, it was all done by a single thread in a single HT
|
455
|
+
const auto single_ht = !gstate.external && gstate.active_threads == 1;
|
304
456
|
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
457
|
+
auto &uncombined_partition_data = uncombined_data.GetPartitions();
|
458
|
+
const auto n_partitions = uncombined_partition_data.size();
|
459
|
+
gstate.partitions.reserve(n_partitions);
|
460
|
+
for (idx_t i = 0; i < n_partitions; i++) {
|
461
|
+
gstate.partitions.emplace_back(make_uniq<AggregatePartition>(std::move(uncombined_partition_data[i])));
|
462
|
+
if (single_ht) {
|
463
|
+
gstate.finalize_idx++;
|
464
|
+
gstate.partitions.back()->finalized = true;
|
309
465
|
}
|
310
466
|
}
|
311
|
-
|
467
|
+
} else {
|
468
|
+
gstate.count_before_combining = 0;
|
312
469
|
}
|
313
470
|
|
314
|
-
|
315
|
-
|
316
|
-
event->FinishTask();
|
317
|
-
return TaskExecutionResult::TASK_FINISHED;
|
318
|
-
}
|
471
|
+
gstate.finalized = true;
|
472
|
+
}
|
319
473
|
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
474
|
+
//===--------------------------------------------------------------------===//
|
475
|
+
// Source
|
476
|
+
//===--------------------------------------------------------------------===//
|
477
|
+
idx_t RadixPartitionedHashTable::Count(GlobalSinkState &sink_p) const {
|
478
|
+
const auto count = CountInternal(sink_p);
|
479
|
+
return count == 0 && grouping_set.empty() ? 1 : count;
|
480
|
+
}
|
481
|
+
|
482
|
+
idx_t RadixPartitionedHashTable::CountInternal(GlobalSinkState &sink_p) const {
|
483
|
+
auto &sink = sink_p.Cast<RadixHTGlobalSinkState>();
|
484
|
+
return sink.count_before_combining;
|
485
|
+
}
|
486
|
+
|
487
|
+
void RadixPartitionedHashTable::SetMultiScan(GlobalSinkState &sink_p) {
|
488
|
+
auto &sink = sink_p.Cast<RadixHTGlobalSinkState>();
|
489
|
+
sink.scan_pin_properties = TupleDataPinProperties::UNPIN_AFTER_DONE;
|
490
|
+
}
|
491
|
+
|
492
|
+
enum class RadixHTSourceTaskType : uint8_t { NO_TASK, FINALIZE, SCAN };
|
493
|
+
|
494
|
+
class RadixHTLocalSourceState;
|
495
|
+
|
496
|
+
class RadixHTGlobalSourceState : public GlobalSourceState {
|
497
|
+
public:
|
498
|
+
RadixHTGlobalSourceState(ClientContext &context, const RadixPartitionedHashTable &radix_ht);
|
499
|
+
|
500
|
+
//! Assigns a task to a local source state
|
501
|
+
bool AssignTask(RadixHTGlobalSinkState &sink, RadixHTLocalSourceState &lstate);
|
502
|
+
|
503
|
+
public:
|
504
|
+
//! The client context
|
505
|
+
ClientContext &context;
|
506
|
+
//! For synchronizing the source phase
|
507
|
+
atomic<bool> finished;
|
508
|
+
|
509
|
+
//! Column ids for scanning
|
510
|
+
vector<column_t> column_ids;
|
511
|
+
|
512
|
+
//! For synchronizing scan tasks
|
513
|
+
atomic<idx_t> scan_idx;
|
514
|
+
atomic<idx_t> scan_done;
|
324
515
|
};
|
325
516
|
|
326
|
-
class
|
517
|
+
enum class RadixHTScanStatus : uint8_t { INIT, IN_PROGRESS, DONE };
|
518
|
+
|
519
|
+
class RadixHTLocalSourceState : public LocalSourceState {
|
327
520
|
public:
|
328
|
-
|
329
|
-
idx_t num_partitions_before_p)
|
330
|
-
: ExecutorTask(executor), event(std::move(event_p)), state(state_p),
|
331
|
-
num_partitions_before(num_partitions_before_p) {
|
332
|
-
}
|
333
|
-
|
334
|
-
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
|
335
|
-
const auto multiplier = state.partition_info->n_partitions / num_partitions_before;
|
336
|
-
|
337
|
-
idx_t repartition_radix = 0;
|
338
|
-
idx_t finalize_radix = 0;
|
339
|
-
while (repartition_radix < num_partitions_before && finalize_radix < state.partition_info->n_partitions) {
|
340
|
-
// Loop over original partitions until we find one that we can repartition
|
341
|
-
for (; repartition_radix < num_partitions_before; repartition_radix++) {
|
342
|
-
auto task_idx = state.repartition_tasks_assigned[repartition_radix]++;
|
343
|
-
if (task_idx >= state.repartition_tasks_per_partition) {
|
344
|
-
continue;
|
345
|
-
}
|
346
|
-
auto &ht = state.repartition_tasks[repartition_radix][task_idx];
|
347
|
-
ht->Partition(true);
|
348
|
-
state.intermediate_hts[repartition_radix * state.repartition_tasks_per_partition + task_idx] =
|
349
|
-
std::move(ht);
|
350
|
-
state.repartition_tasks_done[repartition_radix]++;
|
351
|
-
break;
|
352
|
-
}
|
521
|
+
explicit RadixHTLocalSourceState(ExecutionContext &context, const RadixPartitionedHashTable &radix_ht);
|
353
522
|
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
}
|
360
|
-
|
361
|
-
if (state.finalize_assigned[finalize_radix]) {
|
362
|
-
continue; // Already assigned
|
363
|
-
}
|
364
|
-
|
365
|
-
{
|
366
|
-
lock_guard<mutex> guard(state.lock);
|
367
|
-
if (state.finalize_assigned[finalize_radix]) {
|
368
|
-
// LCOV_EXCL_START
|
369
|
-
continue; // Check again with lock, but already assigned
|
370
|
-
// LCOV_EXCL_STOP
|
371
|
-
}
|
372
|
-
state.finalize_assigned[finalize_radix] = true;
|
373
|
-
}
|
374
|
-
|
375
|
-
// We can finalize!
|
376
|
-
RadixAggregateFinalizeTask::FinalizeHT(state, finalize_radix);
|
377
|
-
}
|
378
|
-
}
|
379
|
-
event->FinishTask();
|
380
|
-
return TaskExecutionResult::TASK_FINISHED;
|
381
|
-
}
|
523
|
+
public:
|
524
|
+
//! Do the work this thread has been assigned
|
525
|
+
void ExecuteTask(RadixHTGlobalSinkState &sink, RadixHTGlobalSourceState &gstate, DataChunk &chunk);
|
526
|
+
//! Whether this thread has finished the work it has been assigned
|
527
|
+
bool TaskFinished();
|
382
528
|
|
383
529
|
private:
|
384
|
-
|
385
|
-
|
386
|
-
|
530
|
+
//! Execute the finalize or scan task
|
531
|
+
void Finalize(RadixHTGlobalSinkState &sink, RadixHTGlobalSourceState &gstate);
|
532
|
+
void Scan(RadixHTGlobalSinkState &sink, RadixHTGlobalSourceState &gstate, DataChunk &chunk);
|
533
|
+
|
534
|
+
public:
|
535
|
+
//! Current task and index
|
536
|
+
RadixHTSourceTaskType task;
|
537
|
+
idx_t task_idx;
|
538
|
+
|
539
|
+
//! Thread-local HT that is re-used to Finalize
|
540
|
+
unique_ptr<GroupedAggregateHashTable> ht;
|
541
|
+
//! Current status of a Scan
|
542
|
+
RadixHTScanStatus scan_status;
|
543
|
+
|
544
|
+
private:
|
545
|
+
//! Allocator and layout for finalizing state
|
546
|
+
TupleDataLayout layout;
|
547
|
+
ArenaAllocator aggregate_allocator;
|
548
|
+
|
549
|
+
//! State and chunk for scanning
|
550
|
+
TupleDataScanState scan_state;
|
551
|
+
DataChunk scan_chunk;
|
387
552
|
};
|
388
553
|
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
554
|
+
unique_ptr<GlobalSourceState> RadixPartitionedHashTable::GetGlobalSourceState(ClientContext &context) const {
|
555
|
+
return make_uniq<RadixHTGlobalSourceState>(context, *this);
|
556
|
+
}
|
557
|
+
|
558
|
+
unique_ptr<LocalSourceState> RadixPartitionedHashTable::GetLocalSourceState(ExecutionContext &context) const {
|
559
|
+
return make_uniq<RadixHTLocalSourceState>(context, *this);
|
560
|
+
}
|
561
|
+
|
562
|
+
RadixHTGlobalSourceState::RadixHTGlobalSourceState(ClientContext &context_p, const RadixPartitionedHashTable &radix_ht)
|
563
|
+
: context(context_p), finished(false), scan_idx(0), scan_done(0) {
|
564
|
+
for (column_t column_id = 0; column_id < radix_ht.group_types.size(); column_id++) {
|
565
|
+
column_ids.push_back(column_id);
|
394
566
|
}
|
567
|
+
}
|
395
568
|
|
396
|
-
|
397
|
-
|
398
|
-
idx_t tasks_per_partition;
|
399
|
-
GetRepartitionInfo(executor.context, state, repartition_radix_bits, concurrent_repartitions, tasks_per_partition);
|
400
|
-
if (repartition_radix_bits == gstate.partition_info->radix_bits) {
|
401
|
-
// No repartitioning necessary
|
402
|
-
for (idx_t r = 0; r < gstate.partition_info->n_partitions; r++) {
|
403
|
-
D_ASSERT(gstate.partition_info->n_partitions <= gstate.finalized_hts.size());
|
404
|
-
D_ASSERT(gstate.finalized_hts[r]);
|
405
|
-
tasks.push_back(make_uniq<RadixAggregateFinalizeTask>(executor, event, gstate, r));
|
406
|
-
}
|
407
|
-
} else {
|
408
|
-
// Schedule repartition / finalize tasks
|
409
|
-
ScheduleRepartitionTasks(executor, event, state, tasks, repartition_radix_bits, concurrent_repartitions,
|
410
|
-
tasks_per_partition);
|
411
|
-
}
|
412
|
-
}
|
413
|
-
|
414
|
-
void RadixPartitionedHashTable::ScheduleRepartitionTasks(Executor &executor, const shared_ptr<Event> &event,
|
415
|
-
GlobalSinkState &state, vector<shared_ptr<Task>> &tasks,
|
416
|
-
const idx_t repartition_radix_bits,
|
417
|
-
const idx_t concurrent_repartitions,
|
418
|
-
const idx_t tasks_per_partition) const {
|
419
|
-
auto &gstate = state.Cast<RadixHTGlobalState>();
|
420
|
-
D_ASSERT(repartition_radix_bits > gstate.partition_info->radix_bits);
|
421
|
-
const auto num_partitions_before = gstate.partition_info->n_partitions;
|
422
|
-
const auto multiplier = RadixPartitioning::NumberOfPartitions(repartition_radix_bits) / num_partitions_before;
|
423
|
-
|
424
|
-
// Inititialize gstate
|
425
|
-
auto new_partition_info =
|
426
|
-
make_uniq<RadixPartitionInfo>(RadixPartitioning::NumberOfPartitions(repartition_radix_bits));
|
427
|
-
gstate.repartitioned = true;
|
428
|
-
gstate.repartition_tasks_per_partition = tasks_per_partition;
|
429
|
-
gstate.repartition_tasks.resize(num_partitions_before);
|
430
|
-
gstate.repartition_tasks_assigned = make_uniq_array<atomic<idx_t>>(num_partitions_before);
|
431
|
-
gstate.repartition_tasks_done = make_uniq_array<atomic<idx_t>>(num_partitions_before);
|
432
|
-
gstate.finalize_assigned = make_uniq_array<atomic<bool>>(new_partition_info->n_partitions);
|
433
|
-
for (idx_t partition_idx = 0; partition_idx < num_partitions_before; partition_idx++) {
|
434
|
-
gstate.repartition_tasks_assigned[partition_idx] = 0;
|
435
|
-
gstate.repartition_tasks_done[partition_idx] = 0;
|
436
|
-
|
437
|
-
// Grab intermediate data from gstate
|
438
|
-
HashTableList partition_list;
|
439
|
-
for (auto &pht : gstate.intermediate_hts) {
|
440
|
-
for (auto &ht : pht->GetPartition(partition_idx)) {
|
441
|
-
partition_list.push_back(std::move(ht));
|
442
|
-
}
|
443
|
-
}
|
569
|
+
bool RadixHTGlobalSourceState::AssignTask(RadixHTGlobalSinkState &sink, RadixHTLocalSourceState &lstate) {
|
570
|
+
D_ASSERT(lstate.scan_status != RadixHTScanStatus::IN_PROGRESS);
|
444
571
|
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
*new_partition_info, group_types, op.payload_types, op.bindings);
|
452
|
-
auto ht_idx_to = MinValue<idx_t>(ht_idx + hts_per_task, partition_list.size());
|
453
|
-
for (; ht_idx < ht_idx_to; ht_idx++) {
|
454
|
-
auto &ht = partition_list[ht_idx];
|
455
|
-
task_ht->Append(*ht);
|
456
|
-
ht.reset();
|
457
|
-
}
|
458
|
-
gstate.repartition_tasks[partition_idx].push_back(std::move(task_ht));
|
459
|
-
}
|
572
|
+
const auto n_partitions = sink.partitions.size();
|
573
|
+
if (scan_done == n_partitions) {
|
574
|
+
finished = true;
|
575
|
+
return false;
|
576
|
+
}
|
577
|
+
// We first try to assign a Scan task, then a Finalize task if that didn't work, without using any locks
|
460
578
|
|
461
|
-
|
462
|
-
|
579
|
+
// We need an atomic compare-and-swap to assign a Scan task, because we need to only increment
|
580
|
+
// the 'scan_idx' atomic if the 'finalize' of that partition is true, i.e., ready to be scanned
|
581
|
+
bool scan_assigned = true;
|
582
|
+
do {
|
583
|
+
lstate.task_idx = scan_idx.load();
|
584
|
+
if (lstate.task_idx >= n_partitions || !sink.partitions[lstate.task_idx]->finalized) {
|
585
|
+
scan_assigned = false;
|
586
|
+
break;
|
463
587
|
}
|
464
|
-
}
|
588
|
+
} while (!std::atomic_compare_exchange_weak(&scan_idx, &lstate.task_idx, lstate.task_idx + 1));
|
465
589
|
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
590
|
+
if (scan_assigned) {
|
591
|
+
// We successfully assigned a Scan task
|
592
|
+
D_ASSERT(lstate.task_idx < n_partitions && sink.partitions[lstate.task_idx]->finalized);
|
593
|
+
lstate.task = RadixHTSourceTaskType::SCAN;
|
594
|
+
lstate.scan_status = RadixHTScanStatus::INIT;
|
595
|
+
return true;
|
470
596
|
}
|
471
597
|
|
472
|
-
|
473
|
-
|
598
|
+
// We can just increment the atomic here, much simpler than assigning the scan task
|
599
|
+
lstate.task_idx = sink.finalize_idx++;
|
600
|
+
if (lstate.task_idx < n_partitions) {
|
601
|
+
// We successfully assigned a Finalize task
|
602
|
+
lstate.task = RadixHTSourceTaskType::FINALIZE;
|
603
|
+
return true;
|
604
|
+
}
|
474
605
|
|
475
|
-
|
476
|
-
|
606
|
+
// We didn't manage to assign a finalize task
|
607
|
+
return false;
|
477
608
|
}
|
478
609
|
|
479
|
-
|
480
|
-
|
481
|
-
|
610
|
+
RadixHTLocalSourceState::RadixHTLocalSourceState(ExecutionContext &context, const RadixPartitionedHashTable &radix_ht)
|
611
|
+
: task(RadixHTSourceTaskType::NO_TASK), scan_status(RadixHTScanStatus::DONE), layout(radix_ht.GetLayout().Copy()),
|
612
|
+
aggregate_allocator(BufferAllocator::Get(context.client)) {
|
613
|
+
auto &allocator = BufferAllocator::Get(context.client);
|
614
|
+
auto scan_chunk_types = radix_ht.group_types;
|
615
|
+
for (auto &aggr_type : radix_ht.op.aggregate_return_types) {
|
616
|
+
scan_chunk_types.push_back(aggr_type);
|
617
|
+
}
|
618
|
+
scan_chunk.Initialize(allocator, scan_chunk_types);
|
482
619
|
}
|
483
620
|
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
621
|
+
void RadixHTLocalSourceState::ExecuteTask(RadixHTGlobalSinkState &sink, RadixHTGlobalSourceState &gstate,
|
622
|
+
DataChunk &chunk) {
|
623
|
+
switch (task) {
|
624
|
+
case RadixHTSourceTaskType::FINALIZE:
|
625
|
+
Finalize(sink, gstate);
|
626
|
+
break;
|
627
|
+
case RadixHTSourceTaskType::SCAN:
|
628
|
+
Scan(sink, gstate, chunk);
|
629
|
+
break;
|
630
|
+
default:
|
631
|
+
throw InternalException("Unexpected RadixHTSourceTaskType in ExecuteTask!");
|
490
632
|
}
|
491
|
-
return false;
|
492
633
|
}
|
493
634
|
|
-void
-
-
-    auto &gstate = state.Cast<RadixHTGlobalState>();
-    const auto num_partitions = gstate.partition_info->n_partitions;
-    const auto radix_bits = gstate.partition_info->radix_bits;
-    D_ASSERT(IsPowerOfTwo(num_partitions));
-
-    vector<idx_t> partition_counts(num_partitions, 0);
-    vector<idx_t> partition_sizes(num_partitions, 0);
-    for (const auto &ht : gstate.intermediate_hts) {
-        for (idx_t partition_idx = 0; partition_idx < num_partitions; partition_idx++) {
-            partition_counts[partition_idx] += ht->GetPartitionCount(partition_idx);
-            partition_sizes[partition_idx] += ht->GetPartitionSize(partition_idx);
-        }
-    }
+void RadixHTLocalSourceState::Finalize(RadixHTGlobalSinkState &sink, RadixHTGlobalSourceState &gstate) {
+    D_ASSERT(task == RadixHTSourceTaskType::FINALIZE);
+    D_ASSERT(scan_status != RadixHTScanStatus::IN_PROGRESS);

-
-
-
-    for (idx_t partition_idx = 0; partition_idx < num_partitions; partition_idx++) {
-        const auto &partition_count = partition_counts[partition_idx];
-        const auto &partition_size = partition_sizes[partition_idx];
-        auto partition_ht_size =
-            partition_size + GroupedAggregateHashTable::FirstPartSize(partition_count, HtEntryType::HT_WIDTH_64);
-        if (partition_ht_size > max_partition_size) {
-            max_partition_idx = partition_idx;
-            max_partition_size = partition_ht_size;
-        }
-        total_size += partition_ht_size;
-    }
-
-    // Switch to out-of-core finalize at ~60%
-    const auto max_ht_size = double(0.6) * BufferManager::GetBufferManager(context).GetMaxMemory();
-    const idx_t n_threads = PreviousPowerOfTwo(TaskScheduler::GetScheduler(context).NumberOfThreads());
-    D_ASSERT(IsPowerOfTwo(n_threads));
-    if (!context.config.force_external && total_size < max_ht_size) {
-        // In-memory finalize
-        if (num_partitions >= n_threads) { // Can already keep all threads busy
-            repartition_radix_bits = radix_bits;
-            tasks_per_partition = 1;
-        } else { // Repartition to keep all threads busy
-            // Can't have coverage because RadixHTGlobalState::MAX_RADIX_PARTITIONS > threads on github actions
-            // LCOV_EXCL_START
-            repartition_radix_bits = RadixPartitioning::RadixBits(NextPowerOfTwo(n_threads));
-            tasks_per_partition = n_threads / num_partitions;
-            // LCOV_EXCL_STOP
-        }
-        concurrent_repartitions = num_partitions;
+    auto &partition = *sink.partitions[task_idx];
+    if (partition.data->Count() == 0) {
+        partition.finalized = true;
        return;
    }

-
-
-
+    if (!ht) {
+        // Create a HT with sufficient capacity
+        const auto capacity = GroupedAggregateHashTable::GetCapacityForCount(partition.data->Count());
+        ht = sink.radix_ht.CreateHT(gstate.context, capacity, 0);
+    } else {
+        // We may want to resize here to the size of this partition, but for now we just assume uniform partition sizes
+        ht->InitializePartitionedData();
+        ht->ClearPointerTable();
+        ht->ResetCount();
+    }

-
-
-
-    double partition_multiplier = RadixPartitioning::NumberOfPartitions(added_bits);
+    // Now combine the uncombined data using this thread's HT
+    ht->Combine(*partition.data);
+    ht->UnpinData();

-
-
-
-
+    // Move the combined data back to the partition
+    partition.data =
+        make_uniq<TupleDataCollection>(BufferManager::GetBufferManager(gstate.context), sink.radix_ht.GetLayout());
+    partition.data->Combine(*ht->GetPartitionedData()->GetPartitions()[0]);

-
-
-
-
-
-
-    tasks_per_partition = NextPowerOfTwo(n_threads / concurrent_repartitions);
+    // Mark partition as ready to scan
+    partition.finalized = true;
+
+    // Make sure this thread's aggregate allocator does not get lost
+    lock_guard<mutex> guard(sink.lock);
+    sink.stored_allocators.emplace_back(ht->GetAggregateAllocator());
}

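Finalize above sizes the thread-local hash table via GroupedAggregateHashTable::GetCapacityForCount before combining a partition. A rough, self-contained sketch of that style of sizing; the 2x slack factor and helper names here are assumptions for illustration, not DuckDB's exact constants:

    #include <cstdint>

    // Round up to the next power of two (assumes v >= 1).
    static uint64_t NextPowerOfTwo(uint64_t v) {
        v--;
        v |= v >> 1;
        v |= v >> 2;
        v |= v >> 4;
        v |= v >> 8;
        v |= v >> 16;
        v |= v >> 32;
        return v + 1;
    }

    // Capacity for a power-of-two table: keep more slots than entries so probe chains stay short.
    static uint64_t CapacityForCount(uint64_t count) {
        return NextPowerOfTwo(count * 2);
    }

With this, a partition holding 100,000 groups would get a table of 262,144 slots (the next power of two above 200,000).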
-
-
-
-class RadixHTGlobalSourceState : public GlobalSourceState {
-public:
-    explicit RadixHTGlobalSourceState(Allocator &allocator, const RadixPartitionedHashTable &ht)
-        : ht_index(0), initialized(false), finished(false) {
-    }
+void RadixHTLocalSourceState::Scan(RadixHTGlobalSinkState &sink, RadixHTGlobalSourceState &gstate, DataChunk &chunk) {
+    D_ASSERT(task == RadixHTSourceTaskType::SCAN);
+    D_ASSERT(scan_status != RadixHTScanStatus::DONE);

-
-
-
-    idx_t ht_index;
-    //! The set of aggregate scan states
-    unsafe_unique_array<TupleDataParallelScanState> ht_scan_states;
-    atomic<bool> initialized;
-    atomic<bool> finished;
-};
+    auto &partition = *sink.partitions[task_idx];
+    D_ASSERT(partition.finalized);
+    auto &data_collection = *partition.data;

-
-
-
-
-    auto scan_chunk_types = ht.group_types;
-    for (auto &aggr_type : ht.op.aggregate_return_types) {
-        scan_chunk_types.push_back(aggr_type);
+    if (data_collection.Count() == 0) {
+        scan_status = RadixHTScanStatus::DONE;
+        if (++gstate.scan_done == sink.partitions.size()) {
+            gstate.finished = true;
        }
-
+        return;
    }

-
-
-
-
-    //! A reference to the current HT that we are scanning
-    shared_ptr<GroupedAggregateHashTable> ht;
-    //! Scan state for the current HT
-    TupleDataLocalScanState scan_state;
-};
+    if (scan_status == RadixHTScanStatus::INIT) {
+        data_collection.InitializeScan(scan_state, gstate.column_ids, sink.scan_pin_properties);
+        scan_status = RadixHTScanStatus::IN_PROGRESS;
+    }

-
-
-
+    if (!data_collection.Scan(scan_state, scan_chunk)) {
+        scan_status = RadixHTScanStatus::DONE;
+        if (++gstate.scan_done == sink.partitions.size()) {
+            gstate.finished = true;
+        }
+        if (sink.scan_pin_properties == TupleDataPinProperties::DESTROY_AFTER_DONE) {
+            data_collection.Reset();
+        }
+        return;
+    }

-
-
-
+    RowOperationsState row_state(aggregate_allocator);
+    const auto group_cols = layout.ColumnCount() - 1;
+    RowOperations::FinalizeStates(row_state, layout, scan_state.chunk_state.row_locations, scan_chunk, group_cols);

-
-
-    if (gstate.is_empty && grouping_set.empty()) {
-        return 1;
+    if (sink.scan_pin_properties == TupleDataPinProperties::DESTROY_AFTER_DONE && layout.HasDestructor()) {
+        RowOperations::DestroyStates(row_state, layout, scan_state.chunk_state.row_locations, scan_chunk.size());
    }

-
-
-
+    auto &radix_ht = sink.radix_ht;
+    idx_t chunk_index = 0;
+    for (auto &entry : radix_ht.grouping_set) {
+        chunk.data[entry].Reference(scan_chunk.data[chunk_index++]);
+    }
+    for (auto null_group : radix_ht.null_groups) {
+        chunk.data[null_group].SetVectorType(VectorType::CONSTANT_VECTOR);
+        ConstantVector::SetNull(chunk.data[null_group], true);
+    }
+    D_ASSERT(radix_ht.grouping_set.size() + radix_ht.null_groups.size() == radix_ht.op.GroupCount());
+    for (idx_t col_idx = 0; col_idx < radix_ht.op.aggregates.size(); col_idx++) {
+        chunk.data[radix_ht.op.GroupCount() + col_idx].Reference(
+            scan_chunk.data[radix_ht.group_types.size() + col_idx]);
+    }
+    D_ASSERT(radix_ht.op.grouping_functions.size() == radix_ht.grouping_values.size());
+    for (idx_t i = 0; i < radix_ht.op.grouping_functions.size(); i++) {
+        chunk.data[radix_ht.op.GroupCount() + radix_ht.op.aggregates.size() + i].Reference(radix_ht.grouping_values[i]);
+    }
+    chunk.SetCardinality(scan_chunk);
+    D_ASSERT(chunk.size() != 0);
+}
+
+bool RadixHTLocalSourceState::TaskFinished() {
+    switch (task) {
+    case RadixHTSourceTaskType::FINALIZE:
+        return true;
+    case RadixHTSourceTaskType::SCAN:
+        return scan_status == RadixHTScanStatus::DONE;
+    default:
+        D_ASSERT(task == RadixHTSourceTaskType::NO_TASK);
+        return true;
    }
-    return count;
}

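In Scan above, every thread that exhausts a partition bumps the shared scan_done counter, and the thread that performs the final increment flips the global finished flag. A minimal sketch of that last-finisher pattern using standard atomics (hypothetical type names, not DuckDB's):

    #include <atomic>
    #include <cstdint>

    struct GlobalSourceState {
        std::atomic<uint64_t> scan_done {0};   // partitions fully scanned so far
        std::atomic<bool> finished {false};
        uint64_t partition_count = 0;
    };

    static void OnPartitionExhausted(GlobalSourceState &gstate) {
        // fetch_add returns the previous value, so +1 is this thread's updated count;
        // exactly one thread observes the final value and marks the source finished
        if (gstate.scan_done.fetch_add(1) + 1 == gstate.partition_count) {
            gstate.finished = true;
        }
    }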
SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &chunk,
-                                                    GlobalSinkState &
-    auto &
-
+                                                    GlobalSinkState &sink_p, OperatorSourceInput &input) const {
+    auto &sink = sink_p.Cast<RadixHTGlobalSinkState>();
+    D_ASSERT(sink.finalized);
+
+    auto &gstate = input.global_state.Cast<RadixHTGlobalSourceState>();
    auto &lstate = input.local_state.Cast<RadixHTLocalSourceState>();
-    D_ASSERT(
-
+    D_ASSERT(sink.scan_pin_properties == TupleDataPinProperties::UNPIN_AFTER_DONE ||
+             sink.scan_pin_properties == TupleDataPinProperties::DESTROY_AFTER_DONE);
+
+    if (gstate.finished) {
        return SourceResultType::FINISHED;
    }

-    //
-
-    if (gstate.is_empty && grouping_set.empty()) {
+    // Special case hack to sort out aggregating from empty intermediates for aggregations without groups
+    if (CountInternal(sink_p) == 0 && grouping_set.empty()) {
        D_ASSERT(chunk.ColumnCount() == null_groups.size() + op.aggregates.size() + op.grouping_functions.size());
-        //
+        // For each column in the aggregates, set to initial state
        chunk.SetCardinality(1);
        for (auto null_group : null_groups) {
            chunk.data[null_group].SetVectorType(VectorType::CONSTANT_VECTOR);
@@ -666,97 +786,17 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
        for (idx_t i = 0; i < op.grouping_functions.size(); i++) {
            chunk.data[null_groups.size() + op.aggregates.size() + i].Reference(grouping_values[i]);
        }
-
-        return
-    }
-    if (gstate.is_empty) {
-        state.finished = true;
-        return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
-    }
-    idx_t elements_found = 0;
-
-    lstate.scan_chunk.Reset();
-    if (!state.initialized) {
-        lock_guard<mutex> l(state.lock);
-        if (!state.initialized) {
-            auto &finalized_hts = gstate.finalized_hts;
-            state.ht_scan_states = make_unsafe_uniq_array<TupleDataParallelScanState>(finalized_hts.size());
-
-            const auto &layout = gstate.finalized_hts[0]->GetDataCollection().GetLayout();
-            vector<column_t> column_ids;
-            column_ids.reserve(layout.ColumnCount() - 1);
-            for (idx_t col_idx = 0; col_idx < layout.ColumnCount() - 1; col_idx++) {
-                column_ids.emplace_back(col_idx);
-            }
-
-            for (idx_t ht_idx = 0; ht_idx < finalized_hts.size(); ht_idx++) {
-                gstate.finalized_hts[ht_idx]->GetDataCollection().InitializeScan(
-                    state.ht_scan_states.get()[ht_idx].scan_state, column_ids);
-            }
-            state.initialized = true;
-        }
+        gstate.finished = true;
+        return SourceResultType::HAVE_MORE_OUTPUT;
    }

-
-
-
-    idx_t ht_index;
-    {
-        lock_guard<mutex> l(state.lock);
-        ht_index = state.ht_index;
-        if (ht_index >= gstate.finalized_hts.size()) {
-            state.finished = true;
-            return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
-        }
-    }
-    D_ASSERT(ht_index < gstate.finalized_hts.size());
-    if (lstate.ht_index != DConstants::INVALID_INDEX && ht_index != lstate.ht_index) {
-        lstate.ht->GetDataCollection().FinalizePinState(local_scan_state.pin_state);
-    }
-    lstate.ht_index = ht_index;
-    lstate.ht = gstate.finalized_hts[ht_index];
-    D_ASSERT(lstate.ht);
-
-    auto &global_scan_state = state.ht_scan_states[ht_index];
-    elements_found = lstate.ht->Scan(global_scan_state, local_scan_state, lstate.scan_chunk);
-    if (elements_found > 0) {
-        break;
-    }
-    lstate.ht->GetDataCollection().FinalizePinState(local_scan_state.pin_state);
-
-    // move to the next hash table
-    lock_guard<mutex> l(state.lock);
-    ht_index++;
-    if (ht_index > state.ht_index) {
-        // we have not yet worked on the table
-        // move the global index forwards
-        if (!gstate.multi_scan) {
-            gstate.finalized_hts[state.ht_index].reset();
-        }
-        state.ht_index = ht_index;
+    while (!gstate.finished && chunk.size() == 0) {
+        if (!lstate.TaskFinished() || gstate.AssignTask(sink, lstate)) {
+            lstate.ExecuteTask(sink, gstate, chunk);
        }
    }

-
-    chunk.SetCardinality(elements_found);
-
-    idx_t chunk_index = 0;
-    for (auto &entry : grouping_set) {
-        chunk.data[entry].Reference(lstate.scan_chunk.data[chunk_index++]);
-    }
-    for (auto null_group : null_groups) {
-        chunk.data[null_group].SetVectorType(VectorType::CONSTANT_VECTOR);
-        ConstantVector::SetNull(chunk.data[null_group], true);
-    }
-    D_ASSERT(grouping_set.size() + null_groups.size() == op.GroupCount());
-    for (idx_t col_idx = 0; col_idx < op.aggregates.size(); col_idx++) {
-        chunk.data[op.GroupCount() + col_idx].Reference(lstate.scan_chunk.data[group_types.size() + col_idx]);
-    }
-    D_ASSERT(op.grouping_functions.size() == grouping_values.size());
-    for (idx_t i = 0; i < op.grouping_functions.size(); i++) {
-        chunk.data[op.GroupCount() + op.aggregates.size() + i].Reference(grouping_values[i]);
-    }
-    return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
+    return SourceResultType::HAVE_MORE_OUTPUT;
}

} // namespace duckdb
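The "special case hack" in GetData above guarantees that an aggregation without groups still emits exactly one row of initial aggregate states when the input is empty. A small check of that behavior through DuckDB's C++ API (COUNT initializes to 0, SUM to NULL):

    #include "duckdb.hpp"

    int main() {
        duckdb::DuckDB db(nullptr);
        duckdb::Connection con(db);
        con.Query("CREATE TABLE t (i INTEGER)");
        // No GROUP BY and zero input rows: still exactly one output row
        auto result = con.Query("SELECT COUNT(i), SUM(i) FROM t");
        result->Print(); // expected: 0, NULL
        return 0;
    }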