duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
- package/src/duckdb/extension/json/json_scan.cpp +1 -1
- package/src/duckdb/extension/json/json_serializer.cpp +26 -69
- package/src/duckdb/src/common/enum_util.cpp +119 -7
- package/src/duckdb/src/common/extra_type_info.cpp +7 -3
- package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
- package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
- package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/value.cpp +63 -42
- package/src/duckdb/src/common/types/vector.cpp +33 -67
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
- package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
- package/src/duckdb/src/execution/window_executor.cpp +6 -5
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
- package/src/duckdb/src/function/table/read_csv.cpp +150 -136
- package/src/duckdb/src/function/table/table_scan.cpp +0 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +12 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +2 -2
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
- package/src/duckdb/src/planner/logical_operator.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
- package/src/duckdb/src/storage/table/row_group.cpp +68 -1
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_src_execution.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
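A note on the CSV reader restructuring visible in the file list: the reader headers moved from `duckdb/execution/operator/persistent/` to `duckdb/execution/operator/scan/csv/` (with implementations under `src/execution/operator/csv_scanner/`), and a new sniffer was added. A minimal sketch of how includes would change for code that consumed these internal headers directly; the paths are taken from the file list above, but whether any given embedder includes them directly is an assumption:

```cpp
// Before (0.8.2-dev3458.0): CSV reader headers lived under operator/persistent.
#include "duckdb/execution/operator/persistent/buffered_csv_reader.hpp"
#include "duckdb/execution/operator/persistent/csv_reader_options.hpp"

// After (0.8.2-dev3949.0): the same headers live under operator/scan/csv,
// next to the new sniffer and state-machine headers.
#include "duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp"
#include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
#include "duckdb/execution/operator/scan/csv/csv_sniffer.hpp"
```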
--- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp
+++ package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp
@@ -1,9 +1,12 @@
 #include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp"
 
 #include "duckdb/catalog/catalog_entry/aggregate_function_catalog_entry.hpp"
+#include "duckdb/common/atomic.hpp"
 #include "duckdb/common/vector_operations/vector_operations.hpp"
 #include "duckdb/execution/aggregate_hashtable.hpp"
+#include "duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp"
 #include "duckdb/main/client_context.hpp"
+#include "duckdb/parallel/base_pipeline_event.hpp"
 #include "duckdb/parallel/interrupt.hpp"
 #include "duckdb/parallel/pipeline.hpp"
 #include "duckdb/parallel/task_scheduler.hpp"
@@ -11,9 +14,6 @@
 #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
 #include "duckdb/planner/expression/bound_constant_expression.hpp"
 #include "duckdb/planner/expression/bound_reference_expression.hpp"
-#include "duckdb/parallel/base_pipeline_event.hpp"
-#include "duckdb/common/atomic.hpp"
-#include "duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp"
 
 namespace duckdb {
 
@@ -176,9 +176,9 @@ PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<Logi
 //===--------------------------------------------------------------------===//
 // Sink
 //===--------------------------------------------------------------------===//
-class
+class HashAggregateGlobalSinkState : public GlobalSinkState {
 public:
-
+    HashAggregateGlobalSinkState(const PhysicalHashAggregate &op, ClientContext &context) {
         grouping_states.reserve(op.groupings.size());
         for (idx_t i = 0; i < op.groupings.size(); i++) {
             auto &grouping = op.groupings[i];
@@ -204,9 +204,9 @@ public:
     bool finished = false;
 };
 
-class
+class HashAggregateLocalSinkState : public LocalSinkState {
 public:
-
+    HashAggregateLocalSinkState(const PhysicalHashAggregate &op, ExecutionContext &context) {
 
         auto &payload_types = op.grouped_aggregate_data.payload_types;
         if (!payload_types.empty()) {
@@ -234,28 +234,30 @@ public:
 };
 
 void PhysicalHashAggregate::SetMultiScan(GlobalSinkState &state) {
-    auto &gstate = state.Cast<
+    auto &gstate = state.Cast<HashAggregateGlobalSinkState>();
     for (auto &grouping_state : gstate.grouping_states) {
-
-        RadixPartitionedHashTable::SetMultiScan(*radix_state);
+        RadixPartitionedHashTable::SetMultiScan(*grouping_state.table_state);
         if (!grouping_state.distinct_state) {
             continue;
         }
     }
 }
 
+//===--------------------------------------------------------------------===//
+// Sink
+//===--------------------------------------------------------------------===//
 unique_ptr<GlobalSinkState> PhysicalHashAggregate::GetGlobalSinkState(ClientContext &context) const {
-    return make_uniq<
+    return make_uniq<HashAggregateGlobalSinkState>(*this, context);
 }
 
 unique_ptr<LocalSinkState> PhysicalHashAggregate::GetLocalSinkState(ExecutionContext &context) const {
-    return make_uniq<
+    return make_uniq<HashAggregateLocalSinkState>(*this, context);
 }
 
 void PhysicalHashAggregate::SinkDistinctGrouping(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input,
                                                  idx_t grouping_idx) const {
-    auto &sink = input.local_state.Cast<
-    auto &global_sink = input.global_state.Cast<
+    auto &sink = input.local_state.Cast<HashAggregateLocalSinkState>();
+    auto &global_sink = input.global_state.Cast<HashAggregateGlobalSinkState>();
 
     auto &grouping_gstate = global_sink.grouping_states[grouping_idx];
     auto &grouping_lstate = sink.grouping_states[grouping_idx];
@@ -341,8 +343,8 @@ void PhysicalHashAggregate::SinkDistinct(ExecutionContext &context, DataChunk &c
 
 SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk &chunk,
                                            OperatorSinkInput &input) const {
-    auto &llstate = input.local_state.Cast<
-    auto &gstate = input.global_state.Cast<
+    auto &llstate = input.local_state.Cast<HashAggregateLocalSinkState>();
+    auto &gstate = input.global_state.Cast<HashAggregateGlobalSinkState>();
 
     if (distinct_collection_info) {
         SinkDistinct(context, chunk, input);
@@ -396,10 +398,13 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk
     return SinkResultType::NEED_MORE_INPUT;
 }
 
+//===--------------------------------------------------------------------===//
+// Combine
+//===--------------------------------------------------------------------===//
 void PhysicalHashAggregate::CombineDistinct(ExecutionContext &context, OperatorSinkCombineInput &input) const {
 
-    auto &global_sink = input.global_state.Cast<
-    auto &sink = input.local_state.Cast<
+    auto &global_sink = input.global_state.Cast<HashAggregateGlobalSinkState>();
+    auto &sink = input.local_state.Cast<HashAggregateLocalSinkState>();
 
     if (!distinct_collection_info) {
         return;
@@ -426,8 +431,8 @@ void PhysicalHashAggregate::CombineDistinct(ExecutionContext &context, OperatorS
 }
 
 SinkCombineResultType PhysicalHashAggregate::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
-    auto &gstate = input.global_state.Cast<
-    auto &llstate = input.local_state.Cast<
+    auto &gstate = input.global_state.Cast<HashAggregateGlobalSinkState>();
+    auto &llstate = input.local_state.Cast<HashAggregateLocalSinkState>();
 
     OperatorSinkCombineInput combine_distinct_input {gstate, llstate, input.interrupt_state};
     CombineDistinct(context, combine_distinct_input);
@@ -447,321 +452,267 @@ SinkCombineResultType PhysicalHashAggregate::Combine(ExecutionContext &context,
     return SinkCombineResultType::FINISHED;
 }
 
-
-
-
+//===--------------------------------------------------------------------===//
+// Finalize
+//===--------------------------------------------------------------------===//
+class HashAggregateFinalizeEvent : public BasePipelineEvent {
 public:
-
-
+    //! "Regular" Finalize Event that is scheduled after combining the thread-local distinct HTs
+    HashAggregateFinalizeEvent(ClientContext &context, Pipeline *pipeline_p, const PhysicalHashAggregate &op_p,
+                               HashAggregateGlobalSinkState &gstate_p)
+        : BasePipelineEvent(*pipeline_p), context(context), op(op_p), gstate(gstate_p) {
     }
 
-    const PhysicalHashAggregate &op;
-    HashAggregateGlobalState &gstate;
-
 public:
-    void Schedule() override
-        vector<shared_ptr<Task>> tasks;
-        for (idx_t i = 0; i < op.groupings.size(); i++) {
-            auto &grouping_gstate = gstate.grouping_states[i];
+    void Schedule() override;
 
-
-
-            table.ScheduleTasks(pipeline->executor, shared_from_this(), *grouping_gstate.table_state, tasks);
-        }
-        D_ASSERT(!tasks.empty());
-        SetTasks(std::move(tasks));
-    }
-};
+private:
+    ClientContext &context;
 
-
+    const PhysicalHashAggregate &op;
+    HashAggregateGlobalSinkState &gstate;
+};
 
 class HashAggregateFinalizeTask : public ExecutorTask {
 public:
-    HashAggregateFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p,
-
-        : ExecutorTask(pipeline.executor), pipeline(pipeline), event(std::move(event_p)),
-
+    HashAggregateFinalizeTask(ClientContext &context, Pipeline &pipeline, shared_ptr<Event> event_p,
+                              const PhysicalHashAggregate &op, HashAggregateGlobalSinkState &state_p)
+        : ExecutorTask(pipeline.executor), context(context), pipeline(pipeline), event(std::move(event_p)), op(op),
+          gstate(state_p) {
    }
 
-
-
-        D_ASSERT(!gstate.finished);
-        gstate.finished = true;
-        event->FinishTask();
-        return TaskExecutionResult::TASK_FINISHED;
-    }
+public:
+    TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;
 
 private:
+    ClientContext &context;
     Pipeline &pipeline;
     shared_ptr<Event> event;
-
-    ClientContext &context;
+
     const PhysicalHashAggregate &op;
+    HashAggregateGlobalSinkState &gstate;
 };
 
-
+void HashAggregateFinalizeEvent::Schedule() {
+    vector<shared_ptr<Task>> tasks;
+    tasks.push_back(make_uniq<HashAggregateFinalizeTask>(context, *pipeline, shared_from_this(), op, gstate));
+    D_ASSERT(!tasks.empty());
+    SetTasks(std::move(tasks));
+}
+
+TaskExecutionResult HashAggregateFinalizeTask::ExecuteTask(TaskExecutionMode mode) {
+    op.FinalizeInternal(pipeline, *event, context, gstate, false);
+    D_ASSERT(!gstate.finished);
+    gstate.finished = true;
+    event->FinishTask();
+    return TaskExecutionResult::TASK_FINISHED;
+}
+
+class HashAggregateDistinctFinalizeEvent : public BasePipelineEvent {
 public:
-
-
-
+    //! Distinct Finalize Event that is scheduled if we have distinct aggregates
+    HashAggregateDistinctFinalizeEvent(ClientContext &context, Pipeline &pipeline_p, const PhysicalHashAggregate &op_p,
+                                       HashAggregateGlobalSinkState &gstate_p)
+        : BasePipelineEvent(pipeline_p), context(context), op(op_p), gstate(gstate_p) {
    }
 
-
-
+public:
+    void Schedule() override;
+    void FinishEvent() override;
+
+private:
+    void CreateGlobalSources();
+
+private:
     ClientContext &context;
 
+    const PhysicalHashAggregate &op;
+    HashAggregateGlobalSinkState &gstate;
+
 public:
-
-
-        tasks.push_back(make_uniq<HashAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate, context, op));
-        D_ASSERT(!tasks.empty());
-        SetTasks(std::move(tasks));
-    }
+    //! The GlobalSourceStates for all the radix tables of the distinct aggregates
+    vector<vector<unique_ptr<GlobalSourceState>>> global_source_states;
 };
 
-
+class HashAggregateDistinctFinalizeTask : public ExecutorTask {
+public:
+    HashAggregateDistinctFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p, const PhysicalHashAggregate &op,
+                                      HashAggregateGlobalSinkState &state_p)
+        : ExecutorTask(pipeline.executor), pipeline(pipeline), event(std::move(event_p)), op(op), gstate(state_p) {
+    }
 
-class HashDistinctAggregateFinalizeTask : public ExecutorTask {
 public:
-
-                                      ClientContext &context, const PhysicalHashAggregate &op,
-                                      vector<vector<unique_ptr<GlobalSourceState>>> &global_sources_p)
-        : ExecutorTask(pipeline.executor), pipeline(pipeline), event(std::move(event_p)), gstate(state_p),
-          context(context), op(op), global_sources(global_sources_p) {
-    }
-
-    void AggregateDistinctGrouping(DistinctAggregateCollectionInfo &info,
-                                   const HashAggregateGroupingData &grouping_data,
-                                   HashAggregateGroupingGlobalState &grouping_state, idx_t grouping_idx) {
-        auto &aggregates = info.aggregates;
-        auto &data = *grouping_data.distinct_data;
-        auto &state = *grouping_state.distinct_state;
-        auto &table_state = *grouping_state.table_state;
-
-        ThreadContext temp_thread_context(context);
-        ExecutionContext temp_exec_context(context, temp_thread_context, &pipeline);
-
-        auto temp_local_state = grouping_data.table_data.GetLocalSinkState(temp_exec_context);
-
-        // Create a chunk that mimics the 'input' chunk in Sink, for storing the group vectors
-        DataChunk group_chunk;
-        if (!op.input_group_types.empty()) {
-            group_chunk.Initialize(context, op.input_group_types);
-        }
+    TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;
 
-
-
+private:
+    void AggregateDistinctGrouping(const idx_t grouping_idx);
 
-
-
-
-    }
+private:
+    Pipeline &pipeline;
+    shared_ptr<Event> event;
 
-
-
+    const PhysicalHashAggregate &op;
+    HashAggregateGlobalSinkState &gstate;
+};
+
+void HashAggregateDistinctFinalizeEvent::Schedule() {
+    CreateGlobalSources();
+
+    const idx_t n_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
+    vector<shared_ptr<Task>> tasks;
+    for (idx_t i = 0; i < n_threads; i++) {
+        tasks.push_back(make_uniq<HashAggregateDistinctFinalizeTask>(*pipeline, shared_from_this(), op, gstate));
+    }
+    SetTasks(std::move(tasks));
+}
 
-
-
+void HashAggregateDistinctFinalizeEvent::CreateGlobalSources() {
+    auto &aggregates = op.grouped_aggregate_data.aggregates;
+    global_source_states.reserve(op.groupings.size());
+    for (idx_t grouping_idx = 0; grouping_idx < op.groupings.size(); grouping_idx++) {
+        auto &grouping = op.groupings[grouping_idx];
+        auto &distinct_data = *grouping.distinct_data;
 
-
-
-
+        vector<unique_ptr<GlobalSourceState>> aggregate_sources;
+        aggregate_sources.reserve(aggregates.size());
+        for (idx_t agg_idx = 0; agg_idx < aggregates.size(); agg_idx++) {
+            auto &aggregate = aggregates[agg_idx];
+            auto &aggr = aggregate->Cast<BoundAggregateExpression>();
 
-
-
+            if (!aggr.IsDistinct()) {
+                aggregate_sources.push_back(nullptr);
                 continue;
             }
-            D_ASSERT(
-
-            auto
-
-
-            DataChunk output_chunk;
-            output_chunk.Initialize(context, state.distinct_output_chunks[table_idx]->GetTypes());
-
-            auto &global_source = global_sources[grouping_idx][i];
-            auto local_source = radix_table_p->GetLocalSourceState(temp_exec_context);
-
-            // Fetch all the data from the aggregate ht, and Sink it into the main ht
-            while (true) {
-                output_chunk.Reset();
-                group_chunk.Reset();
-                aggregate_input_chunk.Reset();
-
-                InterruptState interrupt_state;
-                OperatorSourceInput source_input {*global_source, *local_source, interrupt_state};
-                auto res = radix_table_p->GetData(temp_exec_context, output_chunk, *state.radix_states[table_idx],
-                                                  source_input);
-
-                if (res == SourceResultType::FINISHED) {
-                    D_ASSERT(output_chunk.size() == 0);
-                    break;
-                } else if (res == SourceResultType::BLOCKED) {
-                    throw InternalException(
-                        "Unexpected interrupt from radix table GetData in HashDistinctAggregateFinalizeTask");
-                }
-
-                auto &grouped_aggregate_data = *data.grouped_aggregate_data[table_idx];
-
-                for (idx_t group_idx = 0; group_idx < group_by_size; group_idx++) {
-                    auto &group = grouped_aggregate_data.groups[group_idx];
-                    auto &bound_ref_expr = group->Cast<BoundReferenceExpression>();
-                    group_chunk.data[bound_ref_expr.index].Reference(output_chunk.data[group_idx]);
-                }
-                group_chunk.SetCardinality(output_chunk);
-
-                for (idx_t child_idx = 0; child_idx < grouped_aggregate_data.groups.size() - group_by_size;
-                     child_idx++) {
-                    aggregate_input_chunk.data[payload_idx + child_idx].Reference(
-                        output_chunk.data[group_by_size + child_idx]);
-                }
-                aggregate_input_chunk.SetCardinality(output_chunk);
-
-                // Sink it into the main ht
-                OperatorSinkInput sink_input {table_state, *temp_local_state, interrupt_state};
-                grouping_data.table_data.Sink(temp_exec_context, group_chunk, sink_input, aggregate_input_chunk, {i});
-            }
+            D_ASSERT(distinct_data.info.table_map.count(agg_idx));
+
+            auto table_idx = distinct_data.info.table_map.at(agg_idx);
+            auto &radix_table_p = distinct_data.radix_tables[table_idx];
+            aggregate_sources.push_back(radix_table_p->GetGlobalSourceState(context));
        }
-
+        global_source_states.push_back(std::move(aggregate_sources));
    }
+}
 
-
-
-
-
-
-
-
-
-
-        return TaskExecutionResult::TASK_FINISHED;
+void HashAggregateDistinctFinalizeEvent::FinishEvent() {
+    // Now that everything is added to the main ht, we can actually finalize
+    auto new_event = make_shared<HashAggregateFinalizeEvent>(context, pipeline.get(), op, gstate);
+    this->InsertEvent(std::move(new_event));
+}
+
+TaskExecutionResult HashAggregateDistinctFinalizeTask::ExecuteTask(TaskExecutionMode mode) {
+    for (idx_t grouping_idx = 0; grouping_idx < op.groupings.size(); grouping_idx++) {
+        AggregateDistinctGrouping(grouping_idx);
    }
+    event->FinishTask();
+    return TaskExecutionResult::TASK_FINISHED;
+}
 
-
-
-
-    HashAggregateGlobalState &gstate;
-    ClientContext &context;
-    const PhysicalHashAggregate &op;
-    vector<vector<unique_ptr<GlobalSourceState>>> &global_sources;
-};
+void HashAggregateDistinctFinalizeTask::AggregateDistinctGrouping(const idx_t grouping_idx) {
+    D_ASSERT(op.distinct_collection_info);
+    auto &info = *op.distinct_collection_info;
 
-
+    auto &grouping_data = op.groupings[grouping_idx];
+    auto &grouping_state = gstate.grouping_states[grouping_idx];
+    D_ASSERT(grouping_state.distinct_state);
+    auto &distinct_state = *grouping_state.distinct_state;
+    auto &distinct_data = *grouping_data.distinct_data;
 
-
-class HashDistinctAggregateFinalizeEvent : public BasePipelineEvent {
-public:
-    HashDistinctAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
-                                       Pipeline &pipeline_p, ClientContext &context)
-        : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), context(context) {
-    }
-    const PhysicalHashAggregate &op;
-    HashAggregateGlobalState &gstate;
-    ClientContext &context;
-    //! The GlobalSourceStates for all the radix tables of the distinct aggregates
-    vector<vector<unique_ptr<GlobalSourceState>>> global_sources;
+    auto &aggregates = info.aggregates;
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    // Thread-local contexts
+    ThreadContext thread_context(executor.context);
+    ExecutionContext execution_context(executor.context, thread_context, &pipeline);
+
+    // Sink state to sink into global HTs
+    InterruptState interrupt_state;
+    auto &global_sink_state = *grouping_state.table_state;
+    auto local_sink_state = grouping_data.table_data.GetLocalSinkState(execution_context);
+    OperatorSinkInput sink_input {global_sink_state, *local_sink_state, interrupt_state};
+
+    // Create a chunk that mimics the 'input' chunk in Sink, for storing the group vectors
+    DataChunk group_chunk;
+    if (!op.input_group_types.empty()) {
+        group_chunk.Initialize(executor.context, op.input_group_types);
    }
 
-
-
-
-
+    auto &groups = op.grouped_aggregate_data.groups;
+    const idx_t group_by_size = groups.size();
+
+    DataChunk aggregate_input_chunk;
+    if (!gstate.payload_types.empty()) {
+        aggregate_input_chunk.Initialize(executor.context, gstate.payload_types);
    }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        if (!aggr.IsDistinct()) {
-            aggregate_sources.push_back(nullptr);
-            continue;
-        }
-
-        D_ASSERT(data.info.table_map.count(i));
-        auto table_idx = data.info.table_map.at(i);
-        auto &radix_table_p = data.radix_tables[table_idx];
-        aggregate_sources.push_back(radix_table_p->GetGlobalSourceState(context));
-    }
-    grouping_sources.push_back(std::move(aggregate_sources));
+    auto &finalize_event = event->Cast<HashAggregateDistinctFinalizeEvent>();
+
+    idx_t payload_idx;
+    idx_t next_payload_idx = 0;
+    for (idx_t agg_idx = 0; agg_idx < op.grouped_aggregate_data.aggregates.size(); agg_idx++) {
+        auto &aggregate = aggregates[agg_idx]->Cast<BoundAggregateExpression>();
+
+        // Forward the payload idx
+        payload_idx = next_payload_idx;
+        next_payload_idx = payload_idx + aggregate.children.size();
+
+        // If aggregate is not distinct, skip it
+        if (!distinct_data.IsDistinct(agg_idx)) {
+            continue;
        }
-    return grouping_sources;
-    }
-};
 
-
+        D_ASSERT(distinct_data.info.table_map.count(agg_idx));
+        const auto &table_idx = distinct_data.info.table_map.at(agg_idx);
+        auto &radix_table = distinct_data.radix_tables[table_idx];
 
-
-
-
-
-        : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), client(client) {
-    }
+        auto &sink = *distinct_state.radix_states[table_idx];
+        auto local_source = radix_table->GetLocalSourceState(execution_context);
+        OperatorSourceInput source_input {*finalize_event.global_source_states[grouping_idx][agg_idx], *local_source,
+                                          interrupt_state};
 
-
-
-
+        // Create a duplicate of the output_chunk, because of multi-threading we cant alter the original
+        DataChunk output_chunk;
+        output_chunk.Initialize(executor.context, distinct_state.distinct_output_chunks[table_idx]->GetTypes());
+
+        // Fetch all the data from the aggregate ht, and Sink it into the main ht
+        while (true) {
+            output_chunk.Reset();
+            group_chunk.Reset();
+            aggregate_input_chunk.Reset();
+
+            auto res = radix_table->GetData(execution_context, output_chunk, sink, source_input);
+            if (res == SourceResultType::FINISHED) {
+                D_ASSERT(output_chunk.size() == 0);
+                break;
+            } else if (res == SourceResultType::BLOCKED) {
+                throw InternalException(
+                    "Unexpected interrupt from radix table GetData in HashAggregateDistinctFinalizeTask");
+            }
 
-
-
-
-
-
-    auto &distinct_data = *grouping.distinct_data;
-    auto &distinct_state = *gstate.grouping_states[i].distinct_state;
-    for (idx_t table_idx = 0; table_idx < distinct_data.radix_tables.size(); table_idx++) {
-        if (!distinct_data.radix_tables[table_idx]) {
-            continue;
-        }
-        distinct_data.radix_tables[table_idx]->ScheduleTasks(pipeline->executor, shared_from_this(),
-                                                             *distinct_state.radix_states[table_idx], tasks);
+            auto &grouped_aggregate_data = *distinct_data.grouped_aggregate_data[table_idx];
+            for (idx_t group_idx = 0; group_idx < group_by_size; group_idx++) {
+                auto &group = grouped_aggregate_data.groups[group_idx];
+                auto &bound_ref_expr = group->Cast<BoundReferenceExpression>();
+                group_chunk.data[bound_ref_expr.index].Reference(output_chunk.data[group_idx]);
            }
-
+            group_chunk.SetCardinality(output_chunk);
 
-
-
-
+            for (idx_t child_idx = 0; child_idx < grouped_aggregate_data.groups.size() - group_by_size; child_idx++) {
+                aggregate_input_chunk.data[payload_idx + child_idx].Reference(
+                    output_chunk.data[group_by_size + child_idx]);
+            }
+            aggregate_input_chunk.SetCardinality(output_chunk);
 
-
-
-
-    this->InsertEvent(std::move(new_event));
+            // Sink it into the main ht
+            grouping_data.table_data.Sink(execution_context, group_chunk, sink_input, aggregate_input_chunk, {agg_idx});
+        }
    }
-
-
-    //! FINALIZE
+    grouping_data.table_data.Combine(execution_context, global_sink_state, *local_sink_state);
+}
 
 SinkFinalizeType PhysicalHashAggregate::FinalizeDistinct(Pipeline &pipeline, Event &event, ClientContext &context,
                                                          GlobalSinkState &gstate_p) const {
-    auto &gstate = gstate_p.Cast<
+    auto &gstate = gstate_p.Cast<HashAggregateGlobalSinkState>();
     D_ASSERT(distinct_collection_info);
 
-    bool any_partitioned = false;
     for (idx_t i = 0; i < groupings.size(); i++) {
         auto &grouping = groupings[i];
         auto &distinct_data = *grouping.distinct_data;
@@ -773,28 +724,17 @@ SinkFinalizeType PhysicalHashAggregate::FinalizeDistinct(Pipeline &pipeline, Eve
            }
            auto &radix_table = distinct_data.radix_tables[table_idx];
            auto &radix_state = *distinct_state.radix_states[table_idx];
-
-            if (partitioned) {
-                any_partitioned = true;
-            }
+            radix_table->Finalize(context, radix_state);
        }
    }
-
-
-        auto new_event = make_shared<HashDistinctCombineFinalizeEvent>(*this, gstate, pipeline, context);
-        event.InsertEvent(std::move(new_event));
-    } else {
-        // Hashtables aren't partitioned, they dont need to be joined first
-        // so we can already compute the aggregate
-        auto new_event = make_shared<HashDistinctAggregateFinalizeEvent>(*this, gstate, pipeline, context);
-        event.InsertEvent(std::move(new_event));
-    }
+    auto new_event = make_shared<HashAggregateDistinctFinalizeEvent>(context, pipeline, *this, gstate);
+    event.InsertEvent(std::move(new_event));
    return SinkFinalizeType::READY;
 }
 
 SinkFinalizeType PhysicalHashAggregate::FinalizeInternal(Pipeline &pipeline, Event &event, ClientContext &context,
                                                          GlobalSinkState &gstate_p, bool check_distinct) const {
-    auto &gstate = gstate_p.Cast<
+    auto &gstate = gstate_p.Cast<HashAggregateGlobalSinkState>();
 
    if (check_distinct && distinct_collection_info) {
        // There are distinct aggregates
@@ -803,19 +743,10 @@ SinkFinalizeType PhysicalHashAggregate::FinalizeInternal(Pipeline &pipeline, Eve
        return FinalizeDistinct(pipeline, event, context, gstate_p);
    }
 
-    bool any_partitioned = false;
    for (idx_t i = 0; i < groupings.size(); i++) {
        auto &grouping = groupings[i];
        auto &grouping_gstate = gstate.grouping_states[i];
-
-        bool is_partitioned = grouping.table_data.Finalize(context, *grouping_gstate.table_state);
-        if (is_partitioned) {
-            any_partitioned = true;
-        }
-    }
-    if (any_partitioned) {
-        auto new_event = make_shared<HashAggregateMergeEvent>(*this, gstate, &pipeline);
-        event.InsertEvent(std::move(new_event));
+        grouping.table_data.Finalize(context, *grouping_gstate.table_state);
    }
    return SinkFinalizeType::READY;
 }
@@ -828,10 +759,9 @@ SinkFinalizeType PhysicalHashAggregate::Finalize(Pipeline &pipeline, Event &even
 //===--------------------------------------------------------------------===//
 // Source
 //===--------------------------------------------------------------------===//
-class
+class HashAggregateGlobalSourceState : public GlobalSourceState {
 public:
-
-        : op(op), state_index(0) {
+    HashAggregateGlobalSourceState(ClientContext &context, const PhysicalHashAggregate &op) : op(op), state_index(0) {
        for (auto &grouping : op.groupings) {
            auto &rt = grouping.table_data;
            radix_states.push_back(rt.GetGlobalSourceState(context));
@@ -851,24 +781,24 @@ public:
            return 1;
        }
 
-        auto &ht_state = op.sink_state->Cast<
+        auto &ht_state = op.sink_state->Cast<HashAggregateGlobalSinkState>();
        idx_t count = 0;
        for (size_t sidx = 0; sidx < op.groupings.size(); ++sidx) {
            auto &grouping = op.groupings[sidx];
            auto &grouping_gstate = ht_state.grouping_states[sidx];
-            count += grouping.table_data.
+            count += grouping.table_data.Count(*grouping_gstate.table_state);
        }
        return MaxValue<idx_t>(1, count / STANDARD_VECTOR_SIZE);
    }
 };
 
 unique_ptr<GlobalSourceState> PhysicalHashAggregate::GetGlobalSourceState(ClientContext &context) const {
-    return make_uniq<
+    return make_uniq<HashAggregateGlobalSourceState>(context, *this);
 }
 
-class
+class HashAggregateLocalSourceState : public LocalSourceState {
 public:
-    explicit
+    explicit HashAggregateLocalSourceState(ExecutionContext &context, const PhysicalHashAggregate &op) {
        for (auto &grouping : op.groupings) {
            auto &rt = grouping.table_data;
            radix_states.push_back(rt.GetLocalSourceState(context));
@@ -880,14 +810,14 @@ public:
 
 unique_ptr<LocalSourceState> PhysicalHashAggregate::GetLocalSourceState(ExecutionContext &context,
                                                                         GlobalSourceState &gstate) const {
-    return make_uniq<
+    return make_uniq<HashAggregateLocalSourceState>(context, *this);
 }
 
 SourceResultType PhysicalHashAggregate::GetData(ExecutionContext &context, DataChunk &chunk,
                                                 OperatorSourceInput &input) const {
-    auto &sink_gstate = sink_state->Cast<
-    auto &gstate = input.global_state.Cast<
-    auto &lstate = input.local_state.Cast<
+    auto &sink_gstate = sink_state->Cast<HashAggregateGlobalSinkState>();
+    auto &gstate = input.global_state.Cast<HashAggregateGlobalSourceState>();
+    auto &lstate = input.local_state.Cast<HashAggregateLocalSourceState>();
    while (true) {
        idx_t radix_idx = gstate.state_index;
        if (radix_idx >= groupings.size()) {