duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
  4. package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
  5. package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
  6. package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
  7. package/src/duckdb/extension/json/json_scan.cpp +1 -1
  8. package/src/duckdb/extension/json/json_serializer.cpp +26 -69
  9. package/src/duckdb/src/common/enum_util.cpp +119 -7
  10. package/src/duckdb/src/common/extra_type_info.cpp +7 -3
  11. package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
  12. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
  13. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
  14. package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
  15. package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
  16. package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
  17. package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
  18. package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
  19. package/src/duckdb/src/common/types/interval.cpp +3 -0
  20. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
  21. package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
  22. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
  23. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
  24. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
  25. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
  26. package/src/duckdb/src/common/types/value.cpp +63 -42
  27. package/src/duckdb/src/common/types/vector.cpp +33 -67
  28. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
  29. package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
  30. package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
  31. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
  32. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
  33. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
  34. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
  35. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
  36. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
  37. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
  38. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
  39. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
  40. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
  41. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
  42. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
  43. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
  44. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
  45. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
  46. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
  47. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
  48. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
  49. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
  50. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
  51. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
  52. package/src/duckdb/src/execution/window_executor.cpp +6 -5
  53. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
  54. package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
  55. package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
  56. package/src/duckdb/src/function/table/read_csv.cpp +150 -136
  57. package/src/duckdb/src/function/table/table_scan.cpp +0 -2
  58. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  59. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
  60. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
  61. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
  62. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
  63. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
  64. package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
  65. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
  66. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
  67. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
  68. package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
  69. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
  70. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
  71. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
  72. package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
  73. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
  74. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
  75. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
  76. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
  77. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
  78. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
  79. package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
  80. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
  81. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
  82. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
  83. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
  84. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
  85. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
  86. package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
  87. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
  88. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
  89. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
  90. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
  91. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
  92. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
  93. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
  94. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
  95. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
  97. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
  98. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
  99. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
  100. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
  101. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
  102. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
  103. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
  104. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
  105. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
  106. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  107. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
  108. package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
  109. package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
  110. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
  111. package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
  112. package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
  113. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
  114. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
  115. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
  116. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
  117. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
  119. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
  120. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
  121. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
  122. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
  123. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  124. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
  125. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  126. package/src/duckdb/src/include/duckdb.h +12 -0
  127. package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
  128. package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
  129. package/src/duckdb/src/main/client_verify.cpp +1 -0
  130. package/src/duckdb/src/main/config.cpp +2 -2
  131. package/src/duckdb/src/main/connection.cpp +3 -3
  132. package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
  133. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
  134. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
  135. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
  136. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
  137. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
  138. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
  139. package/src/duckdb/src/planner/logical_operator.cpp +1 -1
  140. package/src/duckdb/src/planner/planner.cpp +1 -1
  141. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  142. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
  143. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
  144. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
  145. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
  146. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
  147. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
  148. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
  149. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
  150. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
  151. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
  152. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
  153. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
  154. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
  155. package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
  156. package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
  157. package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
  158. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
  159. package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
  160. package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
  161. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  162. package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
  163. package/src/duckdb/src/storage/table/row_group.cpp +68 -1
  164. package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
  165. package/src/duckdb/src/storage/wal_replay.cpp +2 -2
  166. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
  167. package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
  168. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
  169. package/src/duckdb/ub_src_execution.cpp +0 -2
  170. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
  171. package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
  172. package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
  173. package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
  174. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  175. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  176. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  177. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
  178. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
  179. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
  180. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp
@@ -1,9 +1,12 @@
  #include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp"

  #include "duckdb/catalog/catalog_entry/aggregate_function_catalog_entry.hpp"
+ #include "duckdb/common/atomic.hpp"
  #include "duckdb/common/vector_operations/vector_operations.hpp"
  #include "duckdb/execution/aggregate_hashtable.hpp"
+ #include "duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp"
  #include "duckdb/main/client_context.hpp"
+ #include "duckdb/parallel/base_pipeline_event.hpp"
  #include "duckdb/parallel/interrupt.hpp"
  #include "duckdb/parallel/pipeline.hpp"
  #include "duckdb/parallel/task_scheduler.hpp"
@@ -11,9 +14,6 @@
  #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
  #include "duckdb/planner/expression/bound_constant_expression.hpp"
  #include "duckdb/planner/expression/bound_reference_expression.hpp"
- #include "duckdb/parallel/base_pipeline_event.hpp"
- #include "duckdb/common/atomic.hpp"
- #include "duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp"

  namespace duckdb {

@@ -176,9 +176,9 @@ PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<Logi
  //===--------------------------------------------------------------------===//
  // Sink
  //===--------------------------------------------------------------------===//
- class HashAggregateGlobalState : public GlobalSinkState {
+ class HashAggregateGlobalSinkState : public GlobalSinkState {
  public:
- HashAggregateGlobalState(const PhysicalHashAggregate &op, ClientContext &context) {
+ HashAggregateGlobalSinkState(const PhysicalHashAggregate &op, ClientContext &context) {
  grouping_states.reserve(op.groupings.size());
  for (idx_t i = 0; i < op.groupings.size(); i++) {
  auto &grouping = op.groupings[i];
@@ -204,9 +204,9 @@ public:
  bool finished = false;
  };

- class HashAggregateLocalState : public LocalSinkState {
+ class HashAggregateLocalSinkState : public LocalSinkState {
  public:
- HashAggregateLocalState(const PhysicalHashAggregate &op, ExecutionContext &context) {
+ HashAggregateLocalSinkState(const PhysicalHashAggregate &op, ExecutionContext &context) {

  auto &payload_types = op.grouped_aggregate_data.payload_types;
  if (!payload_types.empty()) {
@@ -234,28 +234,30 @@ public:
  };

  void PhysicalHashAggregate::SetMultiScan(GlobalSinkState &state) {
- auto &gstate = state.Cast<HashAggregateGlobalState>();
+ auto &gstate = state.Cast<HashAggregateGlobalSinkState>();
  for (auto &grouping_state : gstate.grouping_states) {
- auto &radix_state = grouping_state.table_state;
- RadixPartitionedHashTable::SetMultiScan(*radix_state);
+ RadixPartitionedHashTable::SetMultiScan(*grouping_state.table_state);
  if (!grouping_state.distinct_state) {
  continue;
  }
  }
  }

+ //===--------------------------------------------------------------------===//
+ // Sink
+ //===--------------------------------------------------------------------===//
  unique_ptr<GlobalSinkState> PhysicalHashAggregate::GetGlobalSinkState(ClientContext &context) const {
- return make_uniq<HashAggregateGlobalState>(*this, context);
+ return make_uniq<HashAggregateGlobalSinkState>(*this, context);
  }

  unique_ptr<LocalSinkState> PhysicalHashAggregate::GetLocalSinkState(ExecutionContext &context) const {
- return make_uniq<HashAggregateLocalState>(*this, context);
+ return make_uniq<HashAggregateLocalSinkState>(*this, context);
  }

  void PhysicalHashAggregate::SinkDistinctGrouping(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input,
  idx_t grouping_idx) const {
- auto &sink = input.local_state.Cast<HashAggregateLocalState>();
- auto &global_sink = input.global_state.Cast<HashAggregateGlobalState>();
+ auto &sink = input.local_state.Cast<HashAggregateLocalSinkState>();
+ auto &global_sink = input.global_state.Cast<HashAggregateGlobalSinkState>();

  auto &grouping_gstate = global_sink.grouping_states[grouping_idx];
  auto &grouping_lstate = sink.grouping_states[grouping_idx];
@@ -341,8 +343,8 @@ void PhysicalHashAggregate::SinkDistinct(ExecutionContext &context, DataChunk &c

  SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk &chunk,
  OperatorSinkInput &input) const {
- auto &llstate = input.local_state.Cast<HashAggregateLocalState>();
- auto &gstate = input.global_state.Cast<HashAggregateGlobalState>();
+ auto &llstate = input.local_state.Cast<HashAggregateLocalSinkState>();
+ auto &gstate = input.global_state.Cast<HashAggregateGlobalSinkState>();

  if (distinct_collection_info) {
  SinkDistinct(context, chunk, input);
@@ -396,10 +398,13 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk
  return SinkResultType::NEED_MORE_INPUT;
  }

+ //===--------------------------------------------------------------------===//
+ // Combine
+ //===--------------------------------------------------------------------===//
  void PhysicalHashAggregate::CombineDistinct(ExecutionContext &context, OperatorSinkCombineInput &input) const {

- auto &global_sink = input.global_state.Cast<HashAggregateGlobalState>();
- auto &sink = input.local_state.Cast<HashAggregateLocalState>();
+ auto &global_sink = input.global_state.Cast<HashAggregateGlobalSinkState>();
+ auto &sink = input.local_state.Cast<HashAggregateLocalSinkState>();

  if (!distinct_collection_info) {
  return;
@@ -426,8 +431,8 @@ void PhysicalHashAggregate::CombineDistinct(ExecutionContext &context, OperatorS
  }

  SinkCombineResultType PhysicalHashAggregate::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
- auto &gstate = input.global_state.Cast<HashAggregateGlobalState>();
- auto &llstate = input.local_state.Cast<HashAggregateLocalState>();
+ auto &gstate = input.global_state.Cast<HashAggregateGlobalSinkState>();
+ auto &llstate = input.local_state.Cast<HashAggregateLocalSinkState>();

  OperatorSinkCombineInput combine_distinct_input {gstate, llstate, input.interrupt_state};
  CombineDistinct(context, combine_distinct_input);
@@ -447,321 +452,267 @@ SinkCombineResultType PhysicalHashAggregate::Combine(ExecutionContext &context,
  return SinkCombineResultType::FINISHED;
  }

- //! REGULAR FINALIZE EVENT
-
- class HashAggregateMergeEvent : public BasePipelineEvent {
+ //===--------------------------------------------------------------------===//
+ // Finalize
+ //===--------------------------------------------------------------------===//
+ class HashAggregateFinalizeEvent : public BasePipelineEvent {
  public:
- HashAggregateMergeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p, Pipeline *pipeline_p)
- : BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p) {
+ //! "Regular" Finalize Event that is scheduled after combining the thread-local distinct HTs
+ HashAggregateFinalizeEvent(ClientContext &context, Pipeline *pipeline_p, const PhysicalHashAggregate &op_p,
+ HashAggregateGlobalSinkState &gstate_p)
+ : BasePipelineEvent(*pipeline_p), context(context), op(op_p), gstate(gstate_p) {
  }

- const PhysicalHashAggregate &op;
- HashAggregateGlobalState &gstate;
-
  public:
- void Schedule() override {
- vector<shared_ptr<Task>> tasks;
- for (idx_t i = 0; i < op.groupings.size(); i++) {
- auto &grouping_gstate = gstate.grouping_states[i];
+ void Schedule() override;

- auto &grouping = op.groupings[i];
- auto &table = grouping.table_data;
- table.ScheduleTasks(pipeline->executor, shared_from_this(), *grouping_gstate.table_state, tasks);
- }
- D_ASSERT(!tasks.empty());
- SetTasks(std::move(tasks));
- }
- };
+ private:
+ ClientContext &context;

- //! REGULAR FINALIZE FROM DISTINCT FINALIZE
+ const PhysicalHashAggregate &op;
+ HashAggregateGlobalSinkState &gstate;
+ };

  class HashAggregateFinalizeTask : public ExecutorTask {
  public:
- HashAggregateFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p, HashAggregateGlobalState &state_p,
- ClientContext &context, const PhysicalHashAggregate &op)
- : ExecutorTask(pipeline.executor), pipeline(pipeline), event(std::move(event_p)), gstate(state_p),
- context(context), op(op) {
+ HashAggregateFinalizeTask(ClientContext &context, Pipeline &pipeline, shared_ptr<Event> event_p,
+ const PhysicalHashAggregate &op, HashAggregateGlobalSinkState &state_p)
+ : ExecutorTask(pipeline.executor), context(context), pipeline(pipeline), event(std::move(event_p)), op(op),
+ gstate(state_p) {
  }

- TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
- op.FinalizeInternal(pipeline, *event, context, gstate, false);
- D_ASSERT(!gstate.finished);
- gstate.finished = true;
- event->FinishTask();
- return TaskExecutionResult::TASK_FINISHED;
- }
+ public:
+ TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;

  private:
+ ClientContext &context;
  Pipeline &pipeline;
  shared_ptr<Event> event;
- HashAggregateGlobalState &gstate;
- ClientContext &context;
+
  const PhysicalHashAggregate &op;
+ HashAggregateGlobalSinkState &gstate;
  };

- class HashAggregateFinalizeEvent : public BasePipelineEvent {
+ void HashAggregateFinalizeEvent::Schedule() {
+ vector<shared_ptr<Task>> tasks;
+ tasks.push_back(make_uniq<HashAggregateFinalizeTask>(context, *pipeline, shared_from_this(), op, gstate));
+ D_ASSERT(!tasks.empty());
+ SetTasks(std::move(tasks));
+ }
+
+ TaskExecutionResult HashAggregateFinalizeTask::ExecuteTask(TaskExecutionMode mode) {
+ op.FinalizeInternal(pipeline, *event, context, gstate, false);
+ D_ASSERT(!gstate.finished);
+ gstate.finished = true;
+ event->FinishTask();
+ return TaskExecutionResult::TASK_FINISHED;
+ }
+
+ class HashAggregateDistinctFinalizeEvent : public BasePipelineEvent {
  public:
- HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
- Pipeline *pipeline_p, ClientContext &context)
- : BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p), context(context) {
+ //! Distinct Finalize Event that is scheduled if we have distinct aggregates
+ HashAggregateDistinctFinalizeEvent(ClientContext &context, Pipeline &pipeline_p, const PhysicalHashAggregate &op_p,
+ HashAggregateGlobalSinkState &gstate_p)
+ : BasePipelineEvent(pipeline_p), context(context), op(op_p), gstate(gstate_p) {
  }

- const PhysicalHashAggregate &op;
- HashAggregateGlobalState &gstate;
+ public:
+ void Schedule() override;
+ void FinishEvent() override;
+
+ private:
+ void CreateGlobalSources();
+
+ private:
  ClientContext &context;

+ const PhysicalHashAggregate &op;
+ HashAggregateGlobalSinkState &gstate;
+
  public:
- void Schedule() override {
- vector<shared_ptr<Task>> tasks;
- tasks.push_back(make_uniq<HashAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate, context, op));
- D_ASSERT(!tasks.empty());
- SetTasks(std::move(tasks));
- }
+ //! The GlobalSourceStates for all the radix tables of the distinct aggregates
+ vector<vector<unique_ptr<GlobalSourceState>>> global_source_states;
  };

- //! DISTINCT FINALIZE TASK
+ class HashAggregateDistinctFinalizeTask : public ExecutorTask {
+ public:
+ HashAggregateDistinctFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p, const PhysicalHashAggregate &op,
+ HashAggregateGlobalSinkState &state_p)
+ : ExecutorTask(pipeline.executor), pipeline(pipeline), event(std::move(event_p)), op(op), gstate(state_p) {
+ }

- class HashDistinctAggregateFinalizeTask : public ExecutorTask {
  public:
- HashDistinctAggregateFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p, HashAggregateGlobalState &state_p,
- ClientContext &context, const PhysicalHashAggregate &op,
- vector<vector<unique_ptr<GlobalSourceState>>> &global_sources_p)
- : ExecutorTask(pipeline.executor), pipeline(pipeline), event(std::move(event_p)), gstate(state_p),
- context(context), op(op), global_sources(global_sources_p) {
- }
-
- void AggregateDistinctGrouping(DistinctAggregateCollectionInfo &info,
- const HashAggregateGroupingData &grouping_data,
- HashAggregateGroupingGlobalState &grouping_state, idx_t grouping_idx) {
- auto &aggregates = info.aggregates;
- auto &data = *grouping_data.distinct_data;
- auto &state = *grouping_state.distinct_state;
- auto &table_state = *grouping_state.table_state;
-
- ThreadContext temp_thread_context(context);
- ExecutionContext temp_exec_context(context, temp_thread_context, &pipeline);
-
- auto temp_local_state = grouping_data.table_data.GetLocalSinkState(temp_exec_context);
-
- // Create a chunk that mimics the 'input' chunk in Sink, for storing the group vectors
- DataChunk group_chunk;
- if (!op.input_group_types.empty()) {
- group_chunk.Initialize(context, op.input_group_types);
- }
+ TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;

- auto &groups = op.grouped_aggregate_data.groups;
- const idx_t group_by_size = groups.size();
+ private:
+ void AggregateDistinctGrouping(const idx_t grouping_idx);

- DataChunk aggregate_input_chunk;
- if (!gstate.payload_types.empty()) {
- aggregate_input_chunk.Initialize(context, gstate.payload_types);
- }
+ private:
+ Pipeline &pipeline;
+ shared_ptr<Event> event;

- idx_t payload_idx;
- idx_t next_payload_idx = 0;
+ const PhysicalHashAggregate &op;
+ HashAggregateGlobalSinkState &gstate;
+ };
+
+ void HashAggregateDistinctFinalizeEvent::Schedule() {
+ CreateGlobalSources();
+
+ const idx_t n_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
+ vector<shared_ptr<Task>> tasks;
+ for (idx_t i = 0; i < n_threads; i++) {
+ tasks.push_back(make_uniq<HashAggregateDistinctFinalizeTask>(*pipeline, shared_from_this(), op, gstate));
+ }
+ SetTasks(std::move(tasks));
+ }

- for (idx_t i = 0; i < op.grouped_aggregate_data.aggregates.size(); i++) {
- auto &aggregate = aggregates[i]->Cast<BoundAggregateExpression>();
+ void HashAggregateDistinctFinalizeEvent::CreateGlobalSources() {
+ auto &aggregates = op.grouped_aggregate_data.aggregates;
+ global_source_states.reserve(op.groupings.size());
+ for (idx_t grouping_idx = 0; grouping_idx < op.groupings.size(); grouping_idx++) {
+ auto &grouping = op.groupings[grouping_idx];
+ auto &distinct_data = *grouping.distinct_data;

- // Forward the payload idx
- payload_idx = next_payload_idx;
- next_payload_idx = payload_idx + aggregate.children.size();
+ vector<unique_ptr<GlobalSourceState>> aggregate_sources;
+ aggregate_sources.reserve(aggregates.size());
+ for (idx_t agg_idx = 0; agg_idx < aggregates.size(); agg_idx++) {
+ auto &aggregate = aggregates[agg_idx];
+ auto &aggr = aggregate->Cast<BoundAggregateExpression>();

- // If aggregate is not distinct, skip it
- if (!data.IsDistinct(i)) {
+ if (!aggr.IsDistinct()) {
+ aggregate_sources.push_back(nullptr);
  continue;
  }
- D_ASSERT(data.info.table_map.count(i));
- auto table_idx = data.info.table_map.at(i);
- auto &radix_table_p = data.radix_tables[table_idx];
-
- // Create a duplicate of the output_chunk, because of multi-threading we cant alter the original
- DataChunk output_chunk;
- output_chunk.Initialize(context, state.distinct_output_chunks[table_idx]->GetTypes());
-
- auto &global_source = global_sources[grouping_idx][i];
- auto local_source = radix_table_p->GetLocalSourceState(temp_exec_context);
-
- // Fetch all the data from the aggregate ht, and Sink it into the main ht
- while (true) {
- output_chunk.Reset();
- group_chunk.Reset();
- aggregate_input_chunk.Reset();
-
- InterruptState interrupt_state;
- OperatorSourceInput source_input {*global_source, *local_source, interrupt_state};
- auto res = radix_table_p->GetData(temp_exec_context, output_chunk, *state.radix_states[table_idx],
- source_input);
-
- if (res == SourceResultType::FINISHED) {
- D_ASSERT(output_chunk.size() == 0);
- break;
- } else if (res == SourceResultType::BLOCKED) {
- throw InternalException(
- "Unexpected interrupt from radix table GetData in HashDistinctAggregateFinalizeTask");
- }
-
- auto &grouped_aggregate_data = *data.grouped_aggregate_data[table_idx];
-
- for (idx_t group_idx = 0; group_idx < group_by_size; group_idx++) {
- auto &group = grouped_aggregate_data.groups[group_idx];
- auto &bound_ref_expr = group->Cast<BoundReferenceExpression>();
- group_chunk.data[bound_ref_expr.index].Reference(output_chunk.data[group_idx]);
- }
- group_chunk.SetCardinality(output_chunk);
-
- for (idx_t child_idx = 0; child_idx < grouped_aggregate_data.groups.size() - group_by_size;
- child_idx++) {
- aggregate_input_chunk.data[payload_idx + child_idx].Reference(
- output_chunk.data[group_by_size + child_idx]);
- }
- aggregate_input_chunk.SetCardinality(output_chunk);
-
- // Sink it into the main ht
- OperatorSinkInput sink_input {table_state, *temp_local_state, interrupt_state};
- grouping_data.table_data.Sink(temp_exec_context, group_chunk, sink_input, aggregate_input_chunk, {i});
- }
+ D_ASSERT(distinct_data.info.table_map.count(agg_idx));
+
+ auto table_idx = distinct_data.info.table_map.at(agg_idx);
+ auto &radix_table_p = distinct_data.radix_tables[table_idx];
+ aggregate_sources.push_back(radix_table_p->GetGlobalSourceState(context));
  }
- grouping_data.table_data.Combine(temp_exec_context, table_state, *temp_local_state);
+ global_source_states.push_back(std::move(aggregate_sources));
  }
+ }

- TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
- D_ASSERT(op.distinct_collection_info);
- auto &info = *op.distinct_collection_info;
- for (idx_t i = 0; i < op.groupings.size(); i++) {
- auto &grouping = op.groupings[i];
- auto &grouping_state = gstate.grouping_states[i];
- AggregateDistinctGrouping(info, grouping, grouping_state, i);
- }
- event->FinishTask();
- return TaskExecutionResult::TASK_FINISHED;
+ void HashAggregateDistinctFinalizeEvent::FinishEvent() {
+ // Now that everything is added to the main ht, we can actually finalize
+ auto new_event = make_shared<HashAggregateFinalizeEvent>(context, pipeline.get(), op, gstate);
+ this->InsertEvent(std::move(new_event));
+ }
+
+ TaskExecutionResult HashAggregateDistinctFinalizeTask::ExecuteTask(TaskExecutionMode mode) {
+ for (idx_t grouping_idx = 0; grouping_idx < op.groupings.size(); grouping_idx++) {
+ AggregateDistinctGrouping(grouping_idx);
  }
+ event->FinishTask();
+ return TaskExecutionResult::TASK_FINISHED;
+ }

- private:
- Pipeline &pipeline;
- shared_ptr<Event> event;
- HashAggregateGlobalState &gstate;
- ClientContext &context;
- const PhysicalHashAggregate &op;
- vector<vector<unique_ptr<GlobalSourceState>>> &global_sources;
- };
+ void HashAggregateDistinctFinalizeTask::AggregateDistinctGrouping(const idx_t grouping_idx) {
+ D_ASSERT(op.distinct_collection_info);
+ auto &info = *op.distinct_collection_info;

- //! DISTINCT FINALIZE EVENT
+ auto &grouping_data = op.groupings[grouping_idx];
+ auto &grouping_state = gstate.grouping_states[grouping_idx];
+ D_ASSERT(grouping_state.distinct_state);
+ auto &distinct_state = *grouping_state.distinct_state;
+ auto &distinct_data = *grouping_data.distinct_data;

- // TODO: Create tasks and run these in parallel instead of doing this all in Schedule, single threaded
- class HashDistinctAggregateFinalizeEvent : public BasePipelineEvent {
- public:
- HashDistinctAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
- Pipeline &pipeline_p, ClientContext &context)
- : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), context(context) {
- }
- const PhysicalHashAggregate &op;
- HashAggregateGlobalState &gstate;
- ClientContext &context;
- //! The GlobalSourceStates for all the radix tables of the distinct aggregates
- vector<vector<unique_ptr<GlobalSourceState>>> global_sources;
+ auto &aggregates = info.aggregates;

- public:
- void Schedule() override {
- global_sources = CreateGlobalSources();
-
- vector<shared_ptr<Task>> tasks;
- auto &scheduler = TaskScheduler::GetScheduler(context);
- auto number_of_threads = scheduler.NumberOfThreads();
- tasks.reserve(number_of_threads);
- for (int32_t i = 0; i < number_of_threads; i++) {
- tasks.push_back(make_uniq<HashDistinctAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate, context,
- op, global_sources));
- }
- D_ASSERT(!tasks.empty());
- SetTasks(std::move(tasks));
+ // Thread-local contexts
+ ThreadContext thread_context(executor.context);
+ ExecutionContext execution_context(executor.context, thread_context, &pipeline);
+
+ // Sink state to sink into global HTs
+ InterruptState interrupt_state;
+ auto &global_sink_state = *grouping_state.table_state;
+ auto local_sink_state = grouping_data.table_data.GetLocalSinkState(execution_context);
+ OperatorSinkInput sink_input {global_sink_state, *local_sink_state, interrupt_state};
+
+ // Create a chunk that mimics the 'input' chunk in Sink, for storing the group vectors
+ DataChunk group_chunk;
+ if (!op.input_group_types.empty()) {
+ group_chunk.Initialize(executor.context, op.input_group_types);
  }

- void FinishEvent() override {
- //! Now that everything is added to the main ht, we can actually finalize
- auto new_event = make_shared<HashAggregateFinalizeEvent>(op, gstate, pipeline.get(), context);
- this->InsertEvent(std::move(new_event));
+ auto &groups = op.grouped_aggregate_data.groups;
+ const idx_t group_by_size = groups.size();
+
+ DataChunk aggregate_input_chunk;
+ if (!gstate.payload_types.empty()) {
+ aggregate_input_chunk.Initialize(executor.context, gstate.payload_types);
  }

- private:
- vector<vector<unique_ptr<GlobalSourceState>>> CreateGlobalSources() {
- vector<vector<unique_ptr<GlobalSourceState>>> grouping_sources;
- grouping_sources.reserve(op.groupings.size());
- for (idx_t grouping_idx = 0; grouping_idx < op.groupings.size(); grouping_idx++) {
- auto &grouping = op.groupings[grouping_idx];
- auto &data = *grouping.distinct_data;
-
- vector<unique_ptr<GlobalSourceState>> aggregate_sources;
- aggregate_sources.reserve(op.grouped_aggregate_data.aggregates.size());
-
- for (idx_t i = 0; i < op.grouped_aggregate_data.aggregates.size(); i++) {
- auto &aggregate = op.grouped_aggregate_data.aggregates[i];
- auto &aggr = aggregate->Cast<BoundAggregateExpression>();
-
- if (!aggr.IsDistinct()) {
- aggregate_sources.push_back(nullptr);
- continue;
- }
-
- D_ASSERT(data.info.table_map.count(i));
- auto table_idx = data.info.table_map.at(i);
- auto &radix_table_p = data.radix_tables[table_idx];
- aggregate_sources.push_back(radix_table_p->GetGlobalSourceState(context));
- }
- grouping_sources.push_back(std::move(aggregate_sources));
+ auto &finalize_event = event->Cast<HashAggregateDistinctFinalizeEvent>();
+
+ idx_t payload_idx;
+ idx_t next_payload_idx = 0;
+ for (idx_t agg_idx = 0; agg_idx < op.grouped_aggregate_data.aggregates.size(); agg_idx++) {
+ auto &aggregate = aggregates[agg_idx]->Cast<BoundAggregateExpression>();
+
+ // Forward the payload idx
+ payload_idx = next_payload_idx;
+ next_payload_idx = payload_idx + aggregate.children.size();
+
+ // If aggregate is not distinct, skip it
+ if (!distinct_data.IsDistinct(agg_idx)) {
+ continue;
  }
- return grouping_sources;
- }
- };

- //! DISTINCT COMBINE EVENT
+ D_ASSERT(distinct_data.info.table_map.count(agg_idx));
+ const auto &table_idx = distinct_data.info.table_map.at(agg_idx);
+ auto &radix_table = distinct_data.radix_tables[table_idx];

- class HashDistinctCombineFinalizeEvent : public BasePipelineEvent {
- public:
- HashDistinctCombineFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
- Pipeline &pipeline_p, ClientContext &client)
- : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), client(client) {
- }
+ auto &sink = *distinct_state.radix_states[table_idx];
+ auto local_source = radix_table->GetLocalSourceState(execution_context);
+ OperatorSourceInput source_input {*finalize_event.global_source_states[grouping_idx][agg_idx], *local_source,
+ interrupt_state};

- const PhysicalHashAggregate &op;
- HashAggregateGlobalState &gstate;
- ClientContext &client;
+ // Create a duplicate of the output_chunk, because of multi-threading we cant alter the original
+ DataChunk output_chunk;
+ output_chunk.Initialize(executor.context, distinct_state.distinct_output_chunks[table_idx]->GetTypes());
+
+ // Fetch all the data from the aggregate ht, and Sink it into the main ht
+ while (true) {
+ output_chunk.Reset();
+ group_chunk.Reset();
+ aggregate_input_chunk.Reset();
+
+ auto res = radix_table->GetData(execution_context, output_chunk, sink, source_input);
+ if (res == SourceResultType::FINISHED) {
+ D_ASSERT(output_chunk.size() == 0);
+ break;
+ } else if (res == SourceResultType::BLOCKED) {
+ throw InternalException(
+ "Unexpected interrupt from radix table GetData in HashAggregateDistinctFinalizeTask");
+ }

- public:
- void Schedule() override {
- vector<shared_ptr<Task>> tasks;
- for (idx_t i = 0; i < op.groupings.size(); i++) {
- auto &grouping = op.groupings[i];
- auto &distinct_data = *grouping.distinct_data;
- auto &distinct_state = *gstate.grouping_states[i].distinct_state;
- for (idx_t table_idx = 0; table_idx < distinct_data.radix_tables.size(); table_idx++) {
- if (!distinct_data.radix_tables[table_idx]) {
- continue;
- }
- distinct_data.radix_tables[table_idx]->ScheduleTasks(pipeline->executor, shared_from_this(),
- *distinct_state.radix_states[table_idx], tasks);
+ auto &grouped_aggregate_data = *distinct_data.grouped_aggregate_data[table_idx];
+ for (idx_t group_idx = 0; group_idx < group_by_size; group_idx++) {
+ auto &group = grouped_aggregate_data.groups[group_idx];
+ auto &bound_ref_expr = group->Cast<BoundReferenceExpression>();
+ group_chunk.data[bound_ref_expr.index].Reference(output_chunk.data[group_idx]);
  }
- }
+ group_chunk.SetCardinality(output_chunk);

- D_ASSERT(!tasks.empty());
- SetTasks(std::move(tasks));
- }
+ for (idx_t child_idx = 0; child_idx < grouped_aggregate_data.groups.size() - group_by_size; child_idx++) {
+ aggregate_input_chunk.data[payload_idx + child_idx].Reference(
+ output_chunk.data[group_by_size + child_idx]);
+ }
+ aggregate_input_chunk.SetCardinality(output_chunk);

- void FinishEvent() override {
- //! Now that all tables are combined, it's time to do the distinct aggregations
- auto new_event = make_shared<HashDistinctAggregateFinalizeEvent>(op, gstate, *pipeline, client);
- this->InsertEvent(std::move(new_event));
+ // Sink it into the main ht
+ grouping_data.table_data.Sink(execution_context, group_chunk, sink_input, aggregate_input_chunk, {agg_idx});
+ }
  }
- };
-
- //! FINALIZE
+ grouping_data.table_data.Combine(execution_context, global_sink_state, *local_sink_state);
+ }

  SinkFinalizeType PhysicalHashAggregate::FinalizeDistinct(Pipeline &pipeline, Event &event, ClientContext &context,
  GlobalSinkState &gstate_p) const {
- auto &gstate = gstate_p.Cast<HashAggregateGlobalState>();
+ auto &gstate = gstate_p.Cast<HashAggregateGlobalSinkState>();
  D_ASSERT(distinct_collection_info);

- bool any_partitioned = false;
  for (idx_t i = 0; i < groupings.size(); i++) {
  auto &grouping = groupings[i];
  auto &distinct_data = *grouping.distinct_data;
@@ -773,28 +724,17 @@ SinkFinalizeType PhysicalHashAggregate::FinalizeDistinct(Pipeline &pipeline, Eve
  }
  auto &radix_table = distinct_data.radix_tables[table_idx];
  auto &radix_state = *distinct_state.radix_states[table_idx];
- bool partitioned = radix_table->Finalize(context, radix_state);
- if (partitioned) {
- any_partitioned = true;
- }
+ radix_table->Finalize(context, radix_state);
  }
  }
- if (any_partitioned) {
- // If any of the groupings are partitioned then we first need to combine those, then aggregate
- auto new_event = make_shared<HashDistinctCombineFinalizeEvent>(*this, gstate, pipeline, context);
- event.InsertEvent(std::move(new_event));
- } else {
- // Hashtables aren't partitioned, they dont need to be joined first
- // so we can already compute the aggregate
- auto new_event = make_shared<HashDistinctAggregateFinalizeEvent>(*this, gstate, pipeline, context);
- event.InsertEvent(std::move(new_event));
- }
+ auto new_event = make_shared<HashAggregateDistinctFinalizeEvent>(context, pipeline, *this, gstate);
+ event.InsertEvent(std::move(new_event));
  return SinkFinalizeType::READY;
  }

  SinkFinalizeType PhysicalHashAggregate::FinalizeInternal(Pipeline &pipeline, Event &event, ClientContext &context,
  GlobalSinkState &gstate_p, bool check_distinct) const {
- auto &gstate = gstate_p.Cast<HashAggregateGlobalState>();
+ auto &gstate = gstate_p.Cast<HashAggregateGlobalSinkState>();

  if (check_distinct && distinct_collection_info) {
  // There are distinct aggregates
@@ -803,19 +743,10 @@ SinkFinalizeType PhysicalHashAggregate::FinalizeInternal(Pipeline &pipeline, Eve
  return FinalizeDistinct(pipeline, event, context, gstate_p);
  }

- bool any_partitioned = false;
  for (idx_t i = 0; i < groupings.size(); i++) {
  auto &grouping = groupings[i];
  auto &grouping_gstate = gstate.grouping_states[i];
-
- bool is_partitioned = grouping.table_data.Finalize(context, *grouping_gstate.table_state);
- if (is_partitioned) {
- any_partitioned = true;
- }
- }
- if (any_partitioned) {
- auto new_event = make_shared<HashAggregateMergeEvent>(*this, gstate, &pipeline);
- event.InsertEvent(std::move(new_event));
+ grouping.table_data.Finalize(context, *grouping_gstate.table_state);
  }
  return SinkFinalizeType::READY;
  }
@@ -828,10 +759,9 @@ SinkFinalizeType PhysicalHashAggregate::Finalize(Pipeline &pipeline, Event &even
  //===--------------------------------------------------------------------===//
  // Source
  //===--------------------------------------------------------------------===//
- class PhysicalHashAggregateGlobalSourceState : public GlobalSourceState {
+ class HashAggregateGlobalSourceState : public GlobalSourceState {
  public:
- PhysicalHashAggregateGlobalSourceState(ClientContext &context, const PhysicalHashAggregate &op)
- : op(op), state_index(0) {
+ HashAggregateGlobalSourceState(ClientContext &context, const PhysicalHashAggregate &op) : op(op), state_index(0) {
  for (auto &grouping : op.groupings) {
  auto &rt = grouping.table_data;
  radix_states.push_back(rt.GetGlobalSourceState(context));
@@ -851,24 +781,24 @@ public:
  return 1;
  }

- auto &ht_state = op.sink_state->Cast<HashAggregateGlobalState>();
+ auto &ht_state = op.sink_state->Cast<HashAggregateGlobalSinkState>();
  idx_t count = 0;
  for (size_t sidx = 0; sidx < op.groupings.size(); ++sidx) {
  auto &grouping = op.groupings[sidx];
  auto &grouping_gstate = ht_state.grouping_states[sidx];
- count += grouping.table_data.Size(*grouping_gstate.table_state);
+ count += grouping.table_data.Count(*grouping_gstate.table_state);
  }
  return MaxValue<idx_t>(1, count / STANDARD_VECTOR_SIZE);
  }
  };

  unique_ptr<GlobalSourceState> PhysicalHashAggregate::GetGlobalSourceState(ClientContext &context) const {
- return make_uniq<PhysicalHashAggregateGlobalSourceState>(context, *this);
+ return make_uniq<HashAggregateGlobalSourceState>(context, *this);
  }

- class PhysicalHashAggregateLocalSourceState : public LocalSourceState {
+ class HashAggregateLocalSourceState : public LocalSourceState {
  public:
- explicit PhysicalHashAggregateLocalSourceState(ExecutionContext &context, const PhysicalHashAggregate &op) {
+ explicit HashAggregateLocalSourceState(ExecutionContext &context, const PhysicalHashAggregate &op) {
  for (auto &grouping : op.groupings) {
  auto &rt = grouping.table_data;
  radix_states.push_back(rt.GetLocalSourceState(context));
@@ -880,14 +810,14 @@ public:

  unique_ptr<LocalSourceState> PhysicalHashAggregate::GetLocalSourceState(ExecutionContext &context,
  GlobalSourceState &gstate) const {
- return make_uniq<PhysicalHashAggregateLocalSourceState>(context, *this);
+ return make_uniq<HashAggregateLocalSourceState>(context, *this);
  }

  SourceResultType PhysicalHashAggregate::GetData(ExecutionContext &context, DataChunk &chunk,
  OperatorSourceInput &input) const {
- auto &sink_gstate = sink_state->Cast<HashAggregateGlobalState>();
- auto &gstate = input.global_state.Cast<PhysicalHashAggregateGlobalSourceState>();
- auto &lstate = input.local_state.Cast<PhysicalHashAggregateLocalSourceState>();
+ auto &sink_gstate = sink_state->Cast<HashAggregateGlobalSinkState>();
+ auto &gstate = input.global_state.Cast<HashAggregateGlobalSourceState>();
+ auto &lstate = input.local_state.Cast<HashAggregateLocalSourceState>();
  while (true) {
  idx_t radix_idx = gstate.state_index;
  if (radix_idx >= groupings.size()) {