duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
- package/src/duckdb/extension/json/json_scan.cpp +1 -1
- package/src/duckdb/extension/json/json_serializer.cpp +26 -69
- package/src/duckdb/src/common/enum_util.cpp +119 -7
- package/src/duckdb/src/common/extra_type_info.cpp +7 -3
- package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
- package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
- package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/value.cpp +63 -42
- package/src/duckdb/src/common/types/vector.cpp +33 -67
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
- package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
- package/src/duckdb/src/execution/window_executor.cpp +6 -5
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
- package/src/duckdb/src/function/table/read_csv.cpp +150 -136
- package/src/duckdb/src/function/table/table_scan.cpp +0 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +12 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +2 -2
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
- package/src/duckdb/src/planner/logical_operator.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
- package/src/duckdb/src/storage/table/row_group.cpp +68 -1
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_src_execution.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -37,7 +37,9 @@ PhysicalUngroupedAggregate::PhysicalUngroupedAggregate(vector<LogicalType> types
|
|
37
37
|
//===--------------------------------------------------------------------===//
|
38
38
|
struct AggregateState {
|
39
39
|
explicit AggregateState(const vector<unique_ptr<Expression>> &aggregate_expressions) {
|
40
|
-
|
40
|
+
counts = make_uniq_array<atomic<idx_t>>(aggregate_expressions.size());
|
41
|
+
for (idx_t i = 0; i < aggregate_expressions.size(); i++) {
|
42
|
+
auto &aggregate = aggregate_expressions[i];
|
41
43
|
D_ASSERT(aggregate->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
|
42
44
|
auto &aggr = aggregate->Cast<BoundAggregateExpression>();
|
43
45
|
auto state = make_unsafe_uniq_array<data_t>(aggr.function.state_size());
|
@@ -46,7 +48,7 @@ struct AggregateState {
|
|
46
48
|
bind_data.push_back(aggr.bind_info.get());
|
47
49
|
destructors.push_back(aggr.function.destructor);
|
48
50
|
#ifdef DEBUG
|
49
|
-
counts
|
51
|
+
counts[i] = 0;
|
50
52
|
#endif
|
51
53
|
}
|
52
54
|
}
|
@@ -77,12 +79,12 @@ struct AggregateState {
|
|
77
79
|
//! The destructors
|
78
80
|
vector<aggregate_destructor_t> destructors;
|
79
81
|
//! Counts (used for verification)
|
80
|
-
|
82
|
+
unique_array<atomic<idx_t>> counts;
|
81
83
|
};
|
82
84
|
|
83
|
-
class
|
85
|
+
class UngroupedAggregateGlobalSinkState : public GlobalSinkState {
|
84
86
|
public:
|
85
|
-
|
87
|
+
UngroupedAggregateGlobalSinkState(const PhysicalUngroupedAggregate &op, ClientContext &client)
|
86
88
|
: state(op.aggregates), finished(false), allocator(BufferAllocator::Get(client)) {
|
87
89
|
if (op.distinct_data) {
|
88
90
|
distinct_state = make_uniq<DistinctAggregateState>(*op.distinct_data, client);
|
@@ -101,13 +103,13 @@ public:
|
|
101
103
|
ArenaAllocator allocator;
|
102
104
|
};
|
103
105
|
|
104
|
-
class
|
106
|
+
class UngroupedAggregateLocalSinkState : public LocalSinkState {
|
105
107
|
public:
|
106
|
-
|
107
|
-
|
108
|
+
UngroupedAggregateLocalSinkState(const PhysicalUngroupedAggregate &op, const vector<LogicalType> &child_types,
|
109
|
+
GlobalSinkState &gstate_p, ExecutionContext &context)
|
108
110
|
: allocator(BufferAllocator::Get(context.client)), state(op.aggregates), child_executor(context.client),
|
109
111
|
aggregate_input_chunk(), filter_set() {
|
110
|
-
auto &gstate = gstate_p.Cast<
|
112
|
+
auto &gstate = gstate_p.Cast<UngroupedAggregateGlobalSinkState>();
|
111
113
|
|
112
114
|
auto &allocator = BufferAllocator::Get(context.client);
|
113
115
|
InitializeDistinctAggregates(op, gstate, context);
|
@@ -147,8 +149,8 @@ public:
|
|
147
149
|
void Reset() {
|
148
150
|
aggregate_input_chunk.Reset();
|
149
151
|
}
|
150
|
-
void InitializeDistinctAggregates(const PhysicalUngroupedAggregate &op,
|
151
|
-
ExecutionContext &context) {
|
152
|
+
void InitializeDistinctAggregates(const PhysicalUngroupedAggregate &op,
|
153
|
+
const UngroupedAggregateGlobalSinkState &gstate, ExecutionContext &context) {
|
152
154
|
|
153
155
|
if (!op.distinct_data) {
|
154
156
|
return;
|
@@ -185,19 +187,19 @@ bool PhysicalUngroupedAggregate::SinkOrderDependent() const {
|
|
185
187
|
}
|
186
188
|
|
187
189
|
unique_ptr<GlobalSinkState> PhysicalUngroupedAggregate::GetGlobalSinkState(ClientContext &context) const {
|
188
|
-
return make_uniq<
|
190
|
+
return make_uniq<UngroupedAggregateGlobalSinkState>(*this, context);
|
189
191
|
}
|
190
192
|
|
191
193
|
unique_ptr<LocalSinkState> PhysicalUngroupedAggregate::GetLocalSinkState(ExecutionContext &context) const {
|
192
194
|
D_ASSERT(sink_state);
|
193
195
|
auto &gstate = *sink_state;
|
194
|
-
return make_uniq<
|
196
|
+
return make_uniq<UngroupedAggregateLocalSinkState>(*this, children[0]->GetTypes(), gstate, context);
|
195
197
|
}
|
196
198
|
|
197
199
|
void PhysicalUngroupedAggregate::SinkDistinct(ExecutionContext &context, DataChunk &chunk,
|
198
200
|
OperatorSinkInput &input) const {
|
199
|
-
auto &sink = input.local_state.Cast<
|
200
|
-
auto &global_sink = input.global_state.Cast<
|
201
|
+
auto &sink = input.local_state.Cast<UngroupedAggregateLocalSinkState>();
|
202
|
+
auto &global_sink = input.global_state.Cast<UngroupedAggregateGlobalSinkState>();
|
201
203
|
D_ASSERT(distinct_data);
|
202
204
|
auto &distinct_state = *global_sink.distinct_state;
|
203
205
|
auto &distinct_info = *distinct_collection_info;
|
@@ -239,7 +241,7 @@ void PhysicalUngroupedAggregate::SinkDistinct(ExecutionContext &context, DataChu
|
|
239
241
|
|
240
242
|
SinkResultType PhysicalUngroupedAggregate::Sink(ExecutionContext &context, DataChunk &chunk,
|
241
243
|
OperatorSinkInput &input) const {
|
242
|
-
auto &sink = input.local_state.Cast<
|
244
|
+
auto &sink = input.local_state.Cast<UngroupedAggregateLocalSinkState>();
|
243
245
|
|
244
246
|
// perform the aggregation inside the local state
|
245
247
|
sink.Reset();
|
@@ -296,23 +298,22 @@ SinkResultType PhysicalUngroupedAggregate::Sink(ExecutionContext &context, DataC
|
|
296
298
|
}
|
297
299
|
|
298
300
|
//===--------------------------------------------------------------------===//
|
299
|
-
//
|
301
|
+
// Combine
|
300
302
|
//===--------------------------------------------------------------------===//
|
301
|
-
|
302
303
|
void PhysicalUngroupedAggregate::CombineDistinct(ExecutionContext &context, OperatorSinkCombineInput &input) const {
|
303
|
-
auto &
|
304
|
-
auto &
|
304
|
+
auto &gstate = input.global_state.Cast<UngroupedAggregateGlobalSinkState>();
|
305
|
+
auto &lstate = input.local_state.Cast<UngroupedAggregateLocalSinkState>();
|
305
306
|
|
306
307
|
if (!distinct_data) {
|
307
308
|
return;
|
308
309
|
}
|
309
|
-
auto &distinct_state =
|
310
|
+
auto &distinct_state = gstate.distinct_state;
|
310
311
|
auto table_count = distinct_data->radix_tables.size();
|
311
312
|
for (idx_t table_idx = 0; table_idx < table_count; table_idx++) {
|
312
313
|
D_ASSERT(distinct_data->radix_tables[table_idx]);
|
313
314
|
auto &radix_table = *distinct_data->radix_tables[table_idx];
|
314
315
|
auto &radix_global_sink = *distinct_state->radix_states[table_idx];
|
315
|
-
auto &radix_local_sink = *
|
316
|
+
auto &radix_local_sink = *lstate.radix_states[table_idx];
|
316
317
|
|
317
318
|
radix_table.Combine(context, radix_global_sink, radix_local_sink);
|
318
319
|
}
|
@@ -320,18 +321,17 @@ void PhysicalUngroupedAggregate::CombineDistinct(ExecutionContext &context, Oper
|
|
320
321
|
|
321
322
|
SinkCombineResultType PhysicalUngroupedAggregate::Combine(ExecutionContext &context,
|
322
323
|
OperatorSinkCombineInput &input) const {
|
323
|
-
auto &gstate = input.global_state.Cast<
|
324
|
-
auto &
|
324
|
+
auto &gstate = input.global_state.Cast<UngroupedAggregateGlobalSinkState>();
|
325
|
+
auto &lstate = input.local_state.Cast<UngroupedAggregateLocalSinkState>();
|
325
326
|
D_ASSERT(!gstate.finished);
|
326
327
|
|
327
328
|
// finalize: combine the local state into the global state
|
328
329
|
// all aggregates are combinable: we might be doing a parallel aggregate
|
329
330
|
// use the combine method to combine the partial aggregates
|
330
|
-
|
331
|
-
|
332
|
-
OperatorSinkCombineInput distinct_input {gstate, source, input.interrupt_state};
|
331
|
+
OperatorSinkCombineInput distinct_input {gstate, lstate, input.interrupt_state};
|
333
332
|
CombineDistinct(context, distinct_input);
|
334
333
|
|
334
|
+
lock_guard<mutex> glock(gstate.lock);
|
335
335
|
for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) {
|
336
336
|
auto &aggregate = aggregates[aggr_idx]->Cast<BoundAggregateExpression>();
|
337
337
|
|
@@ -339,207 +339,236 @@ SinkCombineResultType PhysicalUngroupedAggregate::Combine(ExecutionContext &cont
|
|
339
339
|
continue;
|
340
340
|
}
|
341
341
|
|
342
|
-
Vector source_state(Value::POINTER(CastPointerToValue(
|
342
|
+
Vector source_state(Value::POINTER(CastPointerToValue(lstate.state.aggregates[aggr_idx].get())));
|
343
343
|
Vector dest_state(Value::POINTER(CastPointerToValue(gstate.state.aggregates[aggr_idx].get())));
|
344
344
|
|
345
345
|
AggregateInputData aggr_input_data(aggregate.bind_info.get(), gstate.allocator);
|
346
346
|
aggregate.function.combine(source_state, dest_state, aggr_input_data, 1);
|
347
347
|
#ifdef DEBUG
|
348
|
-
gstate.state.counts[aggr_idx] +=
|
348
|
+
gstate.state.counts[aggr_idx] += lstate.state.counts[aggr_idx];
|
349
349
|
#endif
|
350
350
|
}
|
351
|
-
|
351
|
+
lstate.allocator.Destroy();
|
352
352
|
|
353
353
|
auto &client_profiler = QueryProfiler::Get(context.client);
|
354
|
-
context.thread.profiler.Flush(*this,
|
354
|
+
context.thread.profiler.Flush(*this, lstate.child_executor, "child_executor", 0);
|
355
355
|
client_profiler.Flush(context.thread.profiler);
|
356
356
|
|
357
357
|
return SinkCombineResultType::FINISHED;
|
358
358
|
}
|
359
359
|
|
360
|
-
|
360
|
+
//===--------------------------------------------------------------------===//
|
361
|
+
// Finalize
|
362
|
+
//===--------------------------------------------------------------------===//
|
363
|
+
class UngroupedDistinctAggregateFinalizeEvent : public BasePipelineEvent {
|
361
364
|
public:
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
allocator(BufferAllocator::Get(context)) {
|
365
|
+
UngroupedDistinctAggregateFinalizeEvent(ClientContext &context, const PhysicalUngroupedAggregate &op_p,
|
366
|
+
UngroupedAggregateGlobalSinkState &gstate_p, Pipeline &pipeline_p)
|
367
|
+
: BasePipelineEvent(pipeline_p), context(context), op(op_p), gstate(gstate_p), tasks_scheduled(0),
|
368
|
+
tasks_done(0) {
|
367
369
|
}
|
368
370
|
|
369
|
-
|
370
|
-
|
371
|
-
auto &aggregates = op.aggregates;
|
372
|
-
auto &distinct_state = *gstate.distinct_state;
|
373
|
-
auto &distinct_data = *op.distinct_data;
|
374
|
-
|
375
|
-
ThreadContext temp_thread_context(context);
|
376
|
-
ExecutionContext temp_exec_context(context, temp_thread_context, nullptr);
|
377
|
-
|
378
|
-
idx_t payload_idx = 0;
|
379
|
-
idx_t next_payload_idx = 0;
|
371
|
+
public:
|
372
|
+
void Schedule() override;
|
380
373
|
|
381
|
-
|
382
|
-
|
374
|
+
private:
|
375
|
+
ClientContext &context;
|
383
376
|
|
384
|
-
|
385
|
-
|
386
|
-
next_payload_idx = payload_idx + aggregate.children.size();
|
377
|
+
const PhysicalUngroupedAggregate &op;
|
378
|
+
UngroupedAggregateGlobalSinkState &gstate;
|
387
379
|
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
380
|
+
public:
|
381
|
+
mutex lock;
|
382
|
+
idx_t tasks_scheduled;
|
383
|
+
idx_t tasks_done;
|
392
384
|
|
393
|
-
|
394
|
-
|
395
|
-
D_ASSERT(distinct_data.info.table_map.count(agg_idx));
|
396
|
-
auto table_idx = distinct_data.info.table_map.at(agg_idx);
|
397
|
-
auto &radix_table_p = distinct_data.radix_tables[table_idx];
|
398
|
-
auto &output_chunk = *distinct_state.distinct_output_chunks[table_idx];
|
399
|
-
auto &grouped_aggregate_data = *distinct_data.grouped_aggregate_data[table_idx];
|
400
|
-
|
401
|
-
payload_chunk.InitializeEmpty(grouped_aggregate_data.group_types);
|
402
|
-
payload_chunk.SetCardinality(0);
|
403
|
-
|
404
|
-
//! Create global and local state for the hashtable
|
405
|
-
auto global_source_state = radix_table_p->GetGlobalSourceState(context);
|
406
|
-
auto local_source_state = radix_table_p->GetLocalSourceState(temp_exec_context);
|
407
|
-
|
408
|
-
//! Retrieve the stored data from the hashtable
|
409
|
-
while (true) {
|
410
|
-
output_chunk.Reset();
|
411
|
-
|
412
|
-
InterruptState interrupt_state;
|
413
|
-
OperatorSourceInput source_input {*global_source_state, *local_source_state, interrupt_state};
|
414
|
-
auto res = radix_table_p->GetData(temp_exec_context, output_chunk,
|
415
|
-
*distinct_state.radix_states[table_idx], source_input);
|
416
|
-
if (res == SourceResultType::FINISHED) {
|
417
|
-
D_ASSERT(output_chunk.size() == 0);
|
418
|
-
break;
|
419
|
-
} else if (res == SourceResultType::BLOCKED) {
|
420
|
-
throw InternalException(
|
421
|
-
"Unexpected interrupt from radix table GetData in UngroupedDistinctAggregateFinalizeTask");
|
422
|
-
}
|
423
|
-
|
424
|
-
// We dont need to resolve the filter, we already did this in Sink
|
425
|
-
idx_t payload_cnt = aggregate.children.size();
|
426
|
-
for (idx_t i = 0; i < payload_cnt; i++) {
|
427
|
-
payload_chunk.data[i].Reference(output_chunk.data[i]);
|
428
|
-
}
|
429
|
-
payload_chunk.SetCardinality(output_chunk);
|
430
|
-
#ifdef DEBUG
|
431
|
-
gstate.state.counts[agg_idx] += payload_chunk.size();
|
432
|
-
#endif
|
385
|
+
vector<unique_ptr<GlobalSourceState>> global_source_states;
|
386
|
+
};
|
433
387
|
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
D_ASSERT(!gstate.finished);
|
442
|
-
gstate.finished = true;
|
388
|
+
class UngroupedDistinctAggregateFinalizeTask : public ExecutorTask {
|
389
|
+
public:
|
390
|
+
UngroupedDistinctAggregateFinalizeTask(Executor &executor, shared_ptr<Event> event_p,
|
391
|
+
const PhysicalUngroupedAggregate &op,
|
392
|
+
UngroupedAggregateGlobalSinkState &state_p)
|
393
|
+
: ExecutorTask(executor), event(std::move(event_p)), op(op), gstate(state_p),
|
394
|
+
allocator(BufferAllocator::Get(executor.context)) {
|
443
395
|
}
|
444
396
|
|
445
|
-
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
}
|
397
|
+
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;
|
398
|
+
|
399
|
+
private:
|
400
|
+
void AggregateDistinct();
|
450
401
|
|
451
402
|
private:
|
452
403
|
shared_ptr<Event> event;
|
453
|
-
|
454
|
-
ClientContext &context;
|
404
|
+
|
455
405
|
const PhysicalUngroupedAggregate &op;
|
406
|
+
UngroupedAggregateGlobalSinkState &gstate;
|
407
|
+
|
456
408
|
ArenaAllocator allocator;
|
457
409
|
};
|
458
410
|
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
UngroupedAggregateGlobalState &gstate_p, Pipeline &pipeline_p,
|
464
|
-
ClientContext &context)
|
465
|
-
: BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), context(context) {
|
466
|
-
}
|
467
|
-
const PhysicalUngroupedAggregate &op;
|
468
|
-
UngroupedAggregateGlobalState &gstate;
|
469
|
-
ClientContext &context;
|
411
|
+
void UngroupedDistinctAggregateFinalizeEvent::Schedule() {
|
412
|
+
D_ASSERT(gstate.distinct_state);
|
413
|
+
auto &aggregates = op.aggregates;
|
414
|
+
auto &distinct_data = *op.distinct_data;
|
470
415
|
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
416
|
+
idx_t payload_idx = 0;
|
417
|
+
idx_t next_payload_idx = 0;
|
418
|
+
for (idx_t agg_idx = 0; agg_idx < aggregates.size(); agg_idx++) {
|
419
|
+
auto &aggregate = aggregates[agg_idx]->Cast<BoundAggregateExpression>();
|
420
|
+
|
421
|
+
// Forward the payload idx
|
422
|
+
payload_idx = next_payload_idx;
|
423
|
+
next_payload_idx = payload_idx + aggregate.children.size();
|
424
|
+
|
425
|
+
// If aggregate is not distinct, skip it
|
426
|
+
if (!distinct_data.IsDistinct(agg_idx)) {
|
427
|
+
global_source_states.push_back(nullptr);
|
428
|
+
continue;
|
429
|
+
}
|
430
|
+
D_ASSERT(distinct_data.info.table_map.count(agg_idx));
|
431
|
+
|
432
|
+
// Create global state for scanning
|
433
|
+
auto table_idx = distinct_data.info.table_map.at(agg_idx);
|
434
|
+
auto &radix_table_p = *distinct_data.radix_tables[table_idx];
|
435
|
+
global_source_states.push_back(radix_table_p.GetGlobalSourceState(context));
|
478
436
|
}
|
479
|
-
};
|
480
437
|
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
438
|
+
const idx_t n_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
439
|
+
vector<shared_ptr<Task>> tasks;
|
440
|
+
for (idx_t i = 0; i < n_threads; i++) {
|
441
|
+
tasks.push_back(
|
442
|
+
make_uniq<UngroupedDistinctAggregateFinalizeTask>(pipeline->executor, shared_from_this(), op, gstate));
|
443
|
+
tasks_scheduled++;
|
487
444
|
}
|
445
|
+
SetTasks(std::move(tasks));
|
446
|
+
}
|
488
447
|
|
489
|
-
|
490
|
-
|
491
|
-
|
448
|
+
TaskExecutionResult UngroupedDistinctAggregateFinalizeTask::ExecuteTask(TaskExecutionMode mode) {
|
449
|
+
AggregateDistinct();
|
450
|
+
event->FinishTask();
|
451
|
+
return TaskExecutionResult::TASK_FINISHED;
|
452
|
+
}
|
492
453
|
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
454
|
+
void UngroupedDistinctAggregateFinalizeTask::AggregateDistinct() {
|
455
|
+
D_ASSERT(gstate.distinct_state);
|
456
|
+
auto &distinct_state = *gstate.distinct_state;
|
457
|
+
auto &distinct_data = *op.distinct_data;
|
458
|
+
|
459
|
+
// Create thread-local copy of aggregate state
|
460
|
+
auto &aggregates = op.aggregates;
|
461
|
+
AggregateState state(aggregates);
|
462
|
+
|
463
|
+
// Thread-local contexts
|
464
|
+
ThreadContext thread_context(executor.context);
|
465
|
+
ExecutionContext execution_context(executor.context, thread_context, nullptr);
|
466
|
+
|
467
|
+
auto &finalize_event = event->Cast<UngroupedDistinctAggregateFinalizeEvent>();
|
468
|
+
|
469
|
+
// Now loop through the distinct aggregates, scanning the distinct HTs
|
470
|
+
idx_t payload_idx = 0;
|
471
|
+
idx_t next_payload_idx = 0;
|
472
|
+
for (idx_t agg_idx = 0; agg_idx < aggregates.size(); agg_idx++) {
|
473
|
+
auto &aggregate = aggregates[agg_idx]->Cast<BoundAggregateExpression>();
|
474
|
+
|
475
|
+
// Forward the payload idx
|
476
|
+
payload_idx = next_payload_idx;
|
477
|
+
next_payload_idx = payload_idx + aggregate.children.size();
|
478
|
+
|
479
|
+
// If aggregate is not distinct, skip it
|
480
|
+
if (!distinct_data.IsDistinct(agg_idx)) {
|
481
|
+
continue;
|
482
|
+
}
|
483
|
+
|
484
|
+
const auto table_idx = distinct_data.info.table_map.at(agg_idx);
|
485
|
+
auto &radix_table = *distinct_data.radix_tables[table_idx];
|
486
|
+
auto lstate = radix_table.GetLocalSourceState(execution_context);
|
487
|
+
|
488
|
+
auto &sink = *distinct_state.radix_states[table_idx];
|
489
|
+
InterruptState interrupt_state;
|
490
|
+
OperatorSourceInput source_input {*finalize_event.global_source_states[agg_idx], *lstate, interrupt_state};
|
491
|
+
|
492
|
+
DataChunk output_chunk;
|
493
|
+
output_chunk.Initialize(executor.context, distinct_state.distinct_output_chunks[table_idx]->GetTypes());
|
494
|
+
|
495
|
+
DataChunk payload_chunk;
|
496
|
+
payload_chunk.InitializeEmpty(distinct_data.grouped_aggregate_data[table_idx]->group_types);
|
497
|
+
payload_chunk.SetCardinality(0);
|
498
|
+
|
499
|
+
AggregateInputData aggr_input_data(aggregate.bind_info.get(), allocator);
|
500
|
+
while (true) {
|
501
|
+
output_chunk.Reset();
|
502
|
+
|
503
|
+
auto res = radix_table.GetData(execution_context, output_chunk, sink, source_input);
|
504
|
+
if (res == SourceResultType::FINISHED) {
|
505
|
+
D_ASSERT(output_chunk.size() == 0);
|
506
|
+
break;
|
507
|
+
} else if (res == SourceResultType::BLOCKED) {
|
508
|
+
throw InternalException(
|
509
|
+
"Unexpected interrupt from radix table GetData in UngroupedDistinctAggregateFinalizeTask");
|
510
|
+
}
|
511
|
+
|
512
|
+
// We dont need to resolve the filter, we already did this in Sink
|
513
|
+
idx_t payload_cnt = aggregate.children.size();
|
514
|
+
for (idx_t i = 0; i < payload_cnt; i++) {
|
515
|
+
payload_chunk.data[i].Reference(output_chunk.data[i]);
|
516
|
+
}
|
517
|
+
payload_chunk.SetCardinality(output_chunk);
|
518
|
+
|
519
|
+
#ifdef DEBUG
|
520
|
+
gstate.state.counts[agg_idx] += payload_chunk.size();
|
521
|
+
#endif
|
522
|
+
|
523
|
+
// Update the aggregate state
|
524
|
+
auto start_of_input = payload_cnt ? &payload_chunk.data[0] : nullptr;
|
525
|
+
aggregate.function.simple_update(start_of_input, aggr_input_data, payload_cnt,
|
526
|
+
state.aggregates[agg_idx].get(), payload_chunk.size());
|
501
527
|
}
|
502
|
-
D_ASSERT(!tasks.empty());
|
503
|
-
SetTasks(std::move(tasks));
|
504
528
|
}
|
505
529
|
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
530
|
+
// After scanning the distinct HTs, we can combine the thread-local agg states with the thread-global
|
531
|
+
lock_guard<mutex> guard(finalize_event.lock);
|
532
|
+
payload_idx = 0;
|
533
|
+
next_payload_idx = 0;
|
534
|
+
for (idx_t agg_idx = 0; agg_idx < aggregates.size(); agg_idx++) {
|
535
|
+
if (!distinct_data.IsDistinct(agg_idx)) {
|
536
|
+
continue;
|
537
|
+
}
|
538
|
+
|
539
|
+
auto &aggregate = aggregates[agg_idx]->Cast<BoundAggregateExpression>();
|
540
|
+
AggregateInputData aggr_input_data(aggregate.bind_info.get(), allocator);
|
541
|
+
|
542
|
+
Vector state_vec(Value::POINTER(CastPointerToValue(state.aggregates[agg_idx].get())));
|
543
|
+
Vector combined_vec(Value::POINTER(CastPointerToValue(gstate.state.aggregates[agg_idx].get())));
|
544
|
+
aggregate.function.combine(state_vec, combined_vec, aggr_input_data, 1);
|
510
545
|
}
|
511
|
-
|
546
|
+
|
547
|
+
D_ASSERT(!gstate.finished);
|
548
|
+
if (++finalize_event.tasks_done == finalize_event.tasks_scheduled) {
|
549
|
+
gstate.finished = true;
|
550
|
+
}
|
551
|
+
}
|
512
552
|
|
513
553
|
SinkFinalizeType PhysicalUngroupedAggregate::FinalizeDistinct(Pipeline &pipeline, Event &event, ClientContext &context,
|
514
554
|
GlobalSinkState &gstate_p) const {
|
515
|
-
auto &gstate = gstate_p.Cast<
|
555
|
+
auto &gstate = gstate_p.Cast<UngroupedAggregateGlobalSinkState>();
|
516
556
|
D_ASSERT(distinct_data);
|
517
557
|
auto &distinct_state = *gstate.distinct_state;
|
518
558
|
|
519
|
-
bool any_partitioned = false;
|
520
559
|
for (idx_t table_idx = 0; table_idx < distinct_data->radix_tables.size(); table_idx++) {
|
521
560
|
auto &radix_table_p = distinct_data->radix_tables[table_idx];
|
522
561
|
auto &radix_state = *distinct_state.radix_states[table_idx];
|
523
|
-
|
524
|
-
if (partitioned) {
|
525
|
-
any_partitioned = true;
|
526
|
-
}
|
527
|
-
}
|
528
|
-
if (any_partitioned) {
|
529
|
-
auto new_event = make_shared<UngroupedDistinctCombineFinalizeEvent>(*this, gstate, pipeline, context);
|
530
|
-
event.InsertEvent(std::move(new_event));
|
531
|
-
} else {
|
532
|
-
//! Hashtables aren't partitioned, they dont need to be joined first
|
533
|
-
//! So we can compute the aggregate already
|
534
|
-
auto new_event = make_shared<UngroupedDistinctAggregateFinalizeEvent>(*this, gstate, pipeline, context);
|
535
|
-
event.InsertEvent(std::move(new_event));
|
562
|
+
radix_table_p->Finalize(context, radix_state);
|
536
563
|
}
|
564
|
+
auto new_event = make_shared<UngroupedDistinctAggregateFinalizeEvent>(context, *this, gstate, pipeline);
|
565
|
+
event.InsertEvent(std::move(new_event));
|
537
566
|
return SinkFinalizeType::READY;
|
538
567
|
}
|
539
568
|
|
540
569
|
SinkFinalizeType PhysicalUngroupedAggregate::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
541
570
|
OperatorSinkFinalizeInput &input) const {
|
542
|
-
auto &gstate = input.global_state.Cast<
|
571
|
+
auto &gstate = input.global_state.Cast<UngroupedAggregateGlobalSinkState>();
|
543
572
|
|
544
573
|
if (distinct_data) {
|
545
574
|
return FinalizeDistinct(pipeline, event, context, input.global_state);
|
@@ -569,7 +598,7 @@ void VerifyNullHandling(DataChunk &chunk, AggregateState &state, const vector<un
|
|
569
598
|
|
570
599
|
SourceResultType PhysicalUngroupedAggregate::GetData(ExecutionContext &context, DataChunk &chunk,
|
571
600
|
OperatorSourceInput &input) const {
|
572
|
-
auto &gstate = sink_state->Cast<
|
601
|
+
auto &gstate = sink_state->Cast<UngroupedAggregateGlobalSinkState>();
|
573
602
|
D_ASSERT(gstate.finished);
|
574
603
|
|
575
604
|
// initialize the result chunk with the aggregate values
|
@@ -14,7 +14,6 @@
|
|
14
14
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
15
15
|
#include "duckdb/common/windows_undefs.hpp"
|
16
16
|
#include "duckdb/execution/expression_executor.hpp"
|
17
|
-
#include "duckdb/execution/partitionable_hashtable.hpp"
|
18
17
|
#include "duckdb/execution/window_executor.hpp"
|
19
18
|
#include "duckdb/execution/window_segment_tree.hpp"
|
20
19
|
#include "duckdb/main/client_config.hpp"
|
@@ -222,6 +221,7 @@ WindowGlobalSourceState::WindowGlobalSourceState(ClientContext &context_p, Windo
|
|
222
221
|
}
|
223
222
|
} else {
|
224
223
|
built.resize(hash_groups.size());
|
224
|
+
idx_t batch_base = 0;
|
225
225
|
for (auto &hash_group : hash_groups) {
|
226
226
|
if (!hash_group) {
|
227
227
|
continue;
|
@@ -235,6 +235,9 @@ WindowGlobalSourceState::WindowGlobalSourceState(ClientContext &context_p, Windo
|
|
235
235
|
auto &sb = *global_sort_state.sorted_blocks[0];
|
236
236
|
auto &sd = *sb.payload_data;
|
237
237
|
tasks_remaining += sd.data_blocks.size();
|
238
|
+
|
239
|
+
hash_group->batch_base = batch_base;
|
240
|
+
batch_base += sd.data_blocks.size();
|
238
241
|
}
|
239
242
|
}
|
240
243
|
}
|
@@ -436,6 +439,7 @@ public:
|
|
436
439
|
using ReadStates = vector<ReadStatePtr>;
|
437
440
|
|
438
441
|
explicit WindowLocalSourceState(WindowGlobalSourceState &gsource);
|
442
|
+
void UpdateBatchIndex();
|
439
443
|
bool NextPartition();
|
440
444
|
void Scan(DataChunk &chunk);
|
441
445
|
|
@@ -443,6 +447,8 @@ public:
|
|
443
447
|
WindowGlobalSourceState &gsource;
|
444
448
|
//! The current bin being processed
|
445
449
|
idx_t hash_bin;
|
450
|
+
//! The current batch index (for output reordering)
|
451
|
+
idx_t batch_index;
|
446
452
|
//! The current source being processed
|
447
453
|
optional_ptr<WindowPartitionSourceState> partition_source;
|
448
454
|
//! The read cursor
|
@@ -456,7 +462,7 @@ public:
|
|
456
462
|
};
|
457
463
|
|
458
464
|
WindowLocalSourceState::WindowLocalSourceState(WindowGlobalSourceState &gsource)
|
459
|
-
: gsource(gsource), hash_bin(gsource.built.size()) {
|
465
|
+
: gsource(gsource), hash_bin(gsource.built.size()), batch_index(0) {
|
460
466
|
auto &gsink = *gsource.gsink.global_partition;
|
461
467
|
auto &op = gsource.gsink.op;
|
462
468
|
|
@@ -564,6 +570,14 @@ WindowGlobalSourceState::Task WindowGlobalSourceState::NextTask(idx_t hash_bin)
|
|
564
570
|
return Task();
|
565
571
|
}
|
566
572
|
|
573
|
+
void WindowLocalSourceState::UpdateBatchIndex() {
|
574
|
+
D_ASSERT(partition_source);
|
575
|
+
D_ASSERT(scanner.get());
|
576
|
+
|
577
|
+
batch_index = partition_source->hash_group ? partition_source->hash_group->batch_base : 0;
|
578
|
+
batch_index += scanner->BlockIndex();
|
579
|
+
}
|
580
|
+
|
567
581
|
bool WindowLocalSourceState::NextPartition() {
|
568
582
|
// Release old states before the source
|
569
583
|
scanner.reset();
|
@@ -578,6 +592,7 @@ bool WindowLocalSourceState::NextPartition() {
|
|
578
592
|
partition_source = task.first;
|
579
593
|
scanner = std::move(task.second);
|
580
594
|
hash_bin = partition_source->hash_bin;
|
595
|
+
UpdateBatchIndex();
|
581
596
|
}
|
582
597
|
|
583
598
|
for (auto &wexec : partition_source->executors) {
|
@@ -599,6 +614,8 @@ void WindowLocalSourceState::Scan(DataChunk &result) {
|
|
599
614
|
read_states.clear();
|
600
615
|
return;
|
601
616
|
}
|
617
|
+
|
618
|
+
UpdateBatchIndex();
|
602
619
|
}
|
603
620
|
|
604
621
|
const auto position = scanner->Scanned();
|
@@ -638,6 +655,23 @@ unique_ptr<GlobalSourceState> PhysicalWindow::GetGlobalSourceState(ClientContext
|
|
638
655
|
return make_uniq<WindowGlobalSourceState>(context, gsink);
|
639
656
|
}
|
640
657
|
|
658
|
+
bool PhysicalWindow::SupportsBatchIndex() const {
|
659
|
+
// We can only preserve order for single partitioning
|
660
|
+
// or work stealing causes out of order batch numbers
|
661
|
+
auto &wexpr = select_list[0]->Cast<BoundWindowExpression>();
|
662
|
+
return wexpr.partitions.empty() && !wexpr.orders.empty();
|
663
|
+
}
|
664
|
+
|
665
|
+
OrderPreservationType PhysicalWindow::SourceOrder() const {
|
666
|
+
return SupportsBatchIndex() ? OrderPreservationType::FIXED_ORDER : OrderPreservationType::NO_ORDER;
|
667
|
+
}
|
668
|
+
|
669
|
+
idx_t PhysicalWindow::GetBatchIndex(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
|
670
|
+
LocalSourceState &lstate_p) const {
|
671
|
+
auto &lstate = lstate_p.Cast<WindowLocalSourceState>();
|
672
|
+
return lstate.batch_index;
|
673
|
+
}
|
674
|
+
|
641
675
|
SourceResultType PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk,
|
642
676
|
OperatorSourceInput &input) const {
|
643
677
|
auto &lsource = input.local_state.Cast<WindowLocalSourceState>();
|