duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
- package/src/duckdb/extension/json/json_scan.cpp +1 -1
- package/src/duckdb/extension/json/json_serializer.cpp +26 -69
- package/src/duckdb/src/common/enum_util.cpp +119 -7
- package/src/duckdb/src/common/extra_type_info.cpp +7 -3
- package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
- package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
- package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/value.cpp +63 -42
- package/src/duckdb/src/common/types/vector.cpp +33 -67
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
- package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
- package/src/duckdb/src/execution/window_executor.cpp +6 -5
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
- package/src/duckdb/src/function/table/read_csv.cpp +150 -136
- package/src/duckdb/src/function/table/table_scan.cpp +0 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +12 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +2 -2
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
- package/src/duckdb/src/planner/logical_operator.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
- package/src/duckdb/src/storage/table/row_group.cpp +68 -1
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_src_execution.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -11,7 +11,7 @@ namespace duckdb {
|
|
11
11
|
|
12
12
|
PartitionGlobalHashGroup::PartitionGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions,
|
13
13
|
const Orders &orders, const Types &payload_types, bool external)
|
14
|
-
: count(0) {
|
14
|
+
: count(0), batch_base(0) {
|
15
15
|
|
16
16
|
RowLayout payload_layout;
|
17
17
|
payload_layout.Initialize(payload_types);
|
@@ -191,52 +191,45 @@ void PartitionGlobalSinkState::CombineLocalPartition(GroupingPartition &local_pa
|
|
191
191
|
grouping_data->Combine(*local_partition);
|
192
192
|
}
|
193
193
|
|
194
|
-
|
194
|
+
PartitionLocalMergeState::PartitionLocalMergeState(PartitionGlobalSinkState &gstate)
|
195
|
+
: merge_state(nullptr), stage(PartitionSortStage::INIT), finished(true), executor(gstate.context) {
|
196
|
+
|
195
197
|
// Set up the sort expression computation.
|
196
198
|
vector<LogicalType> sort_types;
|
197
|
-
|
198
|
-
for (auto &order : orders) {
|
199
|
+
for (auto &order : gstate.orders) {
|
199
200
|
auto &oexpr = order.expression;
|
200
201
|
sort_types.emplace_back(oexpr->return_type);
|
201
202
|
executor.AddExpression(*oexpr);
|
202
203
|
}
|
203
|
-
|
204
|
-
|
204
|
+
sort_chunk.Initialize(gstate.allocator, sort_types);
|
205
|
+
payload_chunk.Initialize(gstate.allocator, gstate.payload_types);
|
206
|
+
}
|
205
207
|
|
208
|
+
void PartitionLocalMergeState::Scan() {
|
209
|
+
auto &group_data = *merge_state->group_data;
|
210
|
+
auto &hash_group = *merge_state->hash_group;
|
211
|
+
auto &chunk_state = merge_state->chunk_state;
|
206
212
|
// Copy the data from the group into the sort code.
|
213
|
+
auto &global_sort = *hash_group.global_sort;
|
207
214
|
LocalSortState local_sort;
|
208
215
|
local_sort.Initialize(global_sort, global_sort.buffer_manager);
|
209
216
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
vector<column_t> column_ids;
|
215
|
-
column_ids.reserve(payload_types.size());
|
216
|
-
for (column_t i = 0; i < payload_types.size(); ++i) {
|
217
|
-
column_ids.emplace_back(i);
|
218
|
-
}
|
219
|
-
TupleDataScanState chunk_state;
|
220
|
-
group_data.InitializeScan(chunk_state, column_ids);
|
221
|
-
while (group_data.Scan(chunk_state, payload_chunk)) {
|
217
|
+
TupleDataScanState local_scan;
|
218
|
+
group_data.InitializeScan(local_scan, merge_state->column_ids);
|
219
|
+
while (group_data.Scan(chunk_state, local_scan, payload_chunk)) {
|
222
220
|
sort_chunk.Reset();
|
223
221
|
executor.Execute(payload_chunk, sort_chunk);
|
224
222
|
|
225
223
|
local_sort.SinkChunk(sort_chunk, payload_chunk);
|
226
|
-
if (local_sort.SizeInBytes() > memory_per_thread) {
|
224
|
+
if (local_sort.SizeInBytes() > merge_state->memory_per_thread) {
|
227
225
|
local_sort.Sort(global_sort, true);
|
228
226
|
}
|
227
|
+
hash_group.count += payload_chunk.size();
|
229
228
|
}
|
230
229
|
|
231
230
|
global_sort.AddLocalState(local_sort);
|
232
231
|
}
|
233
232
|
|
234
|
-
void PartitionGlobalSinkState::BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &hash_group) {
|
235
|
-
BuildSortState(group_data, *hash_group.global_sort);
|
236
|
-
|
237
|
-
hash_group.count += group_data.Count();
|
238
|
-
}
|
239
|
-
|
240
233
|
// Per-thread sink state
|
241
234
|
PartitionLocalSinkState::PartitionLocalSinkState(ClientContext &context, PartitionGlobalSinkState &gstate_p)
|
242
235
|
: gstate(gstate_p), allocator(Allocator::Get(context)), executor(context) {
|
@@ -349,10 +342,11 @@ void PartitionLocalSinkState::Combine() {
|
|
349
342
|
gstate.CombineLocalPartition(local_partition, local_append);
|
350
343
|
}
|
351
344
|
|
352
|
-
PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr
|
345
|
+
PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data_p,
|
353
346
|
hash_t hash_bin)
|
354
|
-
: sink(sink), group_data(std::move(
|
355
|
-
|
347
|
+
: sink(sink), group_data(std::move(group_data_p)), memory_per_thread(sink.memory_per_thread),
|
348
|
+
num_threads(TaskScheduler::GetScheduler(sink.context).NumberOfThreads()), stage(PartitionSortStage::INIT),
|
349
|
+
total_tasks(0), tasks_assigned(0), tasks_completed(0) {
|
356
350
|
|
357
351
|
const auto group_idx = sink.hash_groups.size();
|
358
352
|
auto new_group = make_uniq<PartitionGlobalHashGroup>(sink.buffer_manager, sink.partitions, sink.orders,
|
@@ -363,13 +357,18 @@ PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &s
|
|
363
357
|
global_sort = sink.hash_groups[group_idx]->global_sort.get();
|
364
358
|
|
365
359
|
sink.bin_groups[hash_bin] = group_idx;
|
360
|
+
|
361
|
+
column_ids.reserve(sink.payload_types.size());
|
362
|
+
for (column_t i = 0; i < sink.payload_types.size(); ++i) {
|
363
|
+
column_ids.emplace_back(i);
|
364
|
+
}
|
365
|
+
group_data->InitializeScan(chunk_state, column_ids);
|
366
366
|
}
|
367
367
|
|
368
368
|
void PartitionLocalMergeState::Prepare() {
|
369
|
-
auto &global_sort = *merge_state->global_sort;
|
370
|
-
merge_state->sink.BuildSortState(*merge_state->group_data, *merge_state->hash_group);
|
371
369
|
merge_state->group_data.reset();
|
372
370
|
|
371
|
+
auto &global_sort = *merge_state->global_sort;
|
373
372
|
global_sort.PrepareMergePhase();
|
374
373
|
}
|
375
374
|
|
@@ -381,6 +380,9 @@ void PartitionLocalMergeState::Merge() {
|
|
381
380
|
|
382
381
|
void PartitionLocalMergeState::ExecuteTask() {
|
383
382
|
switch (stage) {
|
383
|
+
case PartitionSortStage::SCAN:
|
384
|
+
Scan();
|
385
|
+
break;
|
384
386
|
case PartitionSortStage::PREPARE:
|
385
387
|
Prepare();
|
386
388
|
break;
|
@@ -427,6 +429,11 @@ bool PartitionGlobalMergeState::TryPrepareNextStage() {
|
|
427
429
|
|
428
430
|
switch (stage) {
|
429
431
|
case PartitionSortStage::INIT:
|
432
|
+
total_tasks = num_threads;
|
433
|
+
stage = PartitionSortStage::SCAN;
|
434
|
+
return true;
|
435
|
+
|
436
|
+
case PartitionSortStage::SCAN:
|
430
437
|
total_tasks = 1;
|
431
438
|
stage = PartitionSortStage::PREPARE;
|
432
439
|
return true;
|
@@ -474,8 +481,9 @@ PartitionGlobalMergeStates::PartitionGlobalMergeStates(PartitionGlobalSinkState
|
|
474
481
|
|
475
482
|
class PartitionMergeTask : public ExecutorTask {
|
476
483
|
public:
|
477
|
-
PartitionMergeTask(shared_ptr<Event> event_p, ClientContext &context_p, PartitionGlobalMergeStates &hash_groups_p
|
478
|
-
|
484
|
+
PartitionMergeTask(shared_ptr<Event> event_p, ClientContext &context_p, PartitionGlobalMergeStates &hash_groups_p,
|
485
|
+
PartitionGlobalSinkState &gstate)
|
486
|
+
: ExecutorTask(context_p), event(std::move(event_p)), local_state(gstate), hash_groups(hash_groups_p) {
|
479
487
|
}
|
480
488
|
|
481
489
|
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;
|
@@ -576,7 +584,7 @@ void PartitionMergeEvent::Schedule() {
|
|
576
584
|
|
577
585
|
vector<shared_ptr<Task>> merge_tasks;
|
578
586
|
for (idx_t tnum = 0; tnum < num_threads; tnum++) {
|
579
|
-
merge_tasks.emplace_back(make_uniq<PartitionMergeTask>(shared_from_this(), context, merge_states));
|
587
|
+
merge_tasks.emplace_back(make_uniq<PartitionMergeTask>(shared_from_this(), context, merge_states, gstate));
|
580
588
|
}
|
581
589
|
SetTasks(std::move(merge_tasks));
|
582
590
|
}
|
@@ -1,21 +1,15 @@
|
|
1
1
|
#include "duckdb/common/types/data_chunk.hpp"
|
2
2
|
|
3
3
|
#include "duckdb/common/array.hpp"
|
4
|
-
#include "duckdb/common/arrow/arrow.hpp"
|
5
4
|
#include "duckdb/common/exception.hpp"
|
6
5
|
#include "duckdb/common/helper.hpp"
|
7
6
|
#include "duckdb/common/printer.hpp"
|
8
7
|
#include "duckdb/common/serializer.hpp"
|
9
|
-
#include "duckdb/common/
|
10
|
-
#include "duckdb/common/
|
11
|
-
#include "duckdb/common/types/date.hpp"
|
8
|
+
#include "duckdb/common/serializer/format_serializer.hpp"
|
9
|
+
#include "duckdb/common/serializer/format_deserializer.hpp"
|
12
10
|
#include "duckdb/common/types/interval.hpp"
|
13
|
-
#include "duckdb/common/types/null_value.hpp"
|
14
11
|
#include "duckdb/common/types/sel_cache.hpp"
|
15
|
-
#include "duckdb/common/types/timestamp.hpp"
|
16
|
-
#include "duckdb/common/types/uuid.hpp"
|
17
12
|
#include "duckdb/common/types/vector_cache.hpp"
|
18
|
-
#include "duckdb/common/unordered_map.hpp"
|
19
13
|
#include "duckdb/common/vector.hpp"
|
20
14
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
21
15
|
#include "duckdb/execution/execution_context.hpp"
|
@@ -268,6 +262,48 @@ void DataChunk::Deserialize(Deserializer &source) {
|
|
268
262
|
Verify();
|
269
263
|
}
|
270
264
|
|
265
|
+
void DataChunk::FormatSerialize(FormatSerializer &serializer) const {
|
266
|
+
// write the count
|
267
|
+
auto row_count = size();
|
268
|
+
serializer.WriteProperty<sel_t>(100, "rows", row_count);
|
269
|
+
auto column_count = ColumnCount();
|
270
|
+
|
271
|
+
// Write the types
|
272
|
+
serializer.WriteList(101, "types", column_count,
|
273
|
+
[&](FormatSerializer::List &list, idx_t i) { list.WriteElement(data[i].GetType()); });
|
274
|
+
|
275
|
+
// Write the data
|
276
|
+
serializer.WriteList(102, "columns", column_count, [&](FormatSerializer::List &list, idx_t i) {
|
277
|
+
list.WriteObject([&](FormatSerializer &object) {
|
278
|
+
// Reference the vector to avoid potentially mutating it during serialization
|
279
|
+
Vector serialized_vector(data[i].GetType());
|
280
|
+
serialized_vector.Reference(data[i]);
|
281
|
+
serialized_vector.FormatSerialize(object, row_count);
|
282
|
+
});
|
283
|
+
});
|
284
|
+
}
|
285
|
+
|
286
|
+
void DataChunk::FormatDeserialize(FormatDeserializer &deserializer) {
|
287
|
+
// read the count
|
288
|
+
auto row_count = deserializer.ReadProperty<sel_t>(100, "rows");
|
289
|
+
|
290
|
+
// Read the types
|
291
|
+
vector<LogicalType> types;
|
292
|
+
deserializer.ReadList(101, "types", [&](FormatDeserializer::List &list, idx_t i) {
|
293
|
+
auto type = list.ReadElement<LogicalType>();
|
294
|
+
types.push_back(type);
|
295
|
+
});
|
296
|
+
Initialize(Allocator::DefaultAllocator(), types);
|
297
|
+
|
298
|
+
// now load the column data
|
299
|
+
SetCardinality(row_count);
|
300
|
+
|
301
|
+
// Read the data
|
302
|
+
deserializer.ReadList(102, "columns", [&](FormatDeserializer::List &list, idx_t i) {
|
303
|
+
list.ReadObject([&](FormatDeserializer &object) { data[i].FormatDeserialize(object, row_count); });
|
304
|
+
});
|
305
|
+
}
|
306
|
+
|
271
307
|
void DataChunk::Slice(const SelectionVector &sel_vector, idx_t count_p) {
|
272
308
|
this->count = count_p;
|
273
309
|
SelCache merge_cache;
|
@@ -2,6 +2,9 @@
|
|
2
2
|
|
3
3
|
#include "duckdb/common/exception.hpp"
|
4
4
|
#include "duckdb/common/field_writer.hpp"
|
5
|
+
#include "duckdb/common/serializer/format_serializer.hpp"
|
6
|
+
#include "duckdb/common/serializer/format_deserializer.hpp"
|
7
|
+
|
5
8
|
#include "hyperloglog.hpp"
|
6
9
|
|
7
10
|
namespace duckdb {
|
@@ -106,6 +109,24 @@ unique_ptr<HyperLogLog> HyperLogLog::Deserialize(FieldReader &reader) {
|
|
106
109
|
return result;
|
107
110
|
}
|
108
111
|
|
112
|
+
void HyperLogLog::FormatSerialize(FormatSerializer &serializer) const {
|
113
|
+
serializer.WriteProperty(100, "type", HLLStorageType::UNCOMPRESSED);
|
114
|
+
serializer.WriteProperty(101, "data", GetPtr(), GetSize());
|
115
|
+
}
|
116
|
+
|
117
|
+
unique_ptr<HyperLogLog> HyperLogLog::FormatDeserialize(FormatDeserializer &deserializer) {
|
118
|
+
auto result = make_uniq<HyperLogLog>();
|
119
|
+
auto storage_type = deserializer.ReadProperty<HLLStorageType>(100, "type");
|
120
|
+
switch (storage_type) {
|
121
|
+
case HLLStorageType::UNCOMPRESSED:
|
122
|
+
deserializer.ReadProperty(101, "data", result->GetPtr(), GetSize());
|
123
|
+
break;
|
124
|
+
default:
|
125
|
+
throw SerializationException("Unknown HyperLogLog storage type!");
|
126
|
+
}
|
127
|
+
return result;
|
128
|
+
}
|
129
|
+
|
109
130
|
//===--------------------------------------------------------------------===//
|
110
131
|
// Vectorized HLL implementation
|
111
132
|
//===--------------------------------------------------------------------===//
|
@@ -11,6 +11,9 @@
|
|
11
11
|
#include "duckdb/common/operator/subtract.hpp"
|
12
12
|
#include "duckdb/common/string_util.hpp"
|
13
13
|
|
14
|
+
#include "duckdb/common/serializer/format_serializer.hpp"
|
15
|
+
#include "duckdb/common/serializer/format_deserializer.hpp"
|
16
|
+
|
14
17
|
namespace duckdb {
|
15
18
|
|
16
19
|
bool Interval::FromString(const string &str, interval_t &result) {
|