duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
  4. package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
  5. package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
  6. package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
  7. package/src/duckdb/extension/json/json_scan.cpp +1 -1
  8. package/src/duckdb/extension/json/json_serializer.cpp +26 -69
  9. package/src/duckdb/src/common/enum_util.cpp +119 -7
  10. package/src/duckdb/src/common/extra_type_info.cpp +7 -3
  11. package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
  12. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
  13. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
  14. package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
  15. package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
  16. package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
  17. package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
  18. package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
  19. package/src/duckdb/src/common/types/interval.cpp +3 -0
  20. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
  21. package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
  22. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
  23. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
  24. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
  25. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
  26. package/src/duckdb/src/common/types/value.cpp +63 -42
  27. package/src/duckdb/src/common/types/vector.cpp +33 -67
  28. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
  29. package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
  30. package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
  31. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
  32. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
  33. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
  34. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
  35. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
  36. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
  37. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
  38. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
  39. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
  40. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
  41. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
  42. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
  43. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
  44. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
  45. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
  46. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
  47. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
  48. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
  49. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
  50. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
  51. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
  52. package/src/duckdb/src/execution/window_executor.cpp +6 -5
  53. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
  54. package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
  55. package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
  56. package/src/duckdb/src/function/table/read_csv.cpp +150 -136
  57. package/src/duckdb/src/function/table/table_scan.cpp +0 -2
  58. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  59. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
  60. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
  61. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
  62. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
  63. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
  64. package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
  65. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
  66. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
  67. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
  68. package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
  69. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
  70. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
  71. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
  72. package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
  73. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
  74. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
  75. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
  76. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
  77. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
  78. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
  79. package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
  80. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
  81. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
  82. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
  83. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
  84. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
  85. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
  86. package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
  87. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
  88. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
  89. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
  90. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
  91. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
  92. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
  93. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
  94. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
  95. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
  97. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
  98. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
  99. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
  100. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
  101. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
  102. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
  103. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
  104. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
  105. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
  106. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  107. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
  108. package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
  109. package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
  110. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
  111. package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
  112. package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
  113. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
  114. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
  115. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
  116. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
  117. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
  119. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
  120. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
  121. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
  122. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
  123. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  124. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
  125. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  126. package/src/duckdb/src/include/duckdb.h +12 -0
  127. package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
  128. package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
  129. package/src/duckdb/src/main/client_verify.cpp +1 -0
  130. package/src/duckdb/src/main/config.cpp +2 -2
  131. package/src/duckdb/src/main/connection.cpp +3 -3
  132. package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
  133. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
  134. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
  135. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
  136. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
  137. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
  138. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
  139. package/src/duckdb/src/planner/logical_operator.cpp +1 -1
  140. package/src/duckdb/src/planner/planner.cpp +1 -1
  141. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  142. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
  143. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
  144. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
  145. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
  146. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
  147. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
  148. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
  149. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
  150. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
  151. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
  152. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
  153. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
  154. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
  155. package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
  156. package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
  157. package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
  158. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
  159. package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
  160. package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
  161. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  162. package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
  163. package/src/duckdb/src/storage/table/row_group.cpp +68 -1
  164. package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
  165. package/src/duckdb/src/storage/wal_replay.cpp +2 -2
  166. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
  167. package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
  168. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
  169. package/src/duckdb/ub_src_execution.cpp +0 -2
  170. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
  171. package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
  172. package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
  173. package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
  174. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  175. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  176. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  177. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
  178. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
  179. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
  180. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -11,7 +11,7 @@ namespace duckdb {
11
11
 
12
12
  PartitionGlobalHashGroup::PartitionGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions,
13
13
  const Orders &orders, const Types &payload_types, bool external)
14
- : count(0) {
14
+ : count(0), batch_base(0) {
15
15
 
16
16
  RowLayout payload_layout;
17
17
  payload_layout.Initialize(payload_types);
@@ -191,52 +191,45 @@ void PartitionGlobalSinkState::CombineLocalPartition(GroupingPartition &local_pa
191
191
  grouping_data->Combine(*local_partition);
192
192
  }
193
193
 
194
- void PartitionGlobalSinkState::BuildSortState(TupleDataCollection &group_data, GlobalSortState &global_sort) const {
194
+ PartitionLocalMergeState::PartitionLocalMergeState(PartitionGlobalSinkState &gstate)
195
+ : merge_state(nullptr), stage(PartitionSortStage::INIT), finished(true), executor(gstate.context) {
196
+
195
197
  // Set up the sort expression computation.
196
198
  vector<LogicalType> sort_types;
197
- ExpressionExecutor executor(context);
198
- for (auto &order : orders) {
199
+ for (auto &order : gstate.orders) {
199
200
  auto &oexpr = order.expression;
200
201
  sort_types.emplace_back(oexpr->return_type);
201
202
  executor.AddExpression(*oexpr);
202
203
  }
203
- DataChunk sort_chunk;
204
- sort_chunk.Initialize(allocator, sort_types);
204
+ sort_chunk.Initialize(gstate.allocator, sort_types);
205
+ payload_chunk.Initialize(gstate.allocator, gstate.payload_types);
206
+ }
205
207
 
208
+ void PartitionLocalMergeState::Scan() {
209
+ auto &group_data = *merge_state->group_data;
210
+ auto &hash_group = *merge_state->hash_group;
211
+ auto &chunk_state = merge_state->chunk_state;
206
212
  // Copy the data from the group into the sort code.
213
+ auto &global_sort = *hash_group.global_sort;
207
214
  LocalSortState local_sort;
208
215
  local_sort.Initialize(global_sort, global_sort.buffer_manager);
209
216
 
210
- // Strip hash column
211
- DataChunk payload_chunk;
212
- payload_chunk.Initialize(allocator, payload_types);
213
-
214
- vector<column_t> column_ids;
215
- column_ids.reserve(payload_types.size());
216
- for (column_t i = 0; i < payload_types.size(); ++i) {
217
- column_ids.emplace_back(i);
218
- }
219
- TupleDataScanState chunk_state;
220
- group_data.InitializeScan(chunk_state, column_ids);
221
- while (group_data.Scan(chunk_state, payload_chunk)) {
217
+ TupleDataScanState local_scan;
218
+ group_data.InitializeScan(local_scan, merge_state->column_ids);
219
+ while (group_data.Scan(chunk_state, local_scan, payload_chunk)) {
222
220
  sort_chunk.Reset();
223
221
  executor.Execute(payload_chunk, sort_chunk);
224
222
 
225
223
  local_sort.SinkChunk(sort_chunk, payload_chunk);
226
- if (local_sort.SizeInBytes() > memory_per_thread) {
224
+ if (local_sort.SizeInBytes() > merge_state->memory_per_thread) {
227
225
  local_sort.Sort(global_sort, true);
228
226
  }
227
+ hash_group.count += payload_chunk.size();
229
228
  }
230
229
 
231
230
  global_sort.AddLocalState(local_sort);
232
231
  }
233
232
 
234
- void PartitionGlobalSinkState::BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &hash_group) {
235
- BuildSortState(group_data, *hash_group.global_sort);
236
-
237
- hash_group.count += group_data.Count();
238
- }
239
-
240
233
  // Per-thread sink state
241
234
  PartitionLocalSinkState::PartitionLocalSinkState(ClientContext &context, PartitionGlobalSinkState &gstate_p)
242
235
  : gstate(gstate_p), allocator(Allocator::Get(context)), executor(context) {
@@ -349,10 +342,11 @@ void PartitionLocalSinkState::Combine() {
349
342
  gstate.CombineLocalPartition(local_partition, local_append);
350
343
  }
351
344
 
352
- PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data,
345
+ PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data_p,
353
346
  hash_t hash_bin)
354
- : sink(sink), group_data(std::move(group_data)), stage(PartitionSortStage::INIT), total_tasks(0), tasks_assigned(0),
355
- tasks_completed(0) {
347
+ : sink(sink), group_data(std::move(group_data_p)), memory_per_thread(sink.memory_per_thread),
348
+ num_threads(TaskScheduler::GetScheduler(sink.context).NumberOfThreads()), stage(PartitionSortStage::INIT),
349
+ total_tasks(0), tasks_assigned(0), tasks_completed(0) {
356
350
 
357
351
  const auto group_idx = sink.hash_groups.size();
358
352
  auto new_group = make_uniq<PartitionGlobalHashGroup>(sink.buffer_manager, sink.partitions, sink.orders,
@@ -363,13 +357,18 @@ PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &s
363
357
  global_sort = sink.hash_groups[group_idx]->global_sort.get();
364
358
 
365
359
  sink.bin_groups[hash_bin] = group_idx;
360
+
361
+ column_ids.reserve(sink.payload_types.size());
362
+ for (column_t i = 0; i < sink.payload_types.size(); ++i) {
363
+ column_ids.emplace_back(i);
364
+ }
365
+ group_data->InitializeScan(chunk_state, column_ids);
366
366
  }
367
367
 
368
368
  void PartitionLocalMergeState::Prepare() {
369
- auto &global_sort = *merge_state->global_sort;
370
- merge_state->sink.BuildSortState(*merge_state->group_data, *merge_state->hash_group);
371
369
  merge_state->group_data.reset();
372
370
 
371
+ auto &global_sort = *merge_state->global_sort;
373
372
  global_sort.PrepareMergePhase();
374
373
  }
375
374
 
@@ -381,6 +380,9 @@ void PartitionLocalMergeState::Merge() {
381
380
 
382
381
  void PartitionLocalMergeState::ExecuteTask() {
383
382
  switch (stage) {
383
+ case PartitionSortStage::SCAN:
384
+ Scan();
385
+ break;
384
386
  case PartitionSortStage::PREPARE:
385
387
  Prepare();
386
388
  break;
@@ -427,6 +429,11 @@ bool PartitionGlobalMergeState::TryPrepareNextStage() {
427
429
 
428
430
  switch (stage) {
429
431
  case PartitionSortStage::INIT:
432
+ total_tasks = num_threads;
433
+ stage = PartitionSortStage::SCAN;
434
+ return true;
435
+
436
+ case PartitionSortStage::SCAN:
430
437
  total_tasks = 1;
431
438
  stage = PartitionSortStage::PREPARE;
432
439
  return true;
@@ -474,8 +481,9 @@ PartitionGlobalMergeStates::PartitionGlobalMergeStates(PartitionGlobalSinkState
474
481
 
475
482
  class PartitionMergeTask : public ExecutorTask {
476
483
  public:
477
- PartitionMergeTask(shared_ptr<Event> event_p, ClientContext &context_p, PartitionGlobalMergeStates &hash_groups_p)
478
- : ExecutorTask(context_p), event(std::move(event_p)), hash_groups(hash_groups_p) {
484
+ PartitionMergeTask(shared_ptr<Event> event_p, ClientContext &context_p, PartitionGlobalMergeStates &hash_groups_p,
485
+ PartitionGlobalSinkState &gstate)
486
+ : ExecutorTask(context_p), event(std::move(event_p)), local_state(gstate), hash_groups(hash_groups_p) {
479
487
  }
480
488
 
481
489
  TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;
@@ -576,7 +584,7 @@ void PartitionMergeEvent::Schedule() {
576
584
 
577
585
  vector<shared_ptr<Task>> merge_tasks;
578
586
  for (idx_t tnum = 0; tnum < num_threads; tnum++) {
579
- merge_tasks.emplace_back(make_uniq<PartitionMergeTask>(shared_from_this(), context, merge_states));
587
+ merge_tasks.emplace_back(make_uniq<PartitionMergeTask>(shared_from_this(), context, merge_states, gstate));
580
588
  }
581
589
  SetTasks(std::move(merge_tasks));
582
590
  }
@@ -1,21 +1,15 @@
1
1
  #include "duckdb/common/types/data_chunk.hpp"
2
2
 
3
3
  #include "duckdb/common/array.hpp"
4
- #include "duckdb/common/arrow/arrow.hpp"
5
4
  #include "duckdb/common/exception.hpp"
6
5
  #include "duckdb/common/helper.hpp"
7
6
  #include "duckdb/common/printer.hpp"
8
7
  #include "duckdb/common/serializer.hpp"
9
- #include "duckdb/common/to_string.hpp"
10
- #include "duckdb/common/types/arrow_aux_data.hpp"
11
- #include "duckdb/common/types/date.hpp"
8
+ #include "duckdb/common/serializer/format_serializer.hpp"
9
+ #include "duckdb/common/serializer/format_deserializer.hpp"
12
10
  #include "duckdb/common/types/interval.hpp"
13
- #include "duckdb/common/types/null_value.hpp"
14
11
  #include "duckdb/common/types/sel_cache.hpp"
15
- #include "duckdb/common/types/timestamp.hpp"
16
- #include "duckdb/common/types/uuid.hpp"
17
12
  #include "duckdb/common/types/vector_cache.hpp"
18
- #include "duckdb/common/unordered_map.hpp"
19
13
  #include "duckdb/common/vector.hpp"
20
14
  #include "duckdb/common/vector_operations/vector_operations.hpp"
21
15
  #include "duckdb/execution/execution_context.hpp"
@@ -268,6 +262,48 @@ void DataChunk::Deserialize(Deserializer &source) {
268
262
  Verify();
269
263
  }
270
264
 
265
+ void DataChunk::FormatSerialize(FormatSerializer &serializer) const {
266
+ // write the count
267
+ auto row_count = size();
268
+ serializer.WriteProperty<sel_t>(100, "rows", row_count);
269
+ auto column_count = ColumnCount();
270
+
271
+ // Write the types
272
+ serializer.WriteList(101, "types", column_count,
273
+ [&](FormatSerializer::List &list, idx_t i) { list.WriteElement(data[i].GetType()); });
274
+
275
+ // Write the data
276
+ serializer.WriteList(102, "columns", column_count, [&](FormatSerializer::List &list, idx_t i) {
277
+ list.WriteObject([&](FormatSerializer &object) {
278
+ // Reference the vector to avoid potentially mutating it during serialization
279
+ Vector serialized_vector(data[i].GetType());
280
+ serialized_vector.Reference(data[i]);
281
+ serialized_vector.FormatSerialize(object, row_count);
282
+ });
283
+ });
284
+ }
285
+
286
+ void DataChunk::FormatDeserialize(FormatDeserializer &deserializer) {
287
+ // read the count
288
+ auto row_count = deserializer.ReadProperty<sel_t>(100, "rows");
289
+
290
+ // Read the types
291
+ vector<LogicalType> types;
292
+ deserializer.ReadList(101, "types", [&](FormatDeserializer::List &list, idx_t i) {
293
+ auto type = list.ReadElement<LogicalType>();
294
+ types.push_back(type);
295
+ });
296
+ Initialize(Allocator::DefaultAllocator(), types);
297
+
298
+ // now load the column data
299
+ SetCardinality(row_count);
300
+
301
+ // Read the data
302
+ deserializer.ReadList(102, "columns", [&](FormatDeserializer::List &list, idx_t i) {
303
+ list.ReadObject([&](FormatDeserializer &object) { data[i].FormatDeserialize(object, row_count); });
304
+ });
305
+ }
306
+
271
307
  void DataChunk::Slice(const SelectionVector &sel_vector, idx_t count_p) {
272
308
  this->count = count_p;
273
309
  SelCache merge_cache;
@@ -2,6 +2,9 @@
2
2
 
3
3
  #include "duckdb/common/exception.hpp"
4
4
  #include "duckdb/common/field_writer.hpp"
5
+ #include "duckdb/common/serializer/format_serializer.hpp"
6
+ #include "duckdb/common/serializer/format_deserializer.hpp"
7
+
5
8
  #include "hyperloglog.hpp"
6
9
 
7
10
  namespace duckdb {
@@ -106,6 +109,24 @@ unique_ptr<HyperLogLog> HyperLogLog::Deserialize(FieldReader &reader) {
106
109
  return result;
107
110
  }
108
111
 
112
+ void HyperLogLog::FormatSerialize(FormatSerializer &serializer) const {
113
+ serializer.WriteProperty(100, "type", HLLStorageType::UNCOMPRESSED);
114
+ serializer.WriteProperty(101, "data", GetPtr(), GetSize());
115
+ }
116
+
117
+ unique_ptr<HyperLogLog> HyperLogLog::FormatDeserialize(FormatDeserializer &deserializer) {
118
+ auto result = make_uniq<HyperLogLog>();
119
+ auto storage_type = deserializer.ReadProperty<HLLStorageType>(100, "type");
120
+ switch (storage_type) {
121
+ case HLLStorageType::UNCOMPRESSED:
122
+ deserializer.ReadProperty(101, "data", result->GetPtr(), GetSize());
123
+ break;
124
+ default:
125
+ throw SerializationException("Unknown HyperLogLog storage type!");
126
+ }
127
+ return result;
128
+ }
129
+
109
130
  //===--------------------------------------------------------------------===//
110
131
  // Vectorized HLL implementation
111
132
  //===--------------------------------------------------------------------===//
@@ -11,6 +11,9 @@
11
11
  #include "duckdb/common/operator/subtract.hpp"
12
12
  #include "duckdb/common/string_util.hpp"
13
13
 
14
+ #include "duckdb/common/serializer/format_serializer.hpp"
15
+ #include "duckdb/common/serializer/format_deserializer.hpp"
16
+
14
17
  namespace duckdb {
15
18
 
16
19
  bool Interval::FromString(const string &str, interval_t &result) {