duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
  4. package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
  5. package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
  6. package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
  7. package/src/duckdb/extension/json/json_scan.cpp +1 -1
  8. package/src/duckdb/extension/json/json_serializer.cpp +26 -69
  9. package/src/duckdb/src/common/enum_util.cpp +119 -7
  10. package/src/duckdb/src/common/extra_type_info.cpp +7 -3
  11. package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
  12. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
  13. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
  14. package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
  15. package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
  16. package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
  17. package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
  18. package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
  19. package/src/duckdb/src/common/types/interval.cpp +3 -0
  20. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
  21. package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
  22. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
  23. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
  24. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
  25. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
  26. package/src/duckdb/src/common/types/value.cpp +63 -42
  27. package/src/duckdb/src/common/types/vector.cpp +33 -67
  28. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
  29. package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
  30. package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
  31. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
  32. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
  33. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
  34. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
  35. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
  36. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
  37. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
  38. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
  39. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
  40. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
  41. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
  42. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
  43. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
  44. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
  45. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
  46. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
  47. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
  48. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
  49. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
  50. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
  51. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
  52. package/src/duckdb/src/execution/window_executor.cpp +6 -5
  53. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
  54. package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
  55. package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
  56. package/src/duckdb/src/function/table/read_csv.cpp +150 -136
  57. package/src/duckdb/src/function/table/table_scan.cpp +0 -2
  58. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  59. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
  60. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
  61. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
  62. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
  63. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
  64. package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
  65. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
  66. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
  67. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
  68. package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
  69. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
  70. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
  71. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
  72. package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
  73. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
  74. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
  75. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
  76. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
  77. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
  78. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
  79. package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
  80. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
  81. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
  82. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
  83. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
  84. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
  85. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
  86. package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
  87. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
  88. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
  89. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
  90. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
  91. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
  92. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
  93. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
  94. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
  95. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
  97. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
  98. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
  99. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
  100. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
  101. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
  102. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
  103. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
  104. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
  105. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
  106. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  107. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
  108. package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
  109. package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
  110. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
  111. package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
  112. package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
  113. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
  114. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
  115. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
  116. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
  117. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
  119. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
  120. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
  121. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
  122. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
  123. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  124. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
  125. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  126. package/src/duckdb/src/include/duckdb.h +12 -0
  127. package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
  128. package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
  129. package/src/duckdb/src/main/client_verify.cpp +1 -0
  130. package/src/duckdb/src/main/config.cpp +2 -2
  131. package/src/duckdb/src/main/connection.cpp +3 -3
  132. package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
  133. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
  134. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
  135. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
  136. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
  137. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
  138. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
  139. package/src/duckdb/src/planner/logical_operator.cpp +1 -1
  140. package/src/duckdb/src/planner/planner.cpp +1 -1
  141. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  142. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
  143. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
  144. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
  145. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
  146. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
  147. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
  148. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
  149. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
  150. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
  151. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
  152. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
  153. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
  154. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
  155. package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
  156. package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
  157. package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
  158. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
  159. package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
  160. package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
  161. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  162. package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
  163. package/src/duckdb/src/storage/table/row_group.cpp +68 -1
  164. package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
  165. package/src/duckdb/src/storage/wal_replay.cpp +2 -2
  166. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
  167. package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
  168. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
  169. package/src/duckdb/ub_src_execution.cpp +0 -2
  170. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
  171. package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
  172. package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
  173. package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
  174. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  175. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  176. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  177. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
  178. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
  179. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
  180. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -26,9 +26,11 @@ public:
26
26
  };
27
27
 
28
28
  template <class OP, class RETURN_TYPE, typename... ARGS>
29
- RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
29
+ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&...args) {
30
30
  D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
31
31
  switch (radix_bits) {
32
+ case 0:
33
+ return OP::template Operation<0>(std::forward<ARGS>(args)...);
32
34
  case 1:
33
35
  return OP::template Operation<1>(std::forward<ARGS>(args)...);
34
36
  case 2:
@@ -82,36 +84,6 @@ idx_t RadixPartitioning::Select(Vector &hashes, const SelectionVector *sel, idx_
82
84
  return RadixBitsSwitch<SelectFunctor, idx_t>(radix_bits, hashes, sel, count, cutoff, true_sel, false_sel);
83
85
  }
84
86
 
85
- struct HashsToBinsFunctor {
86
- template <idx_t radix_bits>
87
- static void Operation(Vector &hashes, Vector &bins, idx_t count) {
88
- using CONSTANTS = RadixPartitioningConstants<radix_bits>;
89
- UnaryExecutor::Execute<hash_t, hash_t>(hashes, bins, count,
90
- [&](hash_t hash) { return CONSTANTS::ApplyMask(hash); });
91
- }
92
- };
93
-
94
- //===--------------------------------------------------------------------===//
95
- // Row Data Partitioning
96
- //===--------------------------------------------------------------------===//
97
- template <idx_t radix_bits>
98
- static void InitPartitions(BufferManager &buffer_manager, vector<unique_ptr<RowDataCollection>> &partition_collections,
99
- RowDataBlock *partition_blocks[], vector<BufferHandle> &partition_handles,
100
- data_ptr_t partition_ptrs[], idx_t block_capacity, idx_t row_width) {
101
- using CONSTANTS = RadixPartitioningConstants<radix_bits>;
102
-
103
- partition_collections.reserve(CONSTANTS::NUM_PARTITIONS);
104
- partition_handles.reserve(CONSTANTS::NUM_PARTITIONS);
105
- for (idx_t i = 0; i < CONSTANTS::NUM_PARTITIONS; i++) {
106
- partition_collections.push_back(make_uniq<RowDataCollection>(buffer_manager, block_capacity, row_width));
107
- partition_blocks[i] = &partition_collections[i]->CreateBlock();
108
- partition_handles.push_back(buffer_manager.Pin(partition_blocks[i]->block));
109
- if (partition_ptrs) {
110
- partition_ptrs[i] = partition_handles[i].Ptr();
111
- }
112
- }
113
- }
114
-
115
87
  struct ComputePartitionIndicesFunctor {
116
88
  template <idx_t radix_bits>
117
89
  static void Operation(Vector &hashes, Vector &partition_indices, idx_t count) {
@@ -129,6 +101,7 @@ RadixPartitionedColumnData::RadixPartitionedColumnData(ClientContext &context_p,
129
101
  idx_t radix_bits_p, idx_t hash_col_idx_p)
130
102
  : PartitionedColumnData(PartitionedColumnDataType::RADIX, context_p, std::move(types_p)), radix_bits(radix_bits_p),
131
103
  hash_col_idx(hash_col_idx_p) {
104
+ D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
132
105
  D_ASSERT(hash_col_idx < types.size());
133
106
  const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
134
107
  allocators->allocators.reserve(num_partitions);
@@ -173,6 +146,7 @@ RadixPartitionedTupleData::RadixPartitionedTupleData(BufferManager &buffer_manag
173
146
  idx_t radix_bits_p, idx_t hash_col_idx_p)
174
147
  : PartitionedTupleData(PartitionedTupleDataType::RADIX, buffer_manager, layout_p.Copy()), radix_bits(radix_bits_p),
175
148
  hash_col_idx(hash_col_idx_p) {
149
+ D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
176
150
  D_ASSERT(hash_col_idx < layout.GetTypes().size());
177
151
  const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
178
152
  allocators->allocators.reserve(num_partitions);
@@ -215,6 +189,9 @@ void RadixPartitionedTupleData::InitializeAppendStateInternal(PartitionedTupleDa
215
189
  column_ids.emplace_back(col_idx);
216
190
  }
217
191
  partitions[0]->InitializeAppend(state.chunk_state, std::move(column_ids));
192
+
193
+ // Initialize fixed-size map
194
+ state.fixed_partition_entries.resize(RadixPartitioning::NumberOfPartitions(radix_bits));
218
195
  }
219
196
 
220
197
  void RadixPartitionedTupleData::ComputePartitionIndices(PartitionedTupleDataAppendState &state, DataChunk &input) {
@@ -76,6 +76,10 @@ void RowOperations::CombineStates(RowOperationsState &state, TupleDataLayout &la
76
76
  // Move to the first aggregate states
77
77
  VectorOperations::AddInPlace(sources, layout.GetAggrOffset(), count);
78
78
  VectorOperations::AddInPlace(targets, layout.GetAggrOffset(), count);
79
+
80
+ // Keep track of the offset
81
+ idx_t offset = layout.GetAggrOffset();
82
+
79
83
  for (auto &aggr : layout.GetAggregates()) {
80
84
  D_ASSERT(aggr.function.combine);
81
85
  AggregateInputData aggr_input_data(aggr.GetFunctionData(), state.allocator);
@@ -84,23 +88,34 @@ void RowOperations::CombineStates(RowOperationsState &state, TupleDataLayout &la
84
88
  // Move to the next aggregate states
85
89
  VectorOperations::AddInPlace(sources, aggr.payload_size, count);
86
90
  VectorOperations::AddInPlace(targets, aggr.payload_size, count);
91
+
92
+ // Increment the offset
93
+ offset += aggr.payload_size;
87
94
  }
95
+
96
+ // Now subtract the offset to get back to the original position
97
+ VectorOperations::AddInPlace(sources, -offset, count);
98
+ VectorOperations::AddInPlace(targets, -offset, count);
88
99
  }
89
100
 
90
101
  void RowOperations::FinalizeStates(RowOperationsState &state, TupleDataLayout &layout, Vector &addresses,
91
102
  DataChunk &result, idx_t aggr_idx) {
103
+ // Copy the addresses
104
+ Vector addresses_copy(LogicalType::POINTER);
105
+ VectorOperations::Copy(addresses, addresses_copy, result.size(), 0, 0);
106
+
92
107
  // Move to the first aggregate state
93
- VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), result.size());
108
+ VectorOperations::AddInPlace(addresses_copy, layout.GetAggrOffset(), result.size());
94
109
 
95
110
  auto &aggregates = layout.GetAggregates();
96
111
  for (idx_t i = 0; i < aggregates.size(); i++) {
97
112
  auto &target = result.data[aggr_idx + i];
98
113
  auto &aggr = aggregates[i];
99
114
  AggregateInputData aggr_input_data(aggr.GetFunctionData(), state.allocator);
100
- aggr.function.finalize(addresses, aggr_input_data, target, result.size(), 0);
115
+ aggr.function.finalize(addresses_copy, aggr_input_data, target, result.size(), 0);
101
116
 
102
117
  // Move to the next aggregate state
103
- VectorOperations::AddInPlace(addresses, aggr.payload_size, result.size());
118
+ VectorOperations::AddInPlace(addresses_copy, aggr.payload_size, result.size());
104
119
  }
105
120
  }
106
121
 
@@ -2,147 +2,132 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- void BinaryDeserializer::SetTag(const field_id_t field_id, const char *tag) {
6
- current_field_id = field_id;
7
- current_tag = tag;
8
- stack.back().read_field_count++;
9
- if (stack.back().read_field_count > stack.back().expected_field_count) {
10
- throw SerializationException("Attempting to read a required field, but field is missing");
5
+ //-------------------------------------------------------------------------
6
+ // Nested Type Hooks
7
+ //-------------------------------------------------------------------------
8
+ void BinaryDeserializer::OnPropertyBegin(const field_id_t field_id, const char *) {
9
+ auto field = NextField();
10
+ if (field != field_id) {
11
+ throw InternalException("Failed to deserialize: field id mismatch, expected: %d, got: %d", field_id, field);
11
12
  }
12
13
  }
13
14
 
14
- //===--------------------------------------------------------------------===//
15
- // Nested Types Hooks
16
- //===--------------------------------------------------------------------===//
17
- void BinaryDeserializer::OnObjectBegin() {
18
- auto expected_field_id = ReadPrimitive<field_id_t>();
19
- auto expected_field_count = ReadPrimitive<uint32_t>();
20
- auto expected_size = ReadPrimitive<uint64_t>();
21
- D_ASSERT(expected_field_count > 0);
22
- D_ASSERT(expected_size > 0);
23
- D_ASSERT(expected_field_id == current_field_id);
24
- stack.emplace_back(expected_field_count, expected_size, expected_field_id);
15
+ void BinaryDeserializer::OnPropertyEnd() {
25
16
  }
26
17
 
27
- void BinaryDeserializer::OnObjectEnd() {
28
- auto &frame = stack.back();
29
- if (frame.read_field_count < frame.expected_field_count) {
30
- throw SerializationException("Not all fields were read. This file might have been written with a newer version "
31
- "of DuckDB and is incompatible with this version of DuckDB.");
18
+ bool BinaryDeserializer::OnOptionalPropertyBegin(const field_id_t field_id, const char *s) {
19
+ auto next_field = PeekField();
20
+ auto present = next_field == field_id;
21
+ if (present) {
22
+ ConsumeField();
32
23
  }
33
- stack.pop_back();
34
- }
35
-
36
- idx_t BinaryDeserializer::OnListBegin() {
37
- return ReadPrimitive<idx_t>();
38
- }
39
-
40
- void BinaryDeserializer::OnListEnd() {
41
- }
42
-
43
- // Deserialize maps as [ { key: ..., value: ... } ]
44
- idx_t BinaryDeserializer::OnMapBegin() {
45
- return ReadPrimitive<idx_t>();
46
- }
47
-
48
- void BinaryDeserializer::OnMapEntryBegin() {
49
- }
50
-
51
- void BinaryDeserializer::OnMapKeyBegin() {
52
- }
53
-
54
- void BinaryDeserializer::OnMapValueBegin() {
24
+ return present;
55
25
  }
56
26
 
57
- void BinaryDeserializer::OnMapEntryEnd() {
27
+ void BinaryDeserializer::OnOptionalPropertyEnd(bool present) {
58
28
  }
59
29
 
60
- void BinaryDeserializer::OnMapEnd() {
30
+ void BinaryDeserializer::OnObjectBegin() {
31
+ nesting_level++;
61
32
  }
62
33
 
63
- void BinaryDeserializer::OnPairBegin() {
34
+ void BinaryDeserializer::OnObjectEnd() {
35
+ auto next_field = NextField();
36
+ if (next_field != MESSAGE_TERMINATOR_FIELD_ID) {
37
+ throw InternalException("Failed to deserialize: expected end of object, but found field id: %d", next_field);
38
+ }
39
+ nesting_level--;
64
40
  }
65
41
 
66
- void BinaryDeserializer::OnPairKeyBegin() {
42
+ idx_t BinaryDeserializer::OnListBegin() {
43
+ return VarIntDecode<idx_t>();
67
44
  }
68
45
 
69
- void BinaryDeserializer::OnPairValueBegin() {
46
+ void BinaryDeserializer::OnListEnd() {
70
47
  }
71
48
 
72
- void BinaryDeserializer::OnPairEnd() {
49
+ bool BinaryDeserializer::OnNullableBegin() {
50
+ return ReadBool();
73
51
  }
74
52
 
75
- bool BinaryDeserializer::OnOptionalBegin() {
76
- return ReadPrimitive<bool>();
53
+ void BinaryDeserializer::OnNullableEnd() {
77
54
  }
78
55
 
79
- //===--------------------------------------------------------------------===//
56
+ //-------------------------------------------------------------------------
80
57
  // Primitive Types
81
- //===--------------------------------------------------------------------===//
58
+ //-------------------------------------------------------------------------
82
59
  bool BinaryDeserializer::ReadBool() {
83
- return ReadPrimitive<bool>();
60
+ return static_cast<bool>(ReadPrimitive<uint8_t>());
61
+ }
62
+
63
+ char BinaryDeserializer::ReadChar() {
64
+ return ReadPrimitive<char>();
84
65
  }
85
66
 
86
67
  int8_t BinaryDeserializer::ReadSignedInt8() {
87
- return ReadPrimitive<int8_t>();
68
+ return VarIntDecode<int8_t>();
88
69
  }
89
70
 
90
71
  uint8_t BinaryDeserializer::ReadUnsignedInt8() {
91
- return ReadPrimitive<uint8_t>();
72
+ return VarIntDecode<uint8_t>();
92
73
  }
93
74
 
94
75
  int16_t BinaryDeserializer::ReadSignedInt16() {
95
- return ReadPrimitive<int16_t>();
76
+ return VarIntDecode<int16_t>();
96
77
  }
97
78
 
98
79
  uint16_t BinaryDeserializer::ReadUnsignedInt16() {
99
- return ReadPrimitive<uint16_t>();
80
+ return VarIntDecode<uint16_t>();
100
81
  }
101
82
 
102
83
  int32_t BinaryDeserializer::ReadSignedInt32() {
103
- return ReadPrimitive<int32_t>();
84
+ return VarIntDecode<int32_t>();
104
85
  }
105
86
 
106
87
  uint32_t BinaryDeserializer::ReadUnsignedInt32() {
107
- return ReadPrimitive<uint32_t>();
88
+ return VarIntDecode<uint32_t>();
108
89
  }
109
90
 
110
91
  int64_t BinaryDeserializer::ReadSignedInt64() {
111
- return ReadPrimitive<int64_t>();
92
+ return VarIntDecode<int64_t>();
112
93
  }
113
94
 
114
95
  uint64_t BinaryDeserializer::ReadUnsignedInt64() {
115
- return ReadPrimitive<uint64_t>();
96
+ return VarIntDecode<uint64_t>();
116
97
  }
117
98
 
118
99
  float BinaryDeserializer::ReadFloat() {
119
- return ReadPrimitive<float>();
100
+ auto value = ReadPrimitive<float>();
101
+ return value;
120
102
  }
121
103
 
122
104
  double BinaryDeserializer::ReadDouble() {
123
- return ReadPrimitive<double>();
105
+ auto value = ReadPrimitive<double>();
106
+ return value;
124
107
  }
125
108
 
126
109
  string BinaryDeserializer::ReadString() {
127
- uint32_t size = ReadPrimitive<uint32_t>();
128
- if (size == 0) {
110
+ auto len = VarIntDecode<uint32_t>();
111
+ if (len == 0) {
129
112
  return string();
130
113
  }
131
- auto buffer = make_unsafe_uniq_array<data_t>(size);
132
- ReadData(buffer.get(), size);
133
- return string(const_char_ptr_cast(buffer.get()), size);
134
- }
135
-
136
- interval_t BinaryDeserializer::ReadInterval() {
137
- return ReadPrimitive<interval_t>();
114
+ auto buffer = make_unsafe_uniq_array<data_t>(len);
115
+ ReadData(buffer.get(), len);
116
+ return string(const_char_ptr_cast(buffer.get()), len);
138
117
  }
139
118
 
140
119
  hugeint_t BinaryDeserializer::ReadHugeInt() {
141
- return ReadPrimitive<hugeint_t>();
120
+ auto upper = VarIntDecode<int64_t>();
121
+ auto lower = VarIntDecode<uint64_t>();
122
+ return hugeint_t(upper, lower);
142
123
  }
143
124
 
144
- void BinaryDeserializer::ReadDataPtr(data_ptr_t &ptr, idx_t count) {
145
- ReadData(ptr, count);
125
+ void BinaryDeserializer::ReadDataPtr(data_ptr_t &ptr_p, idx_t count) {
126
+ auto len = VarIntDecode<uint64_t>();
127
+ if (len != count) {
128
+ throw SerializationException("Tried to read blob of %d size, but only %d elements are available", count, len);
129
+ }
130
+ ReadData(ptr_p, count);
146
131
  }
147
132
 
148
133
  } // namespace duckdb
@@ -1,121 +1,134 @@
1
1
  #include "duckdb/common/serializer/binary_serializer.hpp"
2
2
 
3
- namespace duckdb {
4
-
5
- void BinarySerializer::SetTag(const field_id_t field_id, const char *tag) {
6
- current_field_id = field_id;
7
- current_tag = tag;
8
- // Increment the number of fields
9
- stack.back().field_count++;
10
- }
11
-
12
- //===--------------------------------------------------------------------===//
13
- // Nested types
14
- //===--------------------------------------------------------------------===//
15
- void BinarySerializer::OnOptionalBegin(bool present) {
16
- Write(present);
17
- }
18
-
19
- void BinarySerializer::OnListBegin(idx_t count) {
20
- Write(count);
21
- }
22
-
23
- void BinarySerializer::OnListEnd(idx_t count) {
24
- }
3
+ #ifdef DEBUG
4
+ #include "duckdb/common/string_util.hpp"
5
+ #endif
25
6
 
26
- // Serialize maps as arrays of objects with "key" and "value" properties.
27
- void BinarySerializer::OnMapBegin(idx_t count) {
28
- Write(count);
29
- }
7
+ namespace duckdb {
30
8
 
31
- void BinarySerializer::OnMapEntryBegin() {
32
- }
9
+ void BinarySerializer::OnPropertyBegin(const field_id_t field_id, const char *tag) {
10
+ // Just write the field id straight up
11
+ Write<field_id_t>(field_id);
12
+ #ifdef DEBUG
13
+ // Check that the tag is unique
14
+ auto &state = debug_stack.back();
15
+ auto &seen_field_ids = state.seen_field_ids;
16
+ auto &seen_field_tags = state.seen_field_tags;
17
+ auto &seen_fields = state.seen_fields;
18
+
19
+ if (seen_field_ids.find(field_id) != seen_field_ids.end() || seen_field_tags.find(tag) != seen_field_tags.end()) {
20
+ string all_fields;
21
+ for (auto &field : seen_fields) {
22
+ all_fields += StringUtil::Format("\"%s\":%d ", field.first, field.second);
23
+ }
24
+ throw InternalException("Duplicate field id/tag in field: \"%s\":%d, other fields: %s", tag, field_id,
25
+ all_fields);
26
+ }
33
27
 
34
- void BinarySerializer::OnMapKeyBegin() {
28
+ seen_field_ids.insert(field_id);
29
+ seen_field_tags.insert(tag);
30
+ seen_fields.emplace_back(tag, field_id);
31
+ #else
32
+ (void)tag;
33
+ #endif
35
34
  }
36
35
 
37
- void BinarySerializer::OnMapValueBegin() {
36
+ void BinarySerializer::OnPropertyEnd() {
37
+ // Nothing to do here
38
38
  }
39
39
 
40
- void BinarySerializer::OnMapEntryEnd() {
40
+ void BinarySerializer::OnOptionalPropertyBegin(const field_id_t field_id, const char *tag, bool present) {
41
+ // Dont write anything at all if the property is not present
42
+ if (present) {
43
+ OnPropertyBegin(field_id, tag);
44
+ }
41
45
  }
42
46
 
43
- void BinarySerializer::OnMapEnd(idx_t count) {
47
+ void BinarySerializer::OnOptionalPropertyEnd(bool present) {
48
+ // Nothing to do here
44
49
  }
45
50
 
51
+ //-------------------------------------------------------------------------
52
+ // Nested Type Hooks
53
+ //-------------------------------------------------------------------------
46
54
  void BinarySerializer::OnObjectBegin() {
47
- stack.push_back(State({0, 0, data.size()}));
48
- // Store the field id
49
- Write<field_id_t>(current_field_id);
50
- // Store the offset so we can patch the field count and size later
51
- Write<uint32_t>(0); // Placeholder for the field count
52
- Write<uint64_t>(0); // Placeholder for the size
55
+ #ifdef DEBUG
56
+ debug_stack.emplace_back();
57
+ #endif
53
58
  }
54
59
 
55
60
  void BinarySerializer::OnObjectEnd() {
56
- auto &frame = stack.back();
57
- // Patch the field count and size
58
- auto ptr = &data[frame.offset];
59
- ptr += sizeof(field_id_t); // Skip the field id
60
- Store<uint32_t>(frame.field_count, ptr);
61
- ptr += sizeof(uint32_t); // Skip the field count
62
- Store<uint64_t>(frame.size, ptr);
63
- stack.pop_back();
61
+ #ifdef DEBUG
62
+ debug_stack.pop_back();
63
+ #endif
64
+ // Write object terminator
65
+ Write<field_id_t>(MESSAGE_TERMINATOR_FIELD_ID);
64
66
  }
65
67
 
66
- void BinarySerializer::OnPairBegin() {
68
+ void BinarySerializer::OnListBegin(idx_t count) {
69
+ VarIntEncode(count);
67
70
  }
68
71
 
69
- void BinarySerializer::OnPairKeyBegin() {
72
+ void BinarySerializer::OnListEnd() {
70
73
  }
71
74
 
72
- void BinarySerializer::OnPairValueBegin() {
75
+ void BinarySerializer::OnNullableBegin(bool present) {
76
+ WriteValue(present);
73
77
  }
74
78
 
75
- void BinarySerializer::OnPairEnd() {
79
+ void BinarySerializer::OnNullableEnd() {
76
80
  }
77
81
 
78
- //===--------------------------------------------------------------------===//
79
- // Primitive types
80
- //===--------------------------------------------------------------------===//
82
+ //-------------------------------------------------------------------------
83
+ // Primitive Types
84
+ //-------------------------------------------------------------------------
81
85
  void BinarySerializer::WriteNull() {
82
86
  // This should never be called, optional writes should be handled by OnOptionalBegin
83
87
  }
84
88
 
89
+ void BinarySerializer::WriteValue(bool value) {
90
+ Write<uint8_t>(value);
91
+ }
92
+
85
93
  void BinarySerializer::WriteValue(uint8_t value) {
94
+ VarIntEncode(value);
95
+ }
96
+
97
+ void BinarySerializer::WriteValue(char value) {
86
98
  Write(value);
87
99
  }
88
100
 
89
101
  void BinarySerializer::WriteValue(int8_t value) {
90
- Write(value);
102
+ VarIntEncode(value);
91
103
  }
92
104
 
93
105
  void BinarySerializer::WriteValue(uint16_t value) {
94
- Write(value);
106
+ VarIntEncode(value);
95
107
  }
96
108
 
97
109
  void BinarySerializer::WriteValue(int16_t value) {
98
- Write(value);
110
+ VarIntEncode(value);
99
111
  }
100
112
 
101
113
  void BinarySerializer::WriteValue(uint32_t value) {
102
- Write(value);
114
+ VarIntEncode(value);
103
115
  }
104
116
 
105
117
  void BinarySerializer::WriteValue(int32_t value) {
106
- Write(value);
118
+ VarIntEncode(value);
107
119
  }
108
120
 
109
121
  void BinarySerializer::WriteValue(uint64_t value) {
110
- Write(value);
122
+ VarIntEncode(value);
111
123
  }
112
124
 
113
125
  void BinarySerializer::WriteValue(int64_t value) {
114
- Write(value);
126
+ VarIntEncode(value);
115
127
  }
116
128
 
117
129
  void BinarySerializer::WriteValue(hugeint_t value) {
118
- Write(value);
130
+ VarIntEncode(value.upper);
131
+ VarIntEncode(value.lower);
119
132
  }
120
133
 
121
134
  void BinarySerializer::WriteValue(float value) {
@@ -126,39 +139,26 @@ void BinarySerializer::WriteValue(double value) {
126
139
  Write(value);
127
140
  }
128
141
 
129
- void BinarySerializer::WriteValue(interval_t value) {
130
- Write(value);
131
- }
132
-
133
142
  void BinarySerializer::WriteValue(const string &value) {
134
- auto len = value.length();
135
- Write<uint32_t>((uint32_t)len);
136
- if (len > 0) {
137
- WriteDataInternal(value.c_str(), len);
138
- }
143
+ uint32_t len = value.length();
144
+ VarIntEncode(len);
145
+ WriteDataInternal(value.c_str(), len);
139
146
  }
140
147
 
141
148
  void BinarySerializer::WriteValue(const string_t value) {
142
- auto len = value.GetSize();
143
- Write<uint32_t>((uint32_t)len);
144
- if (len > 0) {
145
- WriteDataInternal(value.GetDataUnsafe(), len);
146
- }
149
+ uint32_t len = value.GetSize();
150
+ VarIntEncode(len);
151
+ WriteDataInternal(value.GetDataUnsafe(), len);
147
152
  }
148
153
 
149
154
  void BinarySerializer::WriteValue(const char *value) {
150
- auto len = strlen(value);
151
- Write<uint32_t>((uint32_t)len);
152
- if (len > 0) {
153
- WriteDataInternal(value, len);
154
- }
155
- }
156
-
157
- void BinarySerializer::WriteValue(bool value) {
158
- Write(value);
155
+ uint32_t len = strlen(value);
156
+ VarIntEncode(len);
157
+ WriteDataInternal(value, len);
159
158
  }
160
159
 
161
160
  void BinarySerializer::WriteDataPtr(const_data_ptr_t ptr, idx_t count) {
161
+ VarIntEncode(static_cast<uint64_t>(count));
162
162
  WriteDataInternal(ptr, count);
163
163
  }
164
164
 
@@ -9,7 +9,7 @@ void FormatSerializer::WriteValue(const vector<bool> &vec) {
9
9
  for (auto item : vec) {
10
10
  WriteValue(item);
11
11
  }
12
- OnListEnd(count);
12
+ OnListEnd();
13
13
  }
14
14
 
15
15
  } // namespace duckdb