duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
- package/src/duckdb/extension/json/json_scan.cpp +1 -1
- package/src/duckdb/extension/json/json_serializer.cpp +26 -69
- package/src/duckdb/src/common/enum_util.cpp +119 -7
- package/src/duckdb/src/common/extra_type_info.cpp +7 -3
- package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
- package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
- package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/value.cpp +63 -42
- package/src/duckdb/src/common/types/vector.cpp +33 -67
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
- package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
- package/src/duckdb/src/execution/window_executor.cpp +6 -5
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
- package/src/duckdb/src/function/table/read_csv.cpp +150 -136
- package/src/duckdb/src/function/table/table_scan.cpp +0 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +12 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +2 -2
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
- package/src/duckdb/src/planner/logical_operator.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
- package/src/duckdb/src/storage/table/row_group.cpp +68 -1
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_src_execution.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -26,9 +26,11 @@ public:
|
|
26
26
|
};
|
27
27
|
|
28
28
|
template <class OP, class RETURN_TYPE, typename... ARGS>
|
29
|
-
RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&...
|
29
|
+
RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&...args) {
|
30
30
|
D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
|
31
31
|
switch (radix_bits) {
|
32
|
+
case 0:
|
33
|
+
return OP::template Operation<0>(std::forward<ARGS>(args)...);
|
32
34
|
case 1:
|
33
35
|
return OP::template Operation<1>(std::forward<ARGS>(args)...);
|
34
36
|
case 2:
|
@@ -82,36 +84,6 @@ idx_t RadixPartitioning::Select(Vector &hashes, const SelectionVector *sel, idx_
|
|
82
84
|
return RadixBitsSwitch<SelectFunctor, idx_t>(radix_bits, hashes, sel, count, cutoff, true_sel, false_sel);
|
83
85
|
}
|
84
86
|
|
85
|
-
struct HashsToBinsFunctor {
|
86
|
-
template <idx_t radix_bits>
|
87
|
-
static void Operation(Vector &hashes, Vector &bins, idx_t count) {
|
88
|
-
using CONSTANTS = RadixPartitioningConstants<radix_bits>;
|
89
|
-
UnaryExecutor::Execute<hash_t, hash_t>(hashes, bins, count,
|
90
|
-
[&](hash_t hash) { return CONSTANTS::ApplyMask(hash); });
|
91
|
-
}
|
92
|
-
};
|
93
|
-
|
94
|
-
//===--------------------------------------------------------------------===//
|
95
|
-
// Row Data Partitioning
|
96
|
-
//===--------------------------------------------------------------------===//
|
97
|
-
template <idx_t radix_bits>
|
98
|
-
static void InitPartitions(BufferManager &buffer_manager, vector<unique_ptr<RowDataCollection>> &partition_collections,
|
99
|
-
RowDataBlock *partition_blocks[], vector<BufferHandle> &partition_handles,
|
100
|
-
data_ptr_t partition_ptrs[], idx_t block_capacity, idx_t row_width) {
|
101
|
-
using CONSTANTS = RadixPartitioningConstants<radix_bits>;
|
102
|
-
|
103
|
-
partition_collections.reserve(CONSTANTS::NUM_PARTITIONS);
|
104
|
-
partition_handles.reserve(CONSTANTS::NUM_PARTITIONS);
|
105
|
-
for (idx_t i = 0; i < CONSTANTS::NUM_PARTITIONS; i++) {
|
106
|
-
partition_collections.push_back(make_uniq<RowDataCollection>(buffer_manager, block_capacity, row_width));
|
107
|
-
partition_blocks[i] = &partition_collections[i]->CreateBlock();
|
108
|
-
partition_handles.push_back(buffer_manager.Pin(partition_blocks[i]->block));
|
109
|
-
if (partition_ptrs) {
|
110
|
-
partition_ptrs[i] = partition_handles[i].Ptr();
|
111
|
-
}
|
112
|
-
}
|
113
|
-
}
|
114
|
-
|
115
87
|
struct ComputePartitionIndicesFunctor {
|
116
88
|
template <idx_t radix_bits>
|
117
89
|
static void Operation(Vector &hashes, Vector &partition_indices, idx_t count) {
|
@@ -129,6 +101,7 @@ RadixPartitionedColumnData::RadixPartitionedColumnData(ClientContext &context_p,
|
|
129
101
|
idx_t radix_bits_p, idx_t hash_col_idx_p)
|
130
102
|
: PartitionedColumnData(PartitionedColumnDataType::RADIX, context_p, std::move(types_p)), radix_bits(radix_bits_p),
|
131
103
|
hash_col_idx(hash_col_idx_p) {
|
104
|
+
D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
|
132
105
|
D_ASSERT(hash_col_idx < types.size());
|
133
106
|
const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
|
134
107
|
allocators->allocators.reserve(num_partitions);
|
@@ -173,6 +146,7 @@ RadixPartitionedTupleData::RadixPartitionedTupleData(BufferManager &buffer_manag
|
|
173
146
|
idx_t radix_bits_p, idx_t hash_col_idx_p)
|
174
147
|
: PartitionedTupleData(PartitionedTupleDataType::RADIX, buffer_manager, layout_p.Copy()), radix_bits(radix_bits_p),
|
175
148
|
hash_col_idx(hash_col_idx_p) {
|
149
|
+
D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
|
176
150
|
D_ASSERT(hash_col_idx < layout.GetTypes().size());
|
177
151
|
const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
|
178
152
|
allocators->allocators.reserve(num_partitions);
|
@@ -215,6 +189,9 @@ void RadixPartitionedTupleData::InitializeAppendStateInternal(PartitionedTupleDa
|
|
215
189
|
column_ids.emplace_back(col_idx);
|
216
190
|
}
|
217
191
|
partitions[0]->InitializeAppend(state.chunk_state, std::move(column_ids));
|
192
|
+
|
193
|
+
// Initialize fixed-size map
|
194
|
+
state.fixed_partition_entries.resize(RadixPartitioning::NumberOfPartitions(radix_bits));
|
218
195
|
}
|
219
196
|
|
220
197
|
void RadixPartitionedTupleData::ComputePartitionIndices(PartitionedTupleDataAppendState &state, DataChunk &input) {
|
@@ -76,6 +76,10 @@ void RowOperations::CombineStates(RowOperationsState &state, TupleDataLayout &la
|
|
76
76
|
// Move to the first aggregate states
|
77
77
|
VectorOperations::AddInPlace(sources, layout.GetAggrOffset(), count);
|
78
78
|
VectorOperations::AddInPlace(targets, layout.GetAggrOffset(), count);
|
79
|
+
|
80
|
+
// Keep track of the offset
|
81
|
+
idx_t offset = layout.GetAggrOffset();
|
82
|
+
|
79
83
|
for (auto &aggr : layout.GetAggregates()) {
|
80
84
|
D_ASSERT(aggr.function.combine);
|
81
85
|
AggregateInputData aggr_input_data(aggr.GetFunctionData(), state.allocator);
|
@@ -84,23 +88,34 @@ void RowOperations::CombineStates(RowOperationsState &state, TupleDataLayout &la
|
|
84
88
|
// Move to the next aggregate states
|
85
89
|
VectorOperations::AddInPlace(sources, aggr.payload_size, count);
|
86
90
|
VectorOperations::AddInPlace(targets, aggr.payload_size, count);
|
91
|
+
|
92
|
+
// Increment the offset
|
93
|
+
offset += aggr.payload_size;
|
87
94
|
}
|
95
|
+
|
96
|
+
// Now subtract the offset to get back to the original position
|
97
|
+
VectorOperations::AddInPlace(sources, -offset, count);
|
98
|
+
VectorOperations::AddInPlace(targets, -offset, count);
|
88
99
|
}
|
89
100
|
|
90
101
|
void RowOperations::FinalizeStates(RowOperationsState &state, TupleDataLayout &layout, Vector &addresses,
|
91
102
|
DataChunk &result, idx_t aggr_idx) {
|
103
|
+
// Copy the addresses
|
104
|
+
Vector addresses_copy(LogicalType::POINTER);
|
105
|
+
VectorOperations::Copy(addresses, addresses_copy, result.size(), 0, 0);
|
106
|
+
|
92
107
|
// Move to the first aggregate state
|
93
|
-
VectorOperations::AddInPlace(
|
108
|
+
VectorOperations::AddInPlace(addresses_copy, layout.GetAggrOffset(), result.size());
|
94
109
|
|
95
110
|
auto &aggregates = layout.GetAggregates();
|
96
111
|
for (idx_t i = 0; i < aggregates.size(); i++) {
|
97
112
|
auto &target = result.data[aggr_idx + i];
|
98
113
|
auto &aggr = aggregates[i];
|
99
114
|
AggregateInputData aggr_input_data(aggr.GetFunctionData(), state.allocator);
|
100
|
-
aggr.function.finalize(
|
115
|
+
aggr.function.finalize(addresses_copy, aggr_input_data, target, result.size(), 0);
|
101
116
|
|
102
117
|
// Move to the next aggregate state
|
103
|
-
VectorOperations::AddInPlace(
|
118
|
+
VectorOperations::AddInPlace(addresses_copy, aggr.payload_size, result.size());
|
104
119
|
}
|
105
120
|
}
|
106
121
|
|
@@ -2,147 +2,132 @@
|
|
2
2
|
|
3
3
|
namespace duckdb {
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
5
|
+
//-------------------------------------------------------------------------
|
6
|
+
// Nested Type Hooks
|
7
|
+
//-------------------------------------------------------------------------
|
8
|
+
void BinaryDeserializer::OnPropertyBegin(const field_id_t field_id, const char *) {
|
9
|
+
auto field = NextField();
|
10
|
+
if (field != field_id) {
|
11
|
+
throw InternalException("Failed to deserialize: field id mismatch, expected: %d, got: %d", field_id, field);
|
11
12
|
}
|
12
13
|
}
|
13
14
|
|
14
|
-
|
15
|
-
// Nested Types Hooks
|
16
|
-
//===--------------------------------------------------------------------===//
|
17
|
-
void BinaryDeserializer::OnObjectBegin() {
|
18
|
-
auto expected_field_id = ReadPrimitive<field_id_t>();
|
19
|
-
auto expected_field_count = ReadPrimitive<uint32_t>();
|
20
|
-
auto expected_size = ReadPrimitive<uint64_t>();
|
21
|
-
D_ASSERT(expected_field_count > 0);
|
22
|
-
D_ASSERT(expected_size > 0);
|
23
|
-
D_ASSERT(expected_field_id == current_field_id);
|
24
|
-
stack.emplace_back(expected_field_count, expected_size, expected_field_id);
|
15
|
+
void BinaryDeserializer::OnPropertyEnd() {
|
25
16
|
}
|
26
17
|
|
27
|
-
|
28
|
-
auto
|
29
|
-
|
30
|
-
|
31
|
-
|
18
|
+
bool BinaryDeserializer::OnOptionalPropertyBegin(const field_id_t field_id, const char *s) {
|
19
|
+
auto next_field = PeekField();
|
20
|
+
auto present = next_field == field_id;
|
21
|
+
if (present) {
|
22
|
+
ConsumeField();
|
32
23
|
}
|
33
|
-
|
34
|
-
}
|
35
|
-
|
36
|
-
idx_t BinaryDeserializer::OnListBegin() {
|
37
|
-
return ReadPrimitive<idx_t>();
|
38
|
-
}
|
39
|
-
|
40
|
-
void BinaryDeserializer::OnListEnd() {
|
41
|
-
}
|
42
|
-
|
43
|
-
// Deserialize maps as [ { key: ..., value: ... } ]
|
44
|
-
idx_t BinaryDeserializer::OnMapBegin() {
|
45
|
-
return ReadPrimitive<idx_t>();
|
46
|
-
}
|
47
|
-
|
48
|
-
void BinaryDeserializer::OnMapEntryBegin() {
|
49
|
-
}
|
50
|
-
|
51
|
-
void BinaryDeserializer::OnMapKeyBegin() {
|
52
|
-
}
|
53
|
-
|
54
|
-
void BinaryDeserializer::OnMapValueBegin() {
|
24
|
+
return present;
|
55
25
|
}
|
56
26
|
|
57
|
-
void BinaryDeserializer::
|
27
|
+
void BinaryDeserializer::OnOptionalPropertyEnd(bool present) {
|
58
28
|
}
|
59
29
|
|
60
|
-
void BinaryDeserializer::
|
30
|
+
void BinaryDeserializer::OnObjectBegin() {
|
31
|
+
nesting_level++;
|
61
32
|
}
|
62
33
|
|
63
|
-
void BinaryDeserializer::
|
34
|
+
void BinaryDeserializer::OnObjectEnd() {
|
35
|
+
auto next_field = NextField();
|
36
|
+
if (next_field != MESSAGE_TERMINATOR_FIELD_ID) {
|
37
|
+
throw InternalException("Failed to deserialize: expected end of object, but found field id: %d", next_field);
|
38
|
+
}
|
39
|
+
nesting_level--;
|
64
40
|
}
|
65
41
|
|
66
|
-
|
42
|
+
idx_t BinaryDeserializer::OnListBegin() {
|
43
|
+
return VarIntDecode<idx_t>();
|
67
44
|
}
|
68
45
|
|
69
|
-
void BinaryDeserializer::
|
46
|
+
void BinaryDeserializer::OnListEnd() {
|
70
47
|
}
|
71
48
|
|
72
|
-
|
49
|
+
bool BinaryDeserializer::OnNullableBegin() {
|
50
|
+
return ReadBool();
|
73
51
|
}
|
74
52
|
|
75
|
-
|
76
|
-
return ReadPrimitive<bool>();
|
53
|
+
void BinaryDeserializer::OnNullableEnd() {
|
77
54
|
}
|
78
55
|
|
79
|
-
|
56
|
+
//-------------------------------------------------------------------------
|
80
57
|
// Primitive Types
|
81
|
-
|
58
|
+
//-------------------------------------------------------------------------
|
82
59
|
bool BinaryDeserializer::ReadBool() {
|
83
|
-
return
|
60
|
+
return static_cast<bool>(ReadPrimitive<uint8_t>());
|
61
|
+
}
|
62
|
+
|
63
|
+
char BinaryDeserializer::ReadChar() {
|
64
|
+
return ReadPrimitive<char>();
|
84
65
|
}
|
85
66
|
|
86
67
|
int8_t BinaryDeserializer::ReadSignedInt8() {
|
87
|
-
return
|
68
|
+
return VarIntDecode<int8_t>();
|
88
69
|
}
|
89
70
|
|
90
71
|
uint8_t BinaryDeserializer::ReadUnsignedInt8() {
|
91
|
-
return
|
72
|
+
return VarIntDecode<uint8_t>();
|
92
73
|
}
|
93
74
|
|
94
75
|
int16_t BinaryDeserializer::ReadSignedInt16() {
|
95
|
-
return
|
76
|
+
return VarIntDecode<int16_t>();
|
96
77
|
}
|
97
78
|
|
98
79
|
uint16_t BinaryDeserializer::ReadUnsignedInt16() {
|
99
|
-
return
|
80
|
+
return VarIntDecode<uint16_t>();
|
100
81
|
}
|
101
82
|
|
102
83
|
int32_t BinaryDeserializer::ReadSignedInt32() {
|
103
|
-
return
|
84
|
+
return VarIntDecode<int32_t>();
|
104
85
|
}
|
105
86
|
|
106
87
|
uint32_t BinaryDeserializer::ReadUnsignedInt32() {
|
107
|
-
return
|
88
|
+
return VarIntDecode<uint32_t>();
|
108
89
|
}
|
109
90
|
|
110
91
|
int64_t BinaryDeserializer::ReadSignedInt64() {
|
111
|
-
return
|
92
|
+
return VarIntDecode<int64_t>();
|
112
93
|
}
|
113
94
|
|
114
95
|
uint64_t BinaryDeserializer::ReadUnsignedInt64() {
|
115
|
-
return
|
96
|
+
return VarIntDecode<uint64_t>();
|
116
97
|
}
|
117
98
|
|
118
99
|
float BinaryDeserializer::ReadFloat() {
|
119
|
-
|
100
|
+
auto value = ReadPrimitive<float>();
|
101
|
+
return value;
|
120
102
|
}
|
121
103
|
|
122
104
|
double BinaryDeserializer::ReadDouble() {
|
123
|
-
|
105
|
+
auto value = ReadPrimitive<double>();
|
106
|
+
return value;
|
124
107
|
}
|
125
108
|
|
126
109
|
string BinaryDeserializer::ReadString() {
|
127
|
-
|
128
|
-
if (
|
110
|
+
auto len = VarIntDecode<uint32_t>();
|
111
|
+
if (len == 0) {
|
129
112
|
return string();
|
130
113
|
}
|
131
|
-
auto buffer = make_unsafe_uniq_array<data_t>(
|
132
|
-
ReadData(buffer.get(),
|
133
|
-
return string(const_char_ptr_cast(buffer.get()),
|
134
|
-
}
|
135
|
-
|
136
|
-
interval_t BinaryDeserializer::ReadInterval() {
|
137
|
-
return ReadPrimitive<interval_t>();
|
114
|
+
auto buffer = make_unsafe_uniq_array<data_t>(len);
|
115
|
+
ReadData(buffer.get(), len);
|
116
|
+
return string(const_char_ptr_cast(buffer.get()), len);
|
138
117
|
}
|
139
118
|
|
140
119
|
hugeint_t BinaryDeserializer::ReadHugeInt() {
|
141
|
-
|
120
|
+
auto upper = VarIntDecode<int64_t>();
|
121
|
+
auto lower = VarIntDecode<uint64_t>();
|
122
|
+
return hugeint_t(upper, lower);
|
142
123
|
}
|
143
124
|
|
144
|
-
void BinaryDeserializer::ReadDataPtr(data_ptr_t &
|
145
|
-
|
125
|
+
void BinaryDeserializer::ReadDataPtr(data_ptr_t &ptr_p, idx_t count) {
|
126
|
+
auto len = VarIntDecode<uint64_t>();
|
127
|
+
if (len != count) {
|
128
|
+
throw SerializationException("Tried to read blob of %d size, but only %d elements are available", count, len);
|
129
|
+
}
|
130
|
+
ReadData(ptr_p, count);
|
146
131
|
}
|
147
132
|
|
148
133
|
} // namespace duckdb
|
@@ -1,121 +1,134 @@
|
|
1
1
|
#include "duckdb/common/serializer/binary_serializer.hpp"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
current_field_id = field_id;
|
7
|
-
current_tag = tag;
|
8
|
-
// Increment the number of fields
|
9
|
-
stack.back().field_count++;
|
10
|
-
}
|
11
|
-
|
12
|
-
//===--------------------------------------------------------------------===//
|
13
|
-
// Nested types
|
14
|
-
//===--------------------------------------------------------------------===//
|
15
|
-
void BinarySerializer::OnOptionalBegin(bool present) {
|
16
|
-
Write(present);
|
17
|
-
}
|
18
|
-
|
19
|
-
void BinarySerializer::OnListBegin(idx_t count) {
|
20
|
-
Write(count);
|
21
|
-
}
|
22
|
-
|
23
|
-
void BinarySerializer::OnListEnd(idx_t count) {
|
24
|
-
}
|
3
|
+
#ifdef DEBUG
|
4
|
+
#include "duckdb/common/string_util.hpp"
|
5
|
+
#endif
|
25
6
|
|
26
|
-
|
27
|
-
void BinarySerializer::OnMapBegin(idx_t count) {
|
28
|
-
Write(count);
|
29
|
-
}
|
7
|
+
namespace duckdb {
|
30
8
|
|
31
|
-
void BinarySerializer::
|
32
|
-
|
9
|
+
void BinarySerializer::OnPropertyBegin(const field_id_t field_id, const char *tag) {
|
10
|
+
// Just write the field id straight up
|
11
|
+
Write<field_id_t>(field_id);
|
12
|
+
#ifdef DEBUG
|
13
|
+
// Check that the tag is unique
|
14
|
+
auto &state = debug_stack.back();
|
15
|
+
auto &seen_field_ids = state.seen_field_ids;
|
16
|
+
auto &seen_field_tags = state.seen_field_tags;
|
17
|
+
auto &seen_fields = state.seen_fields;
|
18
|
+
|
19
|
+
if (seen_field_ids.find(field_id) != seen_field_ids.end() || seen_field_tags.find(tag) != seen_field_tags.end()) {
|
20
|
+
string all_fields;
|
21
|
+
for (auto &field : seen_fields) {
|
22
|
+
all_fields += StringUtil::Format("\"%s\":%d ", field.first, field.second);
|
23
|
+
}
|
24
|
+
throw InternalException("Duplicate field id/tag in field: \"%s\":%d, other fields: %s", tag, field_id,
|
25
|
+
all_fields);
|
26
|
+
}
|
33
27
|
|
34
|
-
|
28
|
+
seen_field_ids.insert(field_id);
|
29
|
+
seen_field_tags.insert(tag);
|
30
|
+
seen_fields.emplace_back(tag, field_id);
|
31
|
+
#else
|
32
|
+
(void)tag;
|
33
|
+
#endif
|
35
34
|
}
|
36
35
|
|
37
|
-
void BinarySerializer::
|
36
|
+
void BinarySerializer::OnPropertyEnd() {
|
37
|
+
// Nothing to do here
|
38
38
|
}
|
39
39
|
|
40
|
-
void BinarySerializer::
|
40
|
+
void BinarySerializer::OnOptionalPropertyBegin(const field_id_t field_id, const char *tag, bool present) {
|
41
|
+
// Dont write anything at all if the property is not present
|
42
|
+
if (present) {
|
43
|
+
OnPropertyBegin(field_id, tag);
|
44
|
+
}
|
41
45
|
}
|
42
46
|
|
43
|
-
void BinarySerializer::
|
47
|
+
void BinarySerializer::OnOptionalPropertyEnd(bool present) {
|
48
|
+
// Nothing to do here
|
44
49
|
}
|
45
50
|
|
51
|
+
//-------------------------------------------------------------------------
|
52
|
+
// Nested Type Hooks
|
53
|
+
//-------------------------------------------------------------------------
|
46
54
|
void BinarySerializer::OnObjectBegin() {
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
// Store the offset so we can patch the field count and size later
|
51
|
-
Write<uint32_t>(0); // Placeholder for the field count
|
52
|
-
Write<uint64_t>(0); // Placeholder for the size
|
55
|
+
#ifdef DEBUG
|
56
|
+
debug_stack.emplace_back();
|
57
|
+
#endif
|
53
58
|
}
|
54
59
|
|
55
60
|
void BinarySerializer::OnObjectEnd() {
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
ptr += sizeof(uint32_t); // Skip the field count
|
62
|
-
Store<uint64_t>(frame.size, ptr);
|
63
|
-
stack.pop_back();
|
61
|
+
#ifdef DEBUG
|
62
|
+
debug_stack.pop_back();
|
63
|
+
#endif
|
64
|
+
// Write object terminator
|
65
|
+
Write<field_id_t>(MESSAGE_TERMINATOR_FIELD_ID);
|
64
66
|
}
|
65
67
|
|
66
|
-
void BinarySerializer::
|
68
|
+
void BinarySerializer::OnListBegin(idx_t count) {
|
69
|
+
VarIntEncode(count);
|
67
70
|
}
|
68
71
|
|
69
|
-
void BinarySerializer::
|
72
|
+
void BinarySerializer::OnListEnd() {
|
70
73
|
}
|
71
74
|
|
72
|
-
void BinarySerializer::
|
75
|
+
void BinarySerializer::OnNullableBegin(bool present) {
|
76
|
+
WriteValue(present);
|
73
77
|
}
|
74
78
|
|
75
|
-
void BinarySerializer::
|
79
|
+
void BinarySerializer::OnNullableEnd() {
|
76
80
|
}
|
77
81
|
|
78
|
-
|
79
|
-
// Primitive
|
80
|
-
|
82
|
+
//-------------------------------------------------------------------------
|
83
|
+
// Primitive Types
|
84
|
+
//-------------------------------------------------------------------------
|
81
85
|
void BinarySerializer::WriteNull() {
|
82
86
|
// This should never be called, optional writes should be handled by OnOptionalBegin
|
83
87
|
}
|
84
88
|
|
89
|
+
void BinarySerializer::WriteValue(bool value) {
|
90
|
+
Write<uint8_t>(value);
|
91
|
+
}
|
92
|
+
|
85
93
|
void BinarySerializer::WriteValue(uint8_t value) {
|
94
|
+
VarIntEncode(value);
|
95
|
+
}
|
96
|
+
|
97
|
+
void BinarySerializer::WriteValue(char value) {
|
86
98
|
Write(value);
|
87
99
|
}
|
88
100
|
|
89
101
|
void BinarySerializer::WriteValue(int8_t value) {
|
90
|
-
|
102
|
+
VarIntEncode(value);
|
91
103
|
}
|
92
104
|
|
93
105
|
void BinarySerializer::WriteValue(uint16_t value) {
|
94
|
-
|
106
|
+
VarIntEncode(value);
|
95
107
|
}
|
96
108
|
|
97
109
|
void BinarySerializer::WriteValue(int16_t value) {
|
98
|
-
|
110
|
+
VarIntEncode(value);
|
99
111
|
}
|
100
112
|
|
101
113
|
void BinarySerializer::WriteValue(uint32_t value) {
|
102
|
-
|
114
|
+
VarIntEncode(value);
|
103
115
|
}
|
104
116
|
|
105
117
|
void BinarySerializer::WriteValue(int32_t value) {
|
106
|
-
|
118
|
+
VarIntEncode(value);
|
107
119
|
}
|
108
120
|
|
109
121
|
void BinarySerializer::WriteValue(uint64_t value) {
|
110
|
-
|
122
|
+
VarIntEncode(value);
|
111
123
|
}
|
112
124
|
|
113
125
|
void BinarySerializer::WriteValue(int64_t value) {
|
114
|
-
|
126
|
+
VarIntEncode(value);
|
115
127
|
}
|
116
128
|
|
117
129
|
void BinarySerializer::WriteValue(hugeint_t value) {
|
118
|
-
|
130
|
+
VarIntEncode(value.upper);
|
131
|
+
VarIntEncode(value.lower);
|
119
132
|
}
|
120
133
|
|
121
134
|
void BinarySerializer::WriteValue(float value) {
|
@@ -126,39 +139,26 @@ void BinarySerializer::WriteValue(double value) {
|
|
126
139
|
Write(value);
|
127
140
|
}
|
128
141
|
|
129
|
-
void BinarySerializer::WriteValue(interval_t value) {
|
130
|
-
Write(value);
|
131
|
-
}
|
132
|
-
|
133
142
|
void BinarySerializer::WriteValue(const string &value) {
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
WriteDataInternal(value.c_str(), len);
|
138
|
-
}
|
143
|
+
uint32_t len = value.length();
|
144
|
+
VarIntEncode(len);
|
145
|
+
WriteDataInternal(value.c_str(), len);
|
139
146
|
}
|
140
147
|
|
141
148
|
void BinarySerializer::WriteValue(const string_t value) {
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
WriteDataInternal(value.GetDataUnsafe(), len);
|
146
|
-
}
|
149
|
+
uint32_t len = value.GetSize();
|
150
|
+
VarIntEncode(len);
|
151
|
+
WriteDataInternal(value.GetDataUnsafe(), len);
|
147
152
|
}
|
148
153
|
|
149
154
|
void BinarySerializer::WriteValue(const char *value) {
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
WriteDataInternal(value, len);
|
154
|
-
}
|
155
|
-
}
|
156
|
-
|
157
|
-
void BinarySerializer::WriteValue(bool value) {
|
158
|
-
Write(value);
|
155
|
+
uint32_t len = strlen(value);
|
156
|
+
VarIntEncode(len);
|
157
|
+
WriteDataInternal(value, len);
|
159
158
|
}
|
160
159
|
|
161
160
|
void BinarySerializer::WriteDataPtr(const_data_ptr_t ptr, idx_t count) {
|
161
|
+
VarIntEncode(static_cast<uint64_t>(count));
|
162
162
|
WriteDataInternal(ptr, count);
|
163
163
|
}
|
164
164
|
|