duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
- package/src/duckdb/extension/json/json_scan.cpp +1 -1
- package/src/duckdb/extension/json/json_serializer.cpp +26 -69
- package/src/duckdb/src/common/enum_util.cpp +119 -7
- package/src/duckdb/src/common/extra_type_info.cpp +7 -3
- package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
- package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
- package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/value.cpp +63 -42
- package/src/duckdb/src/common/types/vector.cpp +33 -67
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
- package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
- package/src/duckdb/src/execution/window_executor.cpp +6 -5
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
- package/src/duckdb/src/function/table/read_csv.cpp +150 -136
- package/src/duckdb/src/function/table/table_scan.cpp +0 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +12 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +2 -2
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
- package/src/duckdb/src/planner/logical_operator.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
- package/src/duckdb/src/storage/table/row_group.cpp +68 -1
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_src_execution.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -0,0 +1,208 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/common/fixed_size_map.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/pair.hpp"
|
12
|
+
#include "duckdb/common/types.hpp"
|
13
|
+
#include "duckdb/common/types/validity_mask.hpp"
|
14
|
+
|
15
|
+
namespace duckdb {
|
16
|
+
|
17
|
+
template <typename T>
|
18
|
+
struct fixed_size_map_iterator_t;
|
19
|
+
|
20
|
+
template <typename T>
|
21
|
+
struct const_fixed_size_map_iterator_t;
|
22
|
+
|
23
|
+
template <typename T>
|
24
|
+
class fixed_size_map_t {
|
25
|
+
friend struct fixed_size_map_iterator_t<T>;
|
26
|
+
friend struct const_fixed_size_map_iterator_t<T>;
|
27
|
+
|
28
|
+
public:
|
29
|
+
using key_type = idx_t;
|
30
|
+
using mapped_type = T;
|
31
|
+
|
32
|
+
public:
|
33
|
+
explicit fixed_size_map_t(idx_t capacity_p = 0) : capacity(capacity_p) {
|
34
|
+
resize(capacity);
|
35
|
+
}
|
36
|
+
|
37
|
+
idx_t size() const {
|
38
|
+
return count;
|
39
|
+
}
|
40
|
+
|
41
|
+
void resize(idx_t capacity_p) {
|
42
|
+
capacity = capacity_p;
|
43
|
+
occupied = ValidityMask(capacity);
|
44
|
+
values = make_unsafe_uniq_array<T>(capacity + 1);
|
45
|
+
clear();
|
46
|
+
}
|
47
|
+
|
48
|
+
void clear() {
|
49
|
+
count = 0;
|
50
|
+
occupied.SetAllInvalid(capacity);
|
51
|
+
}
|
52
|
+
|
53
|
+
T &operator[](const idx_t &key) {
|
54
|
+
D_ASSERT(key < capacity);
|
55
|
+
count += 1 - occupied.RowIsValid(key);
|
56
|
+
occupied.SetValidUnsafe(key);
|
57
|
+
return values[key];
|
58
|
+
}
|
59
|
+
|
60
|
+
const T &operator[](const idx_t &key) const {
|
61
|
+
D_ASSERT(key < capacity);
|
62
|
+
return values[key];
|
63
|
+
}
|
64
|
+
|
65
|
+
fixed_size_map_iterator_t<T> begin() {
|
66
|
+
return fixed_size_map_iterator_t<T>(begin_internal(), *this);
|
67
|
+
}
|
68
|
+
|
69
|
+
const_fixed_size_map_iterator_t<T> begin() const {
|
70
|
+
return const_fixed_size_map_iterator_t<T>(begin_internal(), *this);
|
71
|
+
}
|
72
|
+
|
73
|
+
fixed_size_map_iterator_t<T> end() {
|
74
|
+
return fixed_size_map_iterator_t<T>(capacity, *this);
|
75
|
+
}
|
76
|
+
|
77
|
+
const_fixed_size_map_iterator_t<T> end() const {
|
78
|
+
return const_fixed_size_map_iterator_t<T>(capacity, *this);
|
79
|
+
}
|
80
|
+
|
81
|
+
fixed_size_map_iterator_t<T> find(const idx_t &index) {
|
82
|
+
if (occupied.RowIsValid(index)) {
|
83
|
+
return fixed_size_map_iterator_t<T>(index, *this);
|
84
|
+
} else {
|
85
|
+
return end();
|
86
|
+
}
|
87
|
+
}
|
88
|
+
|
89
|
+
const_fixed_size_map_iterator_t<T> find(const idx_t &index) const {
|
90
|
+
if (occupied.RowIsValid(index)) {
|
91
|
+
return const_fixed_size_map_iterator_t<T>(index, *this);
|
92
|
+
} else {
|
93
|
+
return end();
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
97
|
+
private:
|
98
|
+
idx_t begin_internal() const {
|
99
|
+
idx_t index;
|
100
|
+
for (index = 0; index < capacity; index++) {
|
101
|
+
if (occupied.RowIsValid(index)) {
|
102
|
+
break;
|
103
|
+
}
|
104
|
+
}
|
105
|
+
return index;
|
106
|
+
}
|
107
|
+
|
108
|
+
private:
|
109
|
+
idx_t capacity;
|
110
|
+
idx_t count;
|
111
|
+
|
112
|
+
ValidityMask occupied;
|
113
|
+
unsafe_unique_array<T> values;
|
114
|
+
};
|
115
|
+
|
116
|
+
template <typename T>
|
117
|
+
struct fixed_size_map_iterator_t {
|
118
|
+
public:
|
119
|
+
fixed_size_map_iterator_t(idx_t index_p, fixed_size_map_t<T> &map_p) : map(map_p), current(index_p) {
|
120
|
+
}
|
121
|
+
|
122
|
+
fixed_size_map_iterator_t<T> &operator++() {
|
123
|
+
for (current++; current < map.capacity; current++) {
|
124
|
+
if (map.occupied.RowIsValidUnsafe(current)) {
|
125
|
+
break;
|
126
|
+
}
|
127
|
+
}
|
128
|
+
return *this;
|
129
|
+
}
|
130
|
+
|
131
|
+
fixed_size_map_iterator_t<T> operator++(int) {
|
132
|
+
fixed_size_map_iterator_t<T> tmp = *this;
|
133
|
+
++(*this);
|
134
|
+
return tmp;
|
135
|
+
}
|
136
|
+
|
137
|
+
idx_t &GetKey() {
|
138
|
+
return current;
|
139
|
+
}
|
140
|
+
|
141
|
+
const idx_t &GetKey() const {
|
142
|
+
return current;
|
143
|
+
}
|
144
|
+
|
145
|
+
T &GetValue() {
|
146
|
+
return map.values[current];
|
147
|
+
}
|
148
|
+
|
149
|
+
const T &GetValue() const {
|
150
|
+
return map.values[current];
|
151
|
+
}
|
152
|
+
|
153
|
+
friend bool operator==(const fixed_size_map_iterator_t<T> &a, const fixed_size_map_iterator_t<T> &b) {
|
154
|
+
return a.current == b.current;
|
155
|
+
}
|
156
|
+
|
157
|
+
friend bool operator!=(const fixed_size_map_iterator_t<T> &a, const fixed_size_map_iterator_t<T> &b) {
|
158
|
+
return !(a == b);
|
159
|
+
}
|
160
|
+
|
161
|
+
private:
|
162
|
+
fixed_size_map_t<T> &map;
|
163
|
+
idx_t current;
|
164
|
+
};
|
165
|
+
|
166
|
+
template <typename T>
|
167
|
+
struct const_fixed_size_map_iterator_t {
|
168
|
+
public:
|
169
|
+
const_fixed_size_map_iterator_t(idx_t index_p, const fixed_size_map_t<T> &map_p) : map(map_p), current(index_p) {
|
170
|
+
}
|
171
|
+
|
172
|
+
const_fixed_size_map_iterator_t<T> &operator++() {
|
173
|
+
for (current++; current < map.capacity; current++) {
|
174
|
+
if (map.occupied.RowIsValidUnsafe(current)) {
|
175
|
+
break;
|
176
|
+
}
|
177
|
+
}
|
178
|
+
return *this;
|
179
|
+
}
|
180
|
+
|
181
|
+
const_fixed_size_map_iterator_t<T> operator++(int) {
|
182
|
+
const_fixed_size_map_iterator_t<T> tmp = *this;
|
183
|
+
++(*this);
|
184
|
+
return tmp;
|
185
|
+
}
|
186
|
+
|
187
|
+
const idx_t &GetKey() const {
|
188
|
+
return current;
|
189
|
+
}
|
190
|
+
|
191
|
+
const T &GetValue() const {
|
192
|
+
return map.values[current];
|
193
|
+
}
|
194
|
+
|
195
|
+
friend bool operator==(const const_fixed_size_map_iterator_t<T> &a, const const_fixed_size_map_iterator_t<T> &b) {
|
196
|
+
return a.current == b.current;
|
197
|
+
}
|
198
|
+
|
199
|
+
friend bool operator!=(const const_fixed_size_map_iterator_t<T> &a, const const_fixed_size_map_iterator_t<T> &b) {
|
200
|
+
return !(a == b);
|
201
|
+
}
|
202
|
+
|
203
|
+
private:
|
204
|
+
const fixed_size_map_t<T> &map;
|
205
|
+
idx_t current;
|
206
|
+
};
|
207
|
+
|
208
|
+
} // namespace duckdb
|
@@ -31,6 +31,9 @@ public:
|
|
31
31
|
bool IsValid() const {
|
32
32
|
return index != DConstants::INVALID_INDEX;
|
33
33
|
}
|
34
|
+
void Invalidate() {
|
35
|
+
index = INVALID_INDEX;
|
36
|
+
}
|
34
37
|
idx_t GetIndex() {
|
35
38
|
if (index == INVALID_INDEX) {
|
36
39
|
throw InternalException("Attempting to get the index of an optional_idx that is not set");
|
@@ -1,7 +1,7 @@
|
|
1
1
|
//===----------------------------------------------------------------------===//
|
2
2
|
// DuckDB
|
3
3
|
//
|
4
|
-
// duckdb/common/
|
4
|
+
// duckdb/common/perfect_map_set.hpp
|
5
5
|
//
|
6
6
|
//
|
7
7
|
//===----------------------------------------------------------------------===//
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/types.hpp"
|
12
|
+
#include "duckdb/common/types/validity_mask.hpp"
|
12
13
|
#include "duckdb/common/unordered_map.hpp"
|
13
14
|
#include "duckdb/common/unordered_set.hpp"
|
14
15
|
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
12
|
+
#include "duckdb/common/string_util.hpp"
|
12
13
|
|
13
14
|
namespace duckdb {
|
14
15
|
|
@@ -21,6 +22,16 @@ public:
|
|
21
22
|
DUCKDB_API static void Print(OutputStream stream, const string &str);
|
22
23
|
//! Print the object to stderr
|
23
24
|
DUCKDB_API static void Print(const string &str);
|
25
|
+
//! Print the formatted object to the stream
|
26
|
+
template <typename... Args>
|
27
|
+
static void PrintF(OutputStream stream, const string &str, Args... params) {
|
28
|
+
Printer::Print(stream, StringUtil::Format(str, params...));
|
29
|
+
}
|
30
|
+
//! Print the formatted object to stderr
|
31
|
+
template <typename... Args>
|
32
|
+
static void PrintF(const string &str, Args... params) {
|
33
|
+
Printer::PrintF(OutputStream::STREAM_STDERR, str, std::forward<Args>(params)...);
|
34
|
+
}
|
24
35
|
//! Directly prints the string to stdout without a newline
|
25
36
|
DUCKDB_API static void RawPrint(OutputStream stream, const string &str);
|
26
37
|
//! Flush an output stream
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/serializer/format_deserializer.hpp"
|
12
|
+
#include "duckdb/common/serializer/encoding_util.hpp"
|
12
13
|
|
13
14
|
namespace duckdb {
|
14
15
|
class ClientContext;
|
@@ -20,6 +21,7 @@ public:
|
|
20
21
|
OnObjectBegin();
|
21
22
|
auto result = T::FormatDeserialize(*this);
|
22
23
|
OnObjectEnd();
|
24
|
+
D_ASSERT(nesting_level == 0); // make sure we are at the root level
|
23
25
|
return result;
|
24
26
|
}
|
25
27
|
|
@@ -42,23 +44,35 @@ private:
|
|
42
44
|
explicit BinaryDeserializer(data_ptr_t ptr, idx_t length) : ptr(ptr), end_ptr(ptr + length) {
|
43
45
|
deserialize_enum_from_string = false;
|
44
46
|
}
|
45
|
-
struct State {
|
46
|
-
uint32_t expected_field_count;
|
47
|
-
idx_t expected_size;
|
48
|
-
field_id_t expected_field_id;
|
49
|
-
uint32_t read_field_count;
|
50
47
|
|
51
|
-
State(uint32_t expected_field_count, idx_t expected_size, field_id_t expected_field_id)
|
52
|
-
: expected_field_count(expected_field_count), expected_size(expected_size),
|
53
|
-
expected_field_id(expected_field_id), read_field_count(0) {
|
54
|
-
}
|
55
|
-
};
|
56
|
-
|
57
|
-
const char *current_tag = nullptr;
|
58
|
-
field_id_t current_field_id = 0;
|
59
48
|
data_ptr_t ptr;
|
60
49
|
data_ptr_t end_ptr;
|
61
|
-
|
50
|
+
idx_t nesting_level = 0;
|
51
|
+
|
52
|
+
// Allow peeking 1 field ahead
|
53
|
+
bool has_buffered_field = false;
|
54
|
+
field_id_t buffered_field = 0;
|
55
|
+
field_id_t PeekField() {
|
56
|
+
if (!has_buffered_field) {
|
57
|
+
buffered_field = ReadPrimitive<field_id_t>();
|
58
|
+
has_buffered_field = true;
|
59
|
+
}
|
60
|
+
return buffered_field;
|
61
|
+
}
|
62
|
+
void ConsumeField() {
|
63
|
+
if (!has_buffered_field) {
|
64
|
+
buffered_field = ReadPrimitive<field_id_t>();
|
65
|
+
} else {
|
66
|
+
has_buffered_field = false;
|
67
|
+
}
|
68
|
+
}
|
69
|
+
field_id_t NextField() {
|
70
|
+
if (has_buffered_field) {
|
71
|
+
has_buffered_field = false;
|
72
|
+
return buffered_field;
|
73
|
+
}
|
74
|
+
return ReadPrimitive<field_id_t>();
|
75
|
+
}
|
62
76
|
|
63
77
|
template <class T>
|
64
78
|
T ReadPrimitive() {
|
@@ -69,39 +83,39 @@ private:
|
|
69
83
|
|
70
84
|
void ReadData(data_ptr_t buffer, idx_t read_size) {
|
71
85
|
if (ptr + read_size > end_ptr) {
|
72
|
-
throw
|
86
|
+
throw InternalException("Failed to deserialize: not enough data in buffer to fulfill read request");
|
73
87
|
}
|
74
88
|
memcpy(buffer, ptr, read_size);
|
75
89
|
ptr += read_size;
|
76
90
|
}
|
77
91
|
|
78
|
-
|
79
|
-
|
92
|
+
template <class T>
|
93
|
+
T VarIntDecode() {
|
94
|
+
T value;
|
95
|
+
auto read_size = EncodingUtil::DecodeLEB128<T>(ptr, value);
|
96
|
+
ptr += read_size;
|
97
|
+
return value;
|
98
|
+
}
|
80
99
|
|
81
100
|
//===--------------------------------------------------------------------===//
|
82
101
|
// Nested Types Hooks
|
83
102
|
//===--------------------------------------------------------------------===//
|
103
|
+
void OnPropertyBegin(const field_id_t field_id, const char *tag) final;
|
104
|
+
void OnPropertyEnd() final;
|
105
|
+
bool OnOptionalPropertyBegin(const field_id_t field_id, const char *tag) final;
|
106
|
+
void OnOptionalPropertyEnd(bool present) final;
|
84
107
|
void OnObjectBegin() final;
|
85
108
|
void OnObjectEnd() final;
|
86
109
|
idx_t OnListBegin() final;
|
87
110
|
void OnListEnd() final;
|
88
|
-
|
89
|
-
void
|
90
|
-
void OnMapEntryBegin() final;
|
91
|
-
void OnMapEntryEnd() final;
|
92
|
-
void OnMapKeyBegin() final;
|
93
|
-
void OnMapValueBegin() final;
|
94
|
-
bool OnOptionalBegin() final;
|
95
|
-
|
96
|
-
void OnPairBegin() final;
|
97
|
-
void OnPairKeyBegin() final;
|
98
|
-
void OnPairValueBegin() final;
|
99
|
-
void OnPairEnd() final;
|
111
|
+
bool OnNullableBegin() final;
|
112
|
+
void OnNullableEnd() final;
|
100
113
|
|
101
114
|
//===--------------------------------------------------------------------===//
|
102
115
|
// Primitive Types
|
103
116
|
//===--------------------------------------------------------------------===//
|
104
117
|
bool ReadBool() final;
|
118
|
+
char ReadChar() final;
|
105
119
|
int8_t ReadSignedInt8() final;
|
106
120
|
uint8_t ReadUnsignedInt8() final;
|
107
121
|
int16_t ReadSignedInt16() final;
|
@@ -113,7 +127,6 @@ private:
|
|
113
127
|
float ReadFloat() final;
|
114
128
|
double ReadDouble() final;
|
115
129
|
string ReadString() final;
|
116
|
-
interval_t ReadInterval() final;
|
117
130
|
hugeint_t ReadHugeInt() final;
|
118
131
|
void ReadDataPtr(data_ptr_t &ptr, idx_t count) final;
|
119
132
|
};
|
@@ -9,25 +9,21 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/serializer/format_serializer.hpp"
|
12
|
+
#include "duckdb/common/pair.hpp"
|
13
|
+
#include "duckdb/common/serializer/encoding_util.hpp"
|
12
14
|
|
13
15
|
namespace duckdb {
|
14
16
|
|
15
17
|
struct BinarySerializer : public FormatSerializer {
|
16
18
|
private:
|
17
|
-
struct
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
uint64_t size;
|
22
|
-
// the offset of the object start in the buffer
|
23
|
-
uint64_t offset;
|
19
|
+
struct DebugState {
|
20
|
+
unordered_set<const char *> seen_field_tags;
|
21
|
+
unordered_set<field_id_t> seen_field_ids;
|
22
|
+
vector<pair<const char *, field_id_t>> seen_fields;
|
24
23
|
};
|
25
24
|
|
26
|
-
|
27
|
-
field_id_t current_field_id = 0;
|
28
|
-
|
25
|
+
vector<DebugState> debug_stack;
|
29
26
|
vector<data_t> data;
|
30
|
-
vector<State> stack;
|
31
27
|
|
32
28
|
template <class T>
|
33
29
|
void Write(T element) {
|
@@ -36,51 +32,57 @@ private:
|
|
36
32
|
}
|
37
33
|
void WriteDataInternal(const_data_ptr_t buffer, idx_t write_size) {
|
38
34
|
data.insert(data.end(), buffer, buffer + write_size);
|
39
|
-
stack.back().size += write_size;
|
40
35
|
}
|
41
36
|
void WriteDataInternal(const char *ptr, idx_t write_size) {
|
42
37
|
WriteDataInternal(const_data_ptr_cast(ptr), write_size);
|
43
38
|
}
|
44
39
|
|
45
|
-
|
40
|
+
template <class T>
|
41
|
+
void VarIntEncode(T value) {
|
42
|
+
uint8_t buffer[16];
|
43
|
+
auto write_size = EncodingUtil::EncodeLEB128<T>(buffer, value);
|
44
|
+
D_ASSERT(write_size <= sizeof(buffer));
|
45
|
+
WriteDataInternal(buffer, write_size);
|
46
|
+
}
|
47
|
+
|
48
|
+
explicit BinarySerializer(bool serialize_default_values_p) {
|
49
|
+
serialize_default_values = serialize_default_values_p;
|
46
50
|
serialize_enum_as_string = false;
|
47
51
|
}
|
48
52
|
|
49
53
|
public:
|
54
|
+
//! Serializes the given object into a binary blob, optionally serializing default values if
|
55
|
+
//! serialize_default_values is set to true, otherwise properties set to their provided default value
|
56
|
+
//! will not be serialized
|
50
57
|
template <class T>
|
51
|
-
static vector<data_t> Serialize(T &obj) {
|
52
|
-
BinarySerializer serializer;
|
58
|
+
static vector<data_t> Serialize(T &obj, bool serialize_default_values) {
|
59
|
+
BinarySerializer serializer(serialize_default_values);
|
53
60
|
serializer.OnObjectBegin();
|
54
61
|
obj.FormatSerialize(serializer);
|
55
62
|
serializer.OnObjectEnd();
|
56
63
|
return std::move(serializer.data);
|
57
64
|
}
|
58
65
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
//
|
63
|
-
|
64
|
-
void
|
66
|
+
//-------------------------------------------------------------------------
|
67
|
+
// Nested Type Hooks
|
68
|
+
//-------------------------------------------------------------------------
|
69
|
+
// We serialize optional values as a message with a "present" flag, followed by the value.
|
70
|
+
void OnPropertyBegin(const field_id_t field_id, const char *tag) final;
|
71
|
+
void OnPropertyEnd() final;
|
72
|
+
void OnOptionalPropertyBegin(const field_id_t field_id, const char *tag, bool present) final;
|
73
|
+
void OnOptionalPropertyEnd(bool present) final;
|
65
74
|
void OnListBegin(idx_t count) final;
|
66
|
-
void OnListEnd(
|
67
|
-
void OnMapBegin(idx_t count) final;
|
68
|
-
void OnMapEntryBegin() final;
|
69
|
-
void OnMapEntryEnd() final;
|
70
|
-
void OnMapKeyBegin() final;
|
71
|
-
void OnMapValueBegin() final;
|
72
|
-
void OnMapEnd(idx_t count) final;
|
75
|
+
void OnListEnd() final;
|
73
76
|
void OnObjectBegin() final;
|
74
77
|
void OnObjectEnd() final;
|
75
|
-
void
|
76
|
-
void
|
77
|
-
void OnPairValueBegin() final;
|
78
|
-
void OnPairEnd() final;
|
78
|
+
void OnNullableBegin(bool present) final;
|
79
|
+
void OnNullableEnd() final;
|
79
80
|
|
80
|
-
|
81
|
+
//-------------------------------------------------------------------------
|
81
82
|
// Primitive Types
|
82
|
-
|
83
|
+
//-------------------------------------------------------------------------
|
83
84
|
void WriteNull() final;
|
85
|
+
void WriteValue(char value) final;
|
84
86
|
void WriteValue(uint8_t value) final;
|
85
87
|
void WriteValue(int8_t value) final;
|
86
88
|
void WriteValue(uint16_t value) final;
|
@@ -92,7 +94,6 @@ public:
|
|
92
94
|
void WriteValue(hugeint_t value) final;
|
93
95
|
void WriteValue(float value) final;
|
94
96
|
void WriteValue(double value) final;
|
95
|
-
void WriteValue(interval_t value) final;
|
96
97
|
void WriteValue(const string_t value) final;
|
97
98
|
void WriteValue(const string &value) final;
|
98
99
|
void WriteValue(const char *value) final;
|
@@ -21,6 +21,7 @@ struct DeserializationData {
|
|
21
21
|
stack<reference<ClientContext>> contexts;
|
22
22
|
stack<idx_t> enums;
|
23
23
|
stack<reference<bound_parameter_map_t>> parameter_data;
|
24
|
+
stack<reference<LogicalType>> types;
|
24
25
|
|
25
26
|
template <class T>
|
26
27
|
void Set(T entry) = delete;
|
@@ -107,4 +108,21 @@ inline void DeserializationData::Unset<bound_parameter_map_t>() {
|
|
107
108
|
parameter_data.pop();
|
108
109
|
}
|
109
110
|
|
111
|
+
template <>
|
112
|
+
inline void DeserializationData::Set(LogicalType &type) {
|
113
|
+
types.emplace(type);
|
114
|
+
}
|
115
|
+
|
116
|
+
template <>
|
117
|
+
inline LogicalType &DeserializationData::Get() {
|
118
|
+
AssertNotEmpty(types);
|
119
|
+
return types.top();
|
120
|
+
}
|
121
|
+
|
122
|
+
template <>
|
123
|
+
inline void DeserializationData::Unset<LogicalType>() {
|
124
|
+
AssertNotEmpty(types);
|
125
|
+
types.pop();
|
126
|
+
}
|
127
|
+
|
110
128
|
} // namespace duckdb
|
@@ -0,0 +1,132 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/common/serializer/encoding_util.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/typedefs.hpp"
|
12
|
+
#include <type_traits>
|
13
|
+
|
14
|
+
namespace duckdb {
|
15
|
+
|
16
|
+
struct EncodingUtil {
|
17
|
+
|
18
|
+
// Encode unsigned integer, returns the number of bytes written
|
19
|
+
template <class T>
|
20
|
+
static idx_t EncodeUnsignedLEB128(data_ptr_t target, T value) {
|
21
|
+
static_assert(std::is_integral<T>::value, "Must be integral");
|
22
|
+
static_assert(std::is_unsigned<T>::value, "Must be unsigned");
|
23
|
+
static_assert(sizeof(T) <= sizeof(uint64_t), "Must be uint64_t or smaller");
|
24
|
+
|
25
|
+
idx_t offset = 0;
|
26
|
+
do {
|
27
|
+
uint8_t byte = value & 0x7F;
|
28
|
+
value >>= 7;
|
29
|
+
if (value != 0) {
|
30
|
+
byte |= 0x80;
|
31
|
+
}
|
32
|
+
target[offset++] = byte;
|
33
|
+
} while (value != 0);
|
34
|
+
return offset;
|
35
|
+
}
|
36
|
+
|
37
|
+
// Decode unsigned integer, returns the number of bytes read
|
38
|
+
template <class T>
|
39
|
+
static idx_t DecodeUnsignedLEB128(const_data_ptr_t source, T &result) {
|
40
|
+
static_assert(std::is_integral<T>::value, "Must be integral");
|
41
|
+
static_assert(std::is_unsigned<T>::value, "Must be unsigned");
|
42
|
+
static_assert(sizeof(T) <= sizeof(uint64_t), "Must be uint64_t or smaller");
|
43
|
+
|
44
|
+
result = 0;
|
45
|
+
idx_t shift = 0;
|
46
|
+
idx_t offset = 0;
|
47
|
+
uint8_t byte;
|
48
|
+
do {
|
49
|
+
byte = source[offset++];
|
50
|
+
result |= static_cast<T>(byte & 0x7F) << shift;
|
51
|
+
shift += 7;
|
52
|
+
} while (byte & 0x80);
|
53
|
+
|
54
|
+
return offset;
|
55
|
+
}
|
56
|
+
|
57
|
+
// Encode signed integer, returns the number of bytes written
|
58
|
+
template <class T>
|
59
|
+
static idx_t EncodeSignedLEB128(data_ptr_t target, T value) {
|
60
|
+
static_assert(std::is_integral<T>::value, "Must be integral");
|
61
|
+
static_assert(std::is_signed<T>::value, "Must be signed");
|
62
|
+
static_assert(sizeof(T) <= sizeof(int64_t), "Must be int64_t or smaller");
|
63
|
+
|
64
|
+
idx_t offset = 0;
|
65
|
+
do {
|
66
|
+
uint8_t byte = value & 0x7F;
|
67
|
+
value >>= 7;
|
68
|
+
|
69
|
+
// Determine whether more bytes are needed
|
70
|
+
if ((value == 0 && (byte & 0x40) == 0) || (value == -1 && (byte & 0x40))) {
|
71
|
+
target[offset++] = byte;
|
72
|
+
break;
|
73
|
+
} else {
|
74
|
+
byte |= 0x80;
|
75
|
+
target[offset++] = byte;
|
76
|
+
}
|
77
|
+
} while (true);
|
78
|
+
return offset;
|
79
|
+
}
|
80
|
+
|
81
|
+
// Decode signed integer, returns the number of bytes read
|
82
|
+
template <class T>
|
83
|
+
static idx_t DecodeSignedLEB128(const_data_ptr_t source, T &result) {
|
84
|
+
static_assert(std::is_integral<T>::value, "Must be integral");
|
85
|
+
static_assert(std::is_signed<T>::value, "Must be signed");
|
86
|
+
static_assert(sizeof(T) <= sizeof(int64_t), "Must be int64_t or smaller");
|
87
|
+
|
88
|
+
// This is used to avoid undefined behavior when shifting into the sign bit
|
89
|
+
using unsigned_type = typename std::make_unsigned<T>::type;
|
90
|
+
|
91
|
+
result = 0;
|
92
|
+
idx_t shift = 0;
|
93
|
+
idx_t offset = 0;
|
94
|
+
|
95
|
+
uint8_t byte;
|
96
|
+
do {
|
97
|
+
byte = source[offset++];
|
98
|
+
result |= static_cast<unsigned_type>(byte & 0x7F) << shift;
|
99
|
+
shift += 7;
|
100
|
+
} while (byte & 0x80);
|
101
|
+
|
102
|
+
// Sign-extend if the most significant bit of the last byte is set
|
103
|
+
if (shift < sizeof(T) * 8 && (byte & 0x40)) {
|
104
|
+
result |= -(static_cast<unsigned_type>(1) << shift);
|
105
|
+
}
|
106
|
+
return offset;
|
107
|
+
}
|
108
|
+
|
109
|
+
template <class T>
|
110
|
+
static typename std::enable_if<std::is_signed<T>::value, idx_t>::type DecodeLEB128(const_data_ptr_t source,
|
111
|
+
T &result) {
|
112
|
+
return DecodeSignedLEB128(source, result);
|
113
|
+
}
|
114
|
+
|
115
|
+
template <class T>
|
116
|
+
static typename std::enable_if<std::is_unsigned<T>::value, idx_t>::type DecodeLEB128(const_data_ptr_t source,
|
117
|
+
T &result) {
|
118
|
+
return DecodeUnsignedLEB128(source, result);
|
119
|
+
}
|
120
|
+
|
121
|
+
template <class T>
|
122
|
+
static typename std::enable_if<std::is_signed<T>::value, idx_t>::type EncodeLEB128(data_ptr_t target, T value) {
|
123
|
+
return EncodeSignedLEB128(target, value);
|
124
|
+
}
|
125
|
+
|
126
|
+
template <class T>
|
127
|
+
static typename std::enable_if<std::is_unsigned<T>::value, idx_t>::type EncodeLEB128(data_ptr_t target, T value) {
|
128
|
+
return EncodeUnsignedLEB128(target, value);
|
129
|
+
}
|
130
|
+
};
|
131
|
+
|
132
|
+
} // namespace duckdb
|