duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
  4. package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
  5. package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
  6. package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
  7. package/src/duckdb/extension/json/json_scan.cpp +1 -1
  8. package/src/duckdb/extension/json/json_serializer.cpp +26 -69
  9. package/src/duckdb/src/common/enum_util.cpp +119 -7
  10. package/src/duckdb/src/common/extra_type_info.cpp +7 -3
  11. package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
  12. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
  13. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
  14. package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
  15. package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
  16. package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
  17. package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
  18. package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
  19. package/src/duckdb/src/common/types/interval.cpp +3 -0
  20. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
  21. package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
  22. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
  23. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
  24. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
  25. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
  26. package/src/duckdb/src/common/types/value.cpp +63 -42
  27. package/src/duckdb/src/common/types/vector.cpp +33 -67
  28. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
  29. package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
  30. package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
  31. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
  32. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
  33. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
  34. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
  35. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
  36. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
  37. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
  38. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
  39. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
  40. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
  41. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
  42. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
  43. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
  44. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
  45. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
  46. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
  47. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
  48. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
  49. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
  50. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
  51. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
  52. package/src/duckdb/src/execution/window_executor.cpp +6 -5
  53. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
  54. package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
  55. package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
  56. package/src/duckdb/src/function/table/read_csv.cpp +150 -136
  57. package/src/duckdb/src/function/table/table_scan.cpp +0 -2
  58. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  59. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
  60. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
  61. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
  62. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
  63. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
  64. package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
  65. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
  66. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
  67. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
  68. package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
  69. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
  70. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
  71. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
  72. package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
  73. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
  74. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
  75. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
  76. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
  77. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
  78. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
  79. package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
  80. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
  81. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
  82. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
  83. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
  84. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
  85. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
  86. package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
  87. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
  88. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
  89. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
  90. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
  91. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
  92. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
  93. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
  94. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
  95. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
  97. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
  98. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
  99. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
  100. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
  101. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
  102. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
  103. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
  104. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
  105. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
  106. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  107. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
  108. package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
  109. package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
  110. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
  111. package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
  112. package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
  113. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
  114. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
  115. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
  116. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
  117. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
  119. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
  120. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
  121. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
  122. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
  123. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  124. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
  125. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  126. package/src/duckdb/src/include/duckdb.h +12 -0
  127. package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
  128. package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
  129. package/src/duckdb/src/main/client_verify.cpp +1 -0
  130. package/src/duckdb/src/main/config.cpp +2 -2
  131. package/src/duckdb/src/main/connection.cpp +3 -3
  132. package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
  133. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
  134. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
  135. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
  136. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
  137. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
  138. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
  139. package/src/duckdb/src/planner/logical_operator.cpp +1 -1
  140. package/src/duckdb/src/planner/planner.cpp +1 -1
  141. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  142. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
  143. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
  144. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
  145. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
  146. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
  147. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
  148. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
  149. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
  150. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
  151. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
  152. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
  153. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
  154. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
  155. package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
  156. package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
  157. package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
  158. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
  159. package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
  160. package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
  161. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  162. package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
  163. package/src/duckdb/src/storage/table/row_group.cpp +68 -1
  164. package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
  165. package/src/duckdb/src/storage/wal_replay.cpp +2 -2
  166. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
  167. package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
  168. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
  169. package/src/duckdb/ub_src_execution.cpp +0 -2
  170. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
  171. package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
  172. package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
  173. package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
  174. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  175. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  176. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  177. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
  178. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
  179. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
  180. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -0,0 +1,208 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/fixed_size_map.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/pair.hpp"
12
+ #include "duckdb/common/types.hpp"
13
+ #include "duckdb/common/types/validity_mask.hpp"
14
+
15
+ namespace duckdb {
16
+
17
+ template <typename T>
18
+ struct fixed_size_map_iterator_t;
19
+
20
+ template <typename T>
21
+ struct const_fixed_size_map_iterator_t;
22
+
23
+ template <typename T>
24
+ class fixed_size_map_t {
25
+ friend struct fixed_size_map_iterator_t<T>;
26
+ friend struct const_fixed_size_map_iterator_t<T>;
27
+
28
+ public:
29
+ using key_type = idx_t;
30
+ using mapped_type = T;
31
+
32
+ public:
33
+ explicit fixed_size_map_t(idx_t capacity_p = 0) : capacity(capacity_p) {
34
+ resize(capacity);
35
+ }
36
+
37
+ idx_t size() const {
38
+ return count;
39
+ }
40
+
41
+ void resize(idx_t capacity_p) {
42
+ capacity = capacity_p;
43
+ occupied = ValidityMask(capacity);
44
+ values = make_unsafe_uniq_array<T>(capacity + 1);
45
+ clear();
46
+ }
47
+
48
+ void clear() {
49
+ count = 0;
50
+ occupied.SetAllInvalid(capacity);
51
+ }
52
+
53
+ T &operator[](const idx_t &key) {
54
+ D_ASSERT(key < capacity);
55
+ count += 1 - occupied.RowIsValid(key);
56
+ occupied.SetValidUnsafe(key);
57
+ return values[key];
58
+ }
59
+
60
+ const T &operator[](const idx_t &key) const {
61
+ D_ASSERT(key < capacity);
62
+ return values[key];
63
+ }
64
+
65
+ fixed_size_map_iterator_t<T> begin() {
66
+ return fixed_size_map_iterator_t<T>(begin_internal(), *this);
67
+ }
68
+
69
+ const_fixed_size_map_iterator_t<T> begin() const {
70
+ return const_fixed_size_map_iterator_t<T>(begin_internal(), *this);
71
+ }
72
+
73
+ fixed_size_map_iterator_t<T> end() {
74
+ return fixed_size_map_iterator_t<T>(capacity, *this);
75
+ }
76
+
77
+ const_fixed_size_map_iterator_t<T> end() const {
78
+ return const_fixed_size_map_iterator_t<T>(capacity, *this);
79
+ }
80
+
81
+ fixed_size_map_iterator_t<T> find(const idx_t &index) {
82
+ if (occupied.RowIsValid(index)) {
83
+ return fixed_size_map_iterator_t<T>(index, *this);
84
+ } else {
85
+ return end();
86
+ }
87
+ }
88
+
89
+ const_fixed_size_map_iterator_t<T> find(const idx_t &index) const {
90
+ if (occupied.RowIsValid(index)) {
91
+ return const_fixed_size_map_iterator_t<T>(index, *this);
92
+ } else {
93
+ return end();
94
+ }
95
+ }
96
+
97
+ private:
98
+ idx_t begin_internal() const {
99
+ idx_t index;
100
+ for (index = 0; index < capacity; index++) {
101
+ if (occupied.RowIsValid(index)) {
102
+ break;
103
+ }
104
+ }
105
+ return index;
106
+ }
107
+
108
+ private:
109
+ idx_t capacity;
110
+ idx_t count;
111
+
112
+ ValidityMask occupied;
113
+ unsafe_unique_array<T> values;
114
+ };
115
+
116
+ template <typename T>
117
+ struct fixed_size_map_iterator_t {
118
+ public:
119
+ fixed_size_map_iterator_t(idx_t index_p, fixed_size_map_t<T> &map_p) : map(map_p), current(index_p) {
120
+ }
121
+
122
+ fixed_size_map_iterator_t<T> &operator++() {
123
+ for (current++; current < map.capacity; current++) {
124
+ if (map.occupied.RowIsValidUnsafe(current)) {
125
+ break;
126
+ }
127
+ }
128
+ return *this;
129
+ }
130
+
131
+ fixed_size_map_iterator_t<T> operator++(int) {
132
+ fixed_size_map_iterator_t<T> tmp = *this;
133
+ ++(*this);
134
+ return tmp;
135
+ }
136
+
137
+ idx_t &GetKey() {
138
+ return current;
139
+ }
140
+
141
+ const idx_t &GetKey() const {
142
+ return current;
143
+ }
144
+
145
+ T &GetValue() {
146
+ return map.values[current];
147
+ }
148
+
149
+ const T &GetValue() const {
150
+ return map.values[current];
151
+ }
152
+
153
+ friend bool operator==(const fixed_size_map_iterator_t<T> &a, const fixed_size_map_iterator_t<T> &b) {
154
+ return a.current == b.current;
155
+ }
156
+
157
+ friend bool operator!=(const fixed_size_map_iterator_t<T> &a, const fixed_size_map_iterator_t<T> &b) {
158
+ return !(a == b);
159
+ }
160
+
161
+ private:
162
+ fixed_size_map_t<T> &map;
163
+ idx_t current;
164
+ };
165
+
166
+ template <typename T>
167
+ struct const_fixed_size_map_iterator_t {
168
+ public:
169
+ const_fixed_size_map_iterator_t(idx_t index_p, const fixed_size_map_t<T> &map_p) : map(map_p), current(index_p) {
170
+ }
171
+
172
+ const_fixed_size_map_iterator_t<T> &operator++() {
173
+ for (current++; current < map.capacity; current++) {
174
+ if (map.occupied.RowIsValidUnsafe(current)) {
175
+ break;
176
+ }
177
+ }
178
+ return *this;
179
+ }
180
+
181
+ const_fixed_size_map_iterator_t<T> operator++(int) {
182
+ const_fixed_size_map_iterator_t<T> tmp = *this;
183
+ ++(*this);
184
+ return tmp;
185
+ }
186
+
187
+ const idx_t &GetKey() const {
188
+ return current;
189
+ }
190
+
191
+ const T &GetValue() const {
192
+ return map.values[current];
193
+ }
194
+
195
+ friend bool operator==(const const_fixed_size_map_iterator_t<T> &a, const const_fixed_size_map_iterator_t<T> &b) {
196
+ return a.current == b.current;
197
+ }
198
+
199
+ friend bool operator!=(const const_fixed_size_map_iterator_t<T> &a, const const_fixed_size_map_iterator_t<T> &b) {
200
+ return !(a == b);
201
+ }
202
+
203
+ private:
204
+ const fixed_size_map_t<T> &map;
205
+ idx_t current;
206
+ };
207
+
208
+ } // namespace duckdb
@@ -31,6 +31,9 @@ public:
31
31
  bool IsValid() const {
32
32
  return index != DConstants::INVALID_INDEX;
33
33
  }
34
+ void Invalidate() {
35
+ index = INVALID_INDEX;
36
+ }
34
37
  idx_t GetIndex() {
35
38
  if (index == INVALID_INDEX) {
36
39
  throw InternalException("Attempting to get the index of an optional_idx that is not set");
@@ -1,7 +1,7 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/common/string_map_set.hpp
4
+ // duckdb/common/perfect_map_set.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/types.hpp"
12
+ #include "duckdb/common/types/validity_mask.hpp"
12
13
  #include "duckdb/common/unordered_map.hpp"
13
14
  #include "duckdb/common/unordered_set.hpp"
14
15
 
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/common.hpp"
12
+ #include "duckdb/common/string_util.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
 
@@ -21,6 +22,16 @@ public:
21
22
  DUCKDB_API static void Print(OutputStream stream, const string &str);
22
23
  //! Print the object to stderr
23
24
  DUCKDB_API static void Print(const string &str);
25
+ //! Print the formatted object to the stream
26
+ template <typename... Args>
27
+ static void PrintF(OutputStream stream, const string &str, Args... params) {
28
+ Printer::Print(stream, StringUtil::Format(str, params...));
29
+ }
30
+ //! Print the formatted object to stderr
31
+ template <typename... Args>
32
+ static void PrintF(const string &str, Args... params) {
33
+ Printer::PrintF(OutputStream::STREAM_STDERR, str, std::forward<Args>(params)...);
34
+ }
24
35
  //! Directly prints the string to stdout without a newline
25
36
  DUCKDB_API static void RawPrint(OutputStream stream, const string &str);
26
37
  //! Flush an output stream
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/serializer/format_deserializer.hpp"
12
+ #include "duckdb/common/serializer/encoding_util.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
  class ClientContext;
@@ -20,6 +21,7 @@ public:
20
21
  OnObjectBegin();
21
22
  auto result = T::FormatDeserialize(*this);
22
23
  OnObjectEnd();
24
+ D_ASSERT(nesting_level == 0); // make sure we are at the root level
23
25
  return result;
24
26
  }
25
27
 
@@ -42,23 +44,35 @@ private:
42
44
  explicit BinaryDeserializer(data_ptr_t ptr, idx_t length) : ptr(ptr), end_ptr(ptr + length) {
43
45
  deserialize_enum_from_string = false;
44
46
  }
45
- struct State {
46
- uint32_t expected_field_count;
47
- idx_t expected_size;
48
- field_id_t expected_field_id;
49
- uint32_t read_field_count;
50
47
 
51
- State(uint32_t expected_field_count, idx_t expected_size, field_id_t expected_field_id)
52
- : expected_field_count(expected_field_count), expected_size(expected_size),
53
- expected_field_id(expected_field_id), read_field_count(0) {
54
- }
55
- };
56
-
57
- const char *current_tag = nullptr;
58
- field_id_t current_field_id = 0;
59
48
  data_ptr_t ptr;
60
49
  data_ptr_t end_ptr;
61
- vector<State> stack;
50
+ idx_t nesting_level = 0;
51
+
52
+ // Allow peeking 1 field ahead
53
+ bool has_buffered_field = false;
54
+ field_id_t buffered_field = 0;
55
+ field_id_t PeekField() {
56
+ if (!has_buffered_field) {
57
+ buffered_field = ReadPrimitive<field_id_t>();
58
+ has_buffered_field = true;
59
+ }
60
+ return buffered_field;
61
+ }
62
+ void ConsumeField() {
63
+ if (!has_buffered_field) {
64
+ buffered_field = ReadPrimitive<field_id_t>();
65
+ } else {
66
+ has_buffered_field = false;
67
+ }
68
+ }
69
+ field_id_t NextField() {
70
+ if (has_buffered_field) {
71
+ has_buffered_field = false;
72
+ return buffered_field;
73
+ }
74
+ return ReadPrimitive<field_id_t>();
75
+ }
62
76
 
63
77
  template <class T>
64
78
  T ReadPrimitive() {
@@ -69,39 +83,39 @@ private:
69
83
 
70
84
  void ReadData(data_ptr_t buffer, idx_t read_size) {
71
85
  if (ptr + read_size > end_ptr) {
72
- throw SerializationException("Failed to deserialize: not enough data in buffer to fulfill read request");
86
+ throw InternalException("Failed to deserialize: not enough data in buffer to fulfill read request");
73
87
  }
74
88
  memcpy(buffer, ptr, read_size);
75
89
  ptr += read_size;
76
90
  }
77
91
 
78
- // Set the 'tag' of the property to read
79
- void SetTag(const field_id_t field_id, const char *tag) final;
92
+ template <class T>
93
+ T VarIntDecode() {
94
+ T value;
95
+ auto read_size = EncodingUtil::DecodeLEB128<T>(ptr, value);
96
+ ptr += read_size;
97
+ return value;
98
+ }
80
99
 
81
100
  //===--------------------------------------------------------------------===//
82
101
  // Nested Types Hooks
83
102
  //===--------------------------------------------------------------------===//
103
+ void OnPropertyBegin(const field_id_t field_id, const char *tag) final;
104
+ void OnPropertyEnd() final;
105
+ bool OnOptionalPropertyBegin(const field_id_t field_id, const char *tag) final;
106
+ void OnOptionalPropertyEnd(bool present) final;
84
107
  void OnObjectBegin() final;
85
108
  void OnObjectEnd() final;
86
109
  idx_t OnListBegin() final;
87
110
  void OnListEnd() final;
88
- idx_t OnMapBegin() final;
89
- void OnMapEnd() final;
90
- void OnMapEntryBegin() final;
91
- void OnMapEntryEnd() final;
92
- void OnMapKeyBegin() final;
93
- void OnMapValueBegin() final;
94
- bool OnOptionalBegin() final;
95
-
96
- void OnPairBegin() final;
97
- void OnPairKeyBegin() final;
98
- void OnPairValueBegin() final;
99
- void OnPairEnd() final;
111
+ bool OnNullableBegin() final;
112
+ void OnNullableEnd() final;
100
113
 
101
114
  //===--------------------------------------------------------------------===//
102
115
  // Primitive Types
103
116
  //===--------------------------------------------------------------------===//
104
117
  bool ReadBool() final;
118
+ char ReadChar() final;
105
119
  int8_t ReadSignedInt8() final;
106
120
  uint8_t ReadUnsignedInt8() final;
107
121
  int16_t ReadSignedInt16() final;
@@ -113,7 +127,6 @@ private:
113
127
  float ReadFloat() final;
114
128
  double ReadDouble() final;
115
129
  string ReadString() final;
116
- interval_t ReadInterval() final;
117
130
  hugeint_t ReadHugeInt() final;
118
131
  void ReadDataPtr(data_ptr_t &ptr, idx_t count) final;
119
132
  };
@@ -9,25 +9,21 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/serializer/format_serializer.hpp"
12
+ #include "duckdb/common/pair.hpp"
13
+ #include "duckdb/common/serializer/encoding_util.hpp"
12
14
 
13
15
  namespace duckdb {
14
16
 
15
17
  struct BinarySerializer : public FormatSerializer {
16
18
  private:
17
- struct State {
18
- // how many fields are present in the object
19
- uint32_t field_count;
20
- // the size of the object
21
- uint64_t size;
22
- // the offset of the object start in the buffer
23
- uint64_t offset;
19
+ struct DebugState {
20
+ unordered_set<const char *> seen_field_tags;
21
+ unordered_set<field_id_t> seen_field_ids;
22
+ vector<pair<const char *, field_id_t>> seen_fields;
24
23
  };
25
24
 
26
- const char *current_tag;
27
- field_id_t current_field_id = 0;
28
-
25
+ vector<DebugState> debug_stack;
29
26
  vector<data_t> data;
30
- vector<State> stack;
31
27
 
32
28
  template <class T>
33
29
  void Write(T element) {
@@ -36,51 +32,57 @@ private:
36
32
  }
37
33
  void WriteDataInternal(const_data_ptr_t buffer, idx_t write_size) {
38
34
  data.insert(data.end(), buffer, buffer + write_size);
39
- stack.back().size += write_size;
40
35
  }
41
36
  void WriteDataInternal(const char *ptr, idx_t write_size) {
42
37
  WriteDataInternal(const_data_ptr_cast(ptr), write_size);
43
38
  }
44
39
 
45
- explicit BinarySerializer() {
40
+ template <class T>
41
+ void VarIntEncode(T value) {
42
+ uint8_t buffer[16];
43
+ auto write_size = EncodingUtil::EncodeLEB128<T>(buffer, value);
44
+ D_ASSERT(write_size <= sizeof(buffer));
45
+ WriteDataInternal(buffer, write_size);
46
+ }
47
+
48
+ explicit BinarySerializer(bool serialize_default_values_p) {
49
+ serialize_default_values = serialize_default_values_p;
46
50
  serialize_enum_as_string = false;
47
51
  }
48
52
 
49
53
  public:
54
+ //! Serializes the given object into a binary blob, optionally serializing default values if
55
+ //! serialize_default_values is set to true, otherwise properties set to their provided default value
56
+ //! will not be serialized
50
57
  template <class T>
51
- static vector<data_t> Serialize(T &obj) {
52
- BinarySerializer serializer;
58
+ static vector<data_t> Serialize(T &obj, bool serialize_default_values) {
59
+ BinarySerializer serializer(serialize_default_values);
53
60
  serializer.OnObjectBegin();
54
61
  obj.FormatSerialize(serializer);
55
62
  serializer.OnObjectEnd();
56
63
  return std::move(serializer.data);
57
64
  }
58
65
 
59
- void SetTag(const field_id_t field_id, const char *tag) final;
60
-
61
- //===--------------------------------------------------------------------===//
62
- // Nested Types Hooks
63
- //===--------------------------------------------------------------------===//
64
- void OnOptionalBegin(bool present) final;
66
+ //-------------------------------------------------------------------------
67
+ // Nested Type Hooks
68
+ //-------------------------------------------------------------------------
69
+ // We serialize optional values as a message with a "present" flag, followed by the value.
70
+ void OnPropertyBegin(const field_id_t field_id, const char *tag) final;
71
+ void OnPropertyEnd() final;
72
+ void OnOptionalPropertyBegin(const field_id_t field_id, const char *tag, bool present) final;
73
+ void OnOptionalPropertyEnd(bool present) final;
65
74
  void OnListBegin(idx_t count) final;
66
- void OnListEnd(idx_t count) final;
67
- void OnMapBegin(idx_t count) final;
68
- void OnMapEntryBegin() final;
69
- void OnMapEntryEnd() final;
70
- void OnMapKeyBegin() final;
71
- void OnMapValueBegin() final;
72
- void OnMapEnd(idx_t count) final;
75
+ void OnListEnd() final;
73
76
  void OnObjectBegin() final;
74
77
  void OnObjectEnd() final;
75
- void OnPairBegin() final;
76
- void OnPairKeyBegin() final;
77
- void OnPairValueBegin() final;
78
- void OnPairEnd() final;
78
+ void OnNullableBegin(bool present) final;
79
+ void OnNullableEnd() final;
79
80
 
80
- //===--------------------------------------------------------------------===//
81
+ //-------------------------------------------------------------------------
81
82
  // Primitive Types
82
- //===--------------------------------------------------------------------===//
83
+ //-------------------------------------------------------------------------
83
84
  void WriteNull() final;
85
+ void WriteValue(char value) final;
84
86
  void WriteValue(uint8_t value) final;
85
87
  void WriteValue(int8_t value) final;
86
88
  void WriteValue(uint16_t value) final;
@@ -92,7 +94,6 @@ public:
92
94
  void WriteValue(hugeint_t value) final;
93
95
  void WriteValue(float value) final;
94
96
  void WriteValue(double value) final;
95
- void WriteValue(interval_t value) final;
96
97
  void WriteValue(const string_t value) final;
97
98
  void WriteValue(const string &value) final;
98
99
  void WriteValue(const char *value) final;
@@ -21,6 +21,7 @@ struct DeserializationData {
21
21
  stack<reference<ClientContext>> contexts;
22
22
  stack<idx_t> enums;
23
23
  stack<reference<bound_parameter_map_t>> parameter_data;
24
+ stack<reference<LogicalType>> types;
24
25
 
25
26
  template <class T>
26
27
  void Set(T entry) = delete;
@@ -107,4 +108,21 @@ inline void DeserializationData::Unset<bound_parameter_map_t>() {
107
108
  parameter_data.pop();
108
109
  }
109
110
 
111
+ template <>
112
+ inline void DeserializationData::Set(LogicalType &type) {
113
+ types.emplace(type);
114
+ }
115
+
116
+ template <>
117
+ inline LogicalType &DeserializationData::Get() {
118
+ AssertNotEmpty(types);
119
+ return types.top();
120
+ }
121
+
122
+ template <>
123
+ inline void DeserializationData::Unset<LogicalType>() {
124
+ AssertNotEmpty(types);
125
+ types.pop();
126
+ }
127
+
110
128
  } // namespace duckdb
@@ -0,0 +1,132 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/serializer/encoding_util.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/typedefs.hpp"
12
+ #include <type_traits>
13
+
14
+ namespace duckdb {
15
+
16
+ struct EncodingUtil {
17
+
18
+ // Encode unsigned integer, returns the number of bytes written
19
+ template <class T>
20
+ static idx_t EncodeUnsignedLEB128(data_ptr_t target, T value) {
21
+ static_assert(std::is_integral<T>::value, "Must be integral");
22
+ static_assert(std::is_unsigned<T>::value, "Must be unsigned");
23
+ static_assert(sizeof(T) <= sizeof(uint64_t), "Must be uint64_t or smaller");
24
+
25
+ idx_t offset = 0;
26
+ do {
27
+ uint8_t byte = value & 0x7F;
28
+ value >>= 7;
29
+ if (value != 0) {
30
+ byte |= 0x80;
31
+ }
32
+ target[offset++] = byte;
33
+ } while (value != 0);
34
+ return offset;
35
+ }
36
+
37
+ // Decode unsigned integer, returns the number of bytes read
38
+ template <class T>
39
+ static idx_t DecodeUnsignedLEB128(const_data_ptr_t source, T &result) {
40
+ static_assert(std::is_integral<T>::value, "Must be integral");
41
+ static_assert(std::is_unsigned<T>::value, "Must be unsigned");
42
+ static_assert(sizeof(T) <= sizeof(uint64_t), "Must be uint64_t or smaller");
43
+
44
+ result = 0;
45
+ idx_t shift = 0;
46
+ idx_t offset = 0;
47
+ uint8_t byte;
48
+ do {
49
+ byte = source[offset++];
50
+ result |= static_cast<T>(byte & 0x7F) << shift;
51
+ shift += 7;
52
+ } while (byte & 0x80);
53
+
54
+ return offset;
55
+ }
56
+
57
+ // Encode signed integer, returns the number of bytes written
58
+ template <class T>
59
+ static idx_t EncodeSignedLEB128(data_ptr_t target, T value) {
60
+ static_assert(std::is_integral<T>::value, "Must be integral");
61
+ static_assert(std::is_signed<T>::value, "Must be signed");
62
+ static_assert(sizeof(T) <= sizeof(int64_t), "Must be int64_t or smaller");
63
+
64
+ idx_t offset = 0;
65
+ do {
66
+ uint8_t byte = value & 0x7F;
67
+ value >>= 7;
68
+
69
+ // Determine whether more bytes are needed
70
+ if ((value == 0 && (byte & 0x40) == 0) || (value == -1 && (byte & 0x40))) {
71
+ target[offset++] = byte;
72
+ break;
73
+ } else {
74
+ byte |= 0x80;
75
+ target[offset++] = byte;
76
+ }
77
+ } while (true);
78
+ return offset;
79
+ }
80
+
81
+ // Decode signed integer, returns the number of bytes read
82
+ template <class T>
83
+ static idx_t DecodeSignedLEB128(const_data_ptr_t source, T &result) {
84
+ static_assert(std::is_integral<T>::value, "Must be integral");
85
+ static_assert(std::is_signed<T>::value, "Must be signed");
86
+ static_assert(sizeof(T) <= sizeof(int64_t), "Must be int64_t or smaller");
87
+
88
+ // This is used to avoid undefined behavior when shifting into the sign bit
89
+ using unsigned_type = typename std::make_unsigned<T>::type;
90
+
91
+ result = 0;
92
+ idx_t shift = 0;
93
+ idx_t offset = 0;
94
+
95
+ uint8_t byte;
96
+ do {
97
+ byte = source[offset++];
98
+ result |= static_cast<unsigned_type>(byte & 0x7F) << shift;
99
+ shift += 7;
100
+ } while (byte & 0x80);
101
+
102
+ // Sign-extend if the most significant bit of the last byte is set
103
+ if (shift < sizeof(T) * 8 && (byte & 0x40)) {
104
+ result |= -(static_cast<unsigned_type>(1) << shift);
105
+ }
106
+ return offset;
107
+ }
108
+
109
+ template <class T>
110
+ static typename std::enable_if<std::is_signed<T>::value, idx_t>::type DecodeLEB128(const_data_ptr_t source,
111
+ T &result) {
112
+ return DecodeSignedLEB128(source, result);
113
+ }
114
+
115
+ template <class T>
116
+ static typename std::enable_if<std::is_unsigned<T>::value, idx_t>::type DecodeLEB128(const_data_ptr_t source,
117
+ T &result) {
118
+ return DecodeUnsignedLEB128(source, result);
119
+ }
120
+
121
+ template <class T>
122
+ static typename std::enable_if<std::is_signed<T>::value, idx_t>::type EncodeLEB128(data_ptr_t target, T value) {
123
+ return EncodeSignedLEB128(target, value);
124
+ }
125
+
126
+ template <class T>
127
+ static typename std::enable_if<std::is_unsigned<T>::value, idx_t>::type EncodeLEB128(data_ptr_t target, T value) {
128
+ return EncodeUnsignedLEB128(target, value);
129
+ }
130
+ };
131
+
132
+ } // namespace duckdb