duckdb 0.8.2-dev4711.0 → 0.8.2-dev5002.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/binding.gyp +0 -1
  2. package/binding.gyp.in +0 -1
  3. package/package.json +1 -1
  4. package/src/connection.cpp +10 -23
  5. package/src/data_chunk.cpp +1 -3
  6. package/src/database.cpp +4 -9
  7. package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
  8. package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
  9. package/src/duckdb/extension/json/json_functions.cpp +4 -6
  10. package/src/duckdb/src/common/enum_util.cpp +10 -5
  11. package/src/duckdb/src/common/operator/cast_operators.cpp +18 -0
  12. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  13. package/src/duckdb/src/common/row_operations/row_matcher.cpp +375 -0
  14. package/src/duckdb/src/common/types/data_chunk.cpp +48 -11
  15. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
  16. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +28 -17
  17. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
  18. package/src/duckdb/src/common/types/vector.cpp +0 -1
  19. package/src/duckdb/src/common/types.cpp +1 -1
  20. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
  21. package/src/duckdb/src/core_functions/function_list.cpp +1 -1
  22. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
  23. package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
  24. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +5 -1
  25. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +10 -1
  26. package/src/duckdb/src/core_functions/scalar/map/map_concat.cpp +0 -2
  27. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
  28. package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
  29. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
  30. package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
  31. package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_inner.cpp +20 -27
  32. package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_mark.cpp +21 -9
  33. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
  34. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +1 -1
  35. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
  36. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
  37. package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
  38. package/src/duckdb/src/function/cast/time_casts.cpp +12 -0
  39. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
  40. package/src/duckdb/src/function/function_binder.cpp +10 -9
  41. package/src/duckdb/src/function/pragma/pragma_queries.cpp +3 -0
  42. package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
  43. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  44. package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
  45. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +27 -0
  47. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +38 -2
  48. package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
  49. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -2
  50. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
  51. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
  52. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +4 -4
  53. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +4 -4
  54. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +5 -5
  55. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +7 -7
  56. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +12 -12
  57. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +12 -12
  58. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -3
  59. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +33 -33
  60. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +2 -2
  61. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -3
  62. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +13 -13
  63. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +2 -2
  64. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
  66. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
  67. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
  68. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +1 -0
  69. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_table.hpp +1 -2
  70. package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -1
  72. package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +1 -1
  73. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  74. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -1
  75. package/src/duckdb/src/main/config.cpp +1 -1
  76. package/src/duckdb/src/main/relation.cpp +10 -0
  77. package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
  78. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +28 -6
  79. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +3 -0
  80. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
  81. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +14 -6
  82. package/src/duckdb/src/planner/operator/logical_create_table.cpp +3 -3
  83. package/src/duckdb/src/planner/operator/logical_delete.cpp +3 -2
  84. package/src/duckdb/src/planner/operator/logical_insert.cpp +3 -2
  85. package/src/duckdb/src/planner/operator/logical_update.cpp +3 -2
  86. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
  87. package/src/duckdb/src/storage/data_table.cpp +18 -8
  88. package/src/duckdb/src/storage/local_storage.cpp +2 -3
  89. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +64 -80
  90. package/src/duckdb/src/storage/storage_manager.cpp +6 -2
  91. package/src/duckdb/src/storage/table/row_group.cpp +6 -0
  92. package/src/duckdb/src/storage/table/row_group_collection.cpp +4 -3
  93. package/src/duckdb/src/storage/table/struct_column_data.cpp +2 -0
  94. package/src/duckdb/src/transaction/duck_transaction.cpp +1 -0
  95. package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
  96. package/src/statement.cpp +2 -4
  97. package/test/database_fail.test.ts +6 -0
  98. package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
@@ -0,0 +1,375 @@
1
+ #include "duckdb/common/row_operations/row_matcher.hpp"
2
+
3
+ #include "duckdb/common/enum_util.hpp"
4
+ #include "duckdb/common/exception.hpp"
5
+ #include "duckdb/common/types/row/tuple_data_collection.hpp"
6
+
7
+ namespace duckdb {
8
+
9
+ using ValidityBytes = TupleDataLayout::ValidityBytes;
10
+ // TemplatedMatch: compares LHS values of type T with column col_idx stored inside the RHS rows using OP; matching indices are kept at the front of sel and their number is returned.
11
+ template <bool NO_MATCH_SEL, class T, class OP> // NO_MATCH_SEL: when true, rejected indices are additionally recorded in no_match_sel
12
+ static idx_t TemplatedMatch(Vector &, const TupleDataVectorFormat &lhs_format, SelectionVector &sel, const idx_t count,
13
+ const TupleDataLayout &rhs_layout, Vector &rhs_row_locations, const idx_t col_idx,
14
+ const vector<MatchFunction> &, SelectionVector *no_match_sel, idx_t &no_match_count) {
15
+ using COMPARISON_OP = ComparisonOperationWrapper<OP>;
16
+
17
+ // LHS: selection vector, typed data pointer and validity mask of the unified input format
18
+ const auto &lhs_sel = *lhs_format.unified.sel;
19
+ const auto lhs_data = UnifiedVectorFormat::GetData<T>(lhs_format.unified);
20
+ const auto &lhs_validity = lhs_format.unified.validity;
21
+
22
+ // RHS: one row pointer per entry, plus the offset of this column inside a row
23
+ const auto rhs_locations = FlatVector::GetData<data_ptr_t>(rhs_row_locations);
24
+ const auto rhs_offset_in_row = rhs_layout.GetOffsets()[col_idx];
25
+ idx_t entry_idx;
26
+ idx_t idx_in_entry;
27
+ ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry); // both indices are uninitialized before this call, so it is expected to set them
28
+
29
+ idx_t match_count = 0;
30
+ for (idx_t i = 0; i < count; i++) {
31
+ const auto idx = sel.get_index(i);
32
+
33
+ const auto lhs_idx = lhs_sel.get_index(idx);
34
+ const auto lhs_null = lhs_validity.AllValid() ? false : !lhs_validity.RowIsValid(lhs_idx);
35
+
36
+ const auto &rhs_location = rhs_locations[idx]; // row pointers are indexed by the sel entry, not by the loop counter
37
+ const ValidityBytes rhs_mask(rhs_location); // the validity mask is constructed over the row pointer itself
38
+ const auto rhs_null = !rhs_mask.RowIsValid(rhs_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry);
39
+
40
+ if (COMPARISON_OP::template Operation<T>(lhs_data[lhs_idx], Load<T>(rhs_location + rhs_offset_in_row), lhs_null,
41
+ rhs_null)) {
42
+ sel.set_index(match_count++, idx); // compact matches in place; match_count never exceeds i, so unread entries are not overwritten
43
+ } else if (NO_MATCH_SEL) {
44
+ no_match_sel->set_index(no_match_count++, idx); // append to the caller's running no-match list
45
+ }
46
+ }
47
+ return match_count;
48
+ }
49
+ // StructMatchEquality: matches a STRUCT column by first filtering on struct-level NULLs and then running each child match function over the surviving rows; returns the number of rows left in sel.
50
+ template <bool NO_MATCH_SEL, class OP> // NO_MATCH_SEL: when true, rejected indices are additionally recorded in no_match_sel
51
+ static idx_t StructMatchEquality(Vector &lhs_vector, const TupleDataVectorFormat &lhs_format, SelectionVector &sel,
52
+ const idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
53
+ const idx_t col_idx, const vector<MatchFunction> &child_functions,
54
+ SelectionVector *no_match_sel, idx_t &no_match_count) {
55
+ using COMPARISON_OP = ComparisonOperationWrapper<OP>;
56
+
57
+ // LHS: only the selection and validity are needed at the struct level
58
+ const auto &lhs_sel = *lhs_format.unified.sel;
59
+ const auto &lhs_validity = lhs_format.unified.validity;
60
+
61
+ // RHS: row pointers and the position of this column's validity bit
62
+ const auto rhs_locations = FlatVector::GetData<data_ptr_t>(rhs_row_locations);
63
+ idx_t entry_idx;
64
+ idx_t idx_in_entry;
65
+ ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
66
+
67
+ idx_t match_count = 0;
68
+ for (idx_t i = 0; i < count; i++) {
69
+ const auto idx = sel.get_index(i);
70
+
71
+ const auto lhs_idx = lhs_sel.get_index(idx);
72
+ const auto lhs_null = lhs_validity.AllValid() ? false : !lhs_validity.RowIsValid(lhs_idx);
73
+
74
+ const auto &rhs_location = rhs_locations[idx];
75
+ const ValidityBytes rhs_mask(rhs_location);
76
+ const auto rhs_null = !rhs_mask.RowIsValid(rhs_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry);
77
+
78
+ // For structs there is no value to compare, here we match NULLs and let recursion do the rest
79
+ // So we use the comparison only if rhs or LHS is NULL and COMPARE_NULL is true
80
+ if (!(lhs_null || rhs_null) || // both sides non-NULL: keep the row and let the child functions decide
81
+ (COMPARISON_OP::COMPARE_NULL && COMPARISON_OP::template Operation<uint32_t>(0, 0, lhs_null, rhs_null))) { // dummy values 0/0: only the NULL flags differ between calls
82
+ sel.set_index(match_count++, idx);
83
+ } else if (NO_MATCH_SEL) {
84
+ no_match_sel->set_index(no_match_count++, idx);
85
+ }
86
+ }
87
+
88
+ // Create a Vector of pointers to the start of the TupleDataLayout of the STRUCT
89
+ Vector rhs_struct_row_locations(LogicalType::POINTER);
90
+ const auto rhs_offset_in_row = rhs_layout.GetOffsets()[col_idx];
91
+ auto rhs_struct_locations = FlatVector::GetData<data_ptr_t>(rhs_struct_row_locations);
92
+ for (idx_t i = 0; i < match_count; i++) { // only rows that survived the NULL filter get a struct pointer
93
+ const auto idx = sel.get_index(i);
94
+ rhs_struct_locations[idx] = rhs_locations[idx] + rhs_offset_in_row; // written at position idx, so the child functions can keep indexing by sel entry
95
+ }
96
+
97
+ // Get the struct layout and struct entries
98
+ const auto &rhs_struct_layout = rhs_layout.GetStructLayout(col_idx);
99
+ auto &lhs_struct_vectors = StructVector::GetEntries(lhs_vector);
100
+ D_ASSERT(rhs_struct_layout.ColumnCount() == lhs_struct_vectors.size());
101
+
102
+ for (idx_t struct_col_idx = 0; struct_col_idx < rhs_struct_layout.ColumnCount(); struct_col_idx++) { // each child narrows sel/match_count further
103
+ auto &lhs_struct_vector = *lhs_struct_vectors[struct_col_idx];
104
+ auto &lhs_struct_format = lhs_format.children[struct_col_idx];
105
+ const auto &child_function = child_functions[struct_col_idx];
106
+ match_count = child_function.function(lhs_struct_vector, lhs_struct_format, sel, match_count, rhs_struct_layout,
107
+ rhs_struct_row_locations, struct_col_idx, child_function.child_functions,
108
+ no_match_sel, no_match_count);
109
+ }
110
+
111
+ return match_count;
112
+ }
113
+ // SelectComparison: maps a comparison operator type OP onto the matching VectorOperations selection routine; the primary template rejects any OP without a specialization.
114
+ template <typename OP>
115
+ static idx_t SelectComparison(Vector &, Vector &, const SelectionVector &, idx_t, SelectionVector *,
116
+ SelectionVector *) {
117
+ throw NotImplementedException("Unsupported list comparison operand for RowMatcher::GetMatchFunction");
118
+ }
119
+
120
+ template <> // Equals -> NestedEquals (takes the selection vector by reference)
121
+ idx_t SelectComparison<Equals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
122
+ SelectionVector *true_sel, SelectionVector *false_sel) {
123
+ return VectorOperations::NestedEquals(left, right, sel, count, true_sel, false_sel);
124
+ }
125
+
126
+ template <> // NotEquals -> NestedNotEquals (takes the selection vector by reference)
127
+ idx_t SelectComparison<NotEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
128
+ SelectionVector *true_sel, SelectionVector *false_sel) {
129
+ return VectorOperations::NestedNotEquals(left, right, sel, count, true_sel, false_sel);
130
+ }
131
+
132
+ template <> // DistinctFrom -> DistinctFrom (this and the following routines take the selection vector by pointer)
133
+ idx_t SelectComparison<DistinctFrom>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
134
+ SelectionVector *true_sel, SelectionVector *false_sel) {
135
+ return VectorOperations::DistinctFrom(left, right, &sel, count, true_sel, false_sel);
136
+ }
137
+
138
+ template <> // NotDistinctFrom -> NotDistinctFrom
139
+ idx_t SelectComparison<NotDistinctFrom>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
140
+ SelectionVector *true_sel, SelectionVector *false_sel) {
141
+ return VectorOperations::NotDistinctFrom(left, right, &sel, count, true_sel, false_sel);
142
+ }
143
+
144
+ template <> // GreaterThan -> DistinctGreaterThan (the ordering predicates use the Distinct* variants)
145
+ idx_t SelectComparison<GreaterThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
146
+ SelectionVector *true_sel, SelectionVector *false_sel) {
147
+ return VectorOperations::DistinctGreaterThan(left, right, &sel, count, true_sel, false_sel);
148
+ }
149
+
150
+ template <> // GreaterThanEquals -> DistinctGreaterThanEquals
151
+ idx_t SelectComparison<GreaterThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
152
+ SelectionVector *true_sel, SelectionVector *false_sel) {
153
+ return VectorOperations::DistinctGreaterThanEquals(left, right, &sel, count, true_sel, false_sel);
154
+ }
155
+
156
+ template <> // LessThan -> DistinctLessThan
157
+ idx_t SelectComparison<LessThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
158
+ SelectionVector *true_sel, SelectionVector *false_sel) {
159
+ return VectorOperations::DistinctLessThan(left, right, &sel, count, true_sel, false_sel);
160
+ }
161
+
162
+ template <> // LessThanEquals -> DistinctLessThanEquals
163
+ idx_t SelectComparison<LessThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
164
+ SelectionVector *true_sel, SelectionVector *false_sel) {
165
+ return VectorOperations::DistinctLessThanEquals(left, right, &sel, count, true_sel, false_sel);
166
+ }
167
+ // GenericNestedMatch: fallback matcher for nested types; gathers RHS column col_idx into a Vector, slices the LHS by sel and delegates to SelectComparison<OP>. Returns the number of matches.
168
+ template <bool NO_MATCH_SEL, class OP> // NO_MATCH_SEL: when true, rejected indices are additionally written into no_match_sel
169
+ static idx_t GenericNestedMatch(Vector &lhs_vector, const TupleDataVectorFormat &, SelectionVector &sel,
170
+ const idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
171
+ const idx_t col_idx, const vector<MatchFunction> &, SelectionVector *no_match_sel,
172
+ idx_t &no_match_count) {
173
+ const auto &type = rhs_layout.GetTypes()[col_idx];
174
+
175
+ // Gather a dense Vector containing the column values being matched
176
+ Vector key(type);
177
+ const auto gather_function = TupleDataCollection::GetGatherFunction(type);
178
+ gather_function.function(rhs_layout, rhs_row_locations, col_idx, sel, count, key,
179
+ *FlatVector::IncrementalSelectionVector(), key, gather_function.child_functions); // NOTE(review): key is passed twice; the meaning of the second occurrence is not visible here - confirm against the gather function signature
180
+
181
+ // Densify the input column
182
+ Vector sliced(lhs_vector, sel, count);
183
+
184
+ if (NO_MATCH_SEL) {
185
+ SelectionVector no_match_sel_offset(no_match_sel->data() + no_match_count); // view starting at the current end of the caller's no-match list
186
+ auto match_count = SelectComparison<OP>(sliced, key, sel, count, &sel, &no_match_sel_offset); // sel is both the input selection and the true_sel output
187
+ no_match_count += count - match_count; // every non-matching row is counted as newly added to the no-match list
188
+ return match_count;
189
+ }
190
+ return SelectComparison<OP>(sliced, key, sel, count, &sel, nullptr); // no false_sel requested
191
+ }
192
+ // Initialize: builds one match function per predicate, in predicate order, and appends them to match_functions.
193
+ void RowMatcher::Initialize(const bool no_match_sel, const TupleDataLayout &layout, const Predicates &predicates) {
194
+ match_functions.reserve(predicates.size());
195
+ for (idx_t col_idx = 0; col_idx < predicates.size(); col_idx++) {
196
+ match_functions.push_back(GetMatchFunction(no_match_sel, layout.GetTypes()[col_idx], predicates[col_idx])); // predicate i is paired with layout column i; assumes the layout has at least predicates.size() columns - TODO confirm
197
+ }
198
+ }
199
+ // Match: runs the initialized match functions column by column; each call receives the count returned by the previous one, and the final count is returned.
200
+ idx_t RowMatcher::Match(DataChunk &lhs, const vector<TupleDataVectorFormat> &lhs_formats, SelectionVector &sel,
201
+ idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
202
+ SelectionVector *no_match_sel, idx_t &no_match_count) {
203
+ D_ASSERT(!match_functions.empty()); // match_functions is filled by Initialize
204
+ for (idx_t col_idx = 0; col_idx < match_functions.size(); col_idx++) {
205
+ const auto &match_function = match_functions[col_idx];
206
+ count = // feed the surviving row count into the next column's matcher
207
+ match_function.function(lhs.data[col_idx], lhs_formats[col_idx], sel, count, rhs_layout, rhs_row_locations,
208
+ col_idx, match_function.child_functions, no_match_sel, no_match_count);
209
+ }
210
+ return count;
211
+ }
212
+ // GetMatchFunction (runtime flag): turns the runtime no_match_sel flag into the NO_MATCH_SEL template argument and forwards to the templated overload.
213
+ MatchFunction RowMatcher::GetMatchFunction(const bool no_match_sel, const LogicalType &type,
214
+ const ExpressionType predicate) {
215
+ return no_match_sel ? GetMatchFunction<true>(type, predicate) : GetMatchFunction<false>(type, predicate);
216
+ }
217
+ // GetMatchFunction (by type): dispatches on the physical type of `type` to the matching C++ value type, or to the struct/list builders; throws InternalException for any other physical type.
218
+ template <bool NO_MATCH_SEL>
219
+ MatchFunction RowMatcher::GetMatchFunction(const LogicalType &type, const ExpressionType predicate) {
220
+ switch (type.InternalType()) {
221
+ case PhysicalType::BOOL:
222
+ return GetMatchFunction<NO_MATCH_SEL, bool>(predicate);
223
+ case PhysicalType::INT8:
224
+ return GetMatchFunction<NO_MATCH_SEL, int8_t>(predicate);
225
+ case PhysicalType::INT16:
226
+ return GetMatchFunction<NO_MATCH_SEL, int16_t>(predicate);
227
+ case PhysicalType::INT32:
228
+ return GetMatchFunction<NO_MATCH_SEL, int32_t>(predicate);
229
+ case PhysicalType::INT64:
230
+ return GetMatchFunction<NO_MATCH_SEL, int64_t>(predicate);
231
+ case PhysicalType::INT128:
232
+ return GetMatchFunction<NO_MATCH_SEL, hugeint_t>(predicate);
233
+ case PhysicalType::UINT8:
234
+ return GetMatchFunction<NO_MATCH_SEL, uint8_t>(predicate);
235
+ case PhysicalType::UINT16:
236
+ return GetMatchFunction<NO_MATCH_SEL, uint16_t>(predicate);
237
+ case PhysicalType::UINT32:
238
+ return GetMatchFunction<NO_MATCH_SEL, uint32_t>(predicate);
239
+ case PhysicalType::UINT64:
240
+ return GetMatchFunction<NO_MATCH_SEL, uint64_t>(predicate);
241
+ case PhysicalType::FLOAT:
242
+ return GetMatchFunction<NO_MATCH_SEL, float>(predicate);
243
+ case PhysicalType::DOUBLE:
244
+ return GetMatchFunction<NO_MATCH_SEL, double>(predicate);
245
+ case PhysicalType::INTERVAL:
246
+ return GetMatchFunction<NO_MATCH_SEL, interval_t>(predicate);
247
+ case PhysicalType::VARCHAR:
248
+ return GetMatchFunction<NO_MATCH_SEL, string_t>(predicate);
249
+ case PhysicalType::STRUCT: // nested: needs the logical type to build child functions
250
+ return GetStructMatchFunction<NO_MATCH_SEL>(type, predicate);
251
+ case PhysicalType::LIST: // nested: handled without per-child functions
252
+ return GetListMatchFunction<NO_MATCH_SEL>(predicate);
253
+ default:
254
+ throw InternalException("Unsupported PhysicalType for RowMatcher::GetMatchFunction: %s",
255
+ EnumUtil::ToString(type.InternalType()));
256
+ }
257
+ }
258
+ // GetMatchFunction (by predicate): selects the TemplatedMatch instantiation for value type T and the given comparison; throws InternalException for any other expression type.
259
+ template <bool NO_MATCH_SEL, class T>
260
+ MatchFunction RowMatcher::GetMatchFunction(const ExpressionType predicate) {
261
+ MatchFunction result; // only result.function is set here; child_functions is left untouched
262
+ switch (predicate) {
263
+ case ExpressionType::COMPARE_EQUAL:
264
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, Equals>;
265
+ break;
266
+ case ExpressionType::COMPARE_NOTEQUAL:
267
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, NotEquals>;
268
+ break;
269
+ case ExpressionType::COMPARE_DISTINCT_FROM:
270
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, DistinctFrom>;
271
+ break;
272
+ case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
273
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, NotDistinctFrom>;
274
+ break;
275
+ case ExpressionType::COMPARE_GREATERTHAN:
276
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, GreaterThan>;
277
+ break;
278
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
279
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, GreaterThanEquals>;
280
+ break;
281
+ case ExpressionType::COMPARE_LESSTHAN:
282
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, LessThan>;
283
+ break;
284
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
285
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, LessThanEquals>;
286
+ break;
287
+ default:
288
+ throw InternalException("Unsupported ExpressionType for RowMatcher::GetMatchFunction: %s",
289
+ EnumUtil::ToString(predicate));
290
+ }
291
+ return result;
292
+ }
293
+ // GetStructMatchFunction: builds the matcher for a STRUCT column; the two equality-style predicates use StructMatchEquality with per-child functions, every other supported predicate uses GenericNestedMatch.
294
+ template <bool NO_MATCH_SEL>
295
+ MatchFunction RowMatcher::GetStructMatchFunction(const LogicalType &type, const ExpressionType predicate) {
296
+ // We perform equality conditions like it's just a row, but we cannot perform inequality conditions like a row,
297
+ // because for equality conditions we need to always loop through all columns, but for inequality conditions,
298
+ // we need to find the first inequality, so the loop looks very different
299
+ MatchFunction result;
300
+ ExpressionType child_predicate = predicate; // predicate handed to the struct's children; overridden for COMPARE_EQUAL below
301
+ switch (predicate) {
302
+ case ExpressionType::COMPARE_EQUAL:
303
+ result.function = StructMatchEquality<NO_MATCH_SEL, Equals>;
304
+ child_predicate = ExpressionType::COMPARE_NOT_DISTINCT_FROM; // children of an equality match are compared with NOT DISTINCT FROM
305
+ break;
306
+ case ExpressionType::COMPARE_NOTEQUAL:
307
+ result.function = GenericNestedMatch<NO_MATCH_SEL, NotEquals>;
308
+ return result; // early return: the generic path gets no child functions
309
+ case ExpressionType::COMPARE_DISTINCT_FROM:
310
+ result.function = GenericNestedMatch<NO_MATCH_SEL, DistinctFrom>;
311
+ return result;
312
+ case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
313
+ result.function = StructMatchEquality<NO_MATCH_SEL, NotDistinctFrom>;
314
+ break;
315
+ case ExpressionType::COMPARE_GREATERTHAN:
316
+ result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThan>;
317
+ return result;
318
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
319
+ result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThanEquals>;
320
+ return result;
321
+ case ExpressionType::COMPARE_LESSTHAN:
322
+ result.function = GenericNestedMatch<NO_MATCH_SEL, LessThan>;
323
+ return result;
324
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
325
+ result.function = GenericNestedMatch<NO_MATCH_SEL, LessThanEquals>;
326
+ return result;
327
+ default:
328
+ throw InternalException("Unsupported ExpressionType for RowMatcher::GetStructMatchFunction: %s",
329
+ EnumUtil::ToString(predicate));
330
+ }
331
+
332
+ result.child_functions.reserve(StructType::GetChildCount(type)); // reached only on the StructMatchEquality paths
333
+ for (const auto &child_type : StructType::GetChildTypes(type)) {
334
+ result.child_functions.push_back(GetMatchFunction<NO_MATCH_SEL>(child_type.second, child_predicate)); // recursion handles nested children
335
+ }
336
+
337
+ return result;
338
+ }
339
+ // GetListMatchFunction: builds the matcher for a LIST column; every supported predicate maps to GenericNestedMatch, anything else throws InternalException.
340
+ template <bool NO_MATCH_SEL>
341
+ MatchFunction RowMatcher::GetListMatchFunction(const ExpressionType predicate) {
342
+ MatchFunction result; // no child functions are registered for lists
343
+ switch (predicate) {
344
+ case ExpressionType::COMPARE_EQUAL:
345
+ result.function = GenericNestedMatch<NO_MATCH_SEL, Equals>;
346
+ break;
347
+ case ExpressionType::COMPARE_NOTEQUAL:
348
+ result.function = GenericNestedMatch<NO_MATCH_SEL, NotEquals>;
349
+ break;
350
+ case ExpressionType::COMPARE_DISTINCT_FROM:
351
+ result.function = GenericNestedMatch<NO_MATCH_SEL, DistinctFrom>;
352
+ break;
353
+ case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
354
+ result.function = GenericNestedMatch<NO_MATCH_SEL, NotDistinctFrom>;
355
+ break;
356
+ case ExpressionType::COMPARE_GREATERTHAN:
357
+ result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThan>;
358
+ break;
359
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
360
+ result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThanEquals>;
361
+ break;
362
+ case ExpressionType::COMPARE_LESSTHAN:
363
+ result.function = GenericNestedMatch<NO_MATCH_SEL, LessThan>;
364
+ break;
365
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
366
+ result.function = GenericNestedMatch<NO_MATCH_SEL, LessThanEquals>;
367
+ break;
368
+ default:
369
+ throw InternalException("Unsupported ExpressionType for RowMatcher::GetListMatchFunction: %s",
370
+ EnumUtil::ToString(predicate));
371
+ }
372
+ return result;
373
+ }
374
+
375
+ } // namespace duckdb
@@ -13,6 +13,10 @@
13
13
  #include "duckdb/common/vector_operations/vector_operations.hpp"
14
14
  #include "duckdb/execution/execution_context.hpp"
15
15
 
16
+ #include "duckdb/common/serializer/memory_stream.hpp"
17
+ #include "duckdb/common/serializer/binary_serializer.hpp"
18
+ #include "duckdb/common/serializer/binary_deserializer.hpp"
19
+
16
20
  namespace duckdb {
17
21
 
18
22
  DataChunk::DataChunk() : count(0), capacity(STANDARD_VECTOR_SIZE) {
@@ -231,16 +235,20 @@ string DataChunk::ToString() const {
231
235
  }
232
236
 
233
237
  void DataChunk::Serialize(Serializer &serializer) const {
238
+
234
239
  // write the count
235
240
  auto row_count = size();
236
241
  serializer.WriteProperty<sel_t>(100, "rows", row_count);
242
+
243
+ // we should never try to serialize empty data chunks
237
244
  auto column_count = ColumnCount();
245
+ D_ASSERT(column_count);
238
246
 
239
- // Write the types
247
+ // write the types
240
248
  serializer.WriteList(101, "types", column_count,
241
249
  [&](Serializer::List &list, idx_t i) { list.WriteElement(data[i].GetType()); });
242
250
 
243
- // Write the data
251
+ // write the data
244
252
  serializer.WriteList(102, "columns", column_count, [&](Serializer::List &list, idx_t i) {
245
253
  list.WriteObject([&](Serializer &object) {
246
254
  // Reference the vector to avoid potentially mutating it during serialization
@@ -252,21 +260,23 @@ void DataChunk::Serialize(Serializer &serializer) const {
252
260
  }
253
261
 
254
262
  void DataChunk::Deserialize(Deserializer &deserializer) {
255
- // read the count
263
+
264
+ // read and set the row count
256
265
  auto row_count = deserializer.ReadProperty<sel_t>(100, "rows");
266
+ SetCardinality(row_count);
257
267
 
258
- // Read the types
268
+ // read the types
259
269
  vector<LogicalType> types;
260
270
  deserializer.ReadList(101, "types", [&](Deserializer::List &list, idx_t i) {
261
271
  auto type = list.ReadElement<LogicalType>();
262
272
  types.push_back(type);
263
273
  });
264
- Initialize(Allocator::DefaultAllocator(), types);
265
274
 
266
- // now load the column data
267
- SetCardinality(row_count);
275
+ // initialize the data chunk
276
+ D_ASSERT(!types.empty());
277
+ Initialize(Allocator::DefaultAllocator(), types);
268
278
 
269
- // Read the data
279
+ // read the data
270
280
  deserializer.ReadList(102, "columns", [&](Deserializer::List &list, idx_t i) {
271
281
  list.ReadObject([&](Deserializer &object) { data[i].Deserialize(object, row_count); });
272
282
  });
@@ -296,11 +306,11 @@ void DataChunk::Slice(DataChunk &other, const SelectionVector &sel, idx_t count_
296
306
  }
297
307
 
298
308
  unsafe_unique_array<UnifiedVectorFormat> DataChunk::ToUnifiedFormat() {
299
- auto orrified_data = make_unsafe_uniq_array<UnifiedVectorFormat>(ColumnCount());
309
+ auto unified_data = make_unsafe_uniq_array<UnifiedVectorFormat>(ColumnCount());
300
310
  for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) {
301
- data[col_idx].ToUnifiedFormat(size(), orrified_data[col_idx]);
311
+ data[col_idx].ToUnifiedFormat(size(), unified_data[col_idx]);
302
312
  }
303
- return orrified_data;
313
+ return unified_data;
304
314
  }
305
315
 
306
316
  void DataChunk::Hash(Vector &result) {
@@ -324,10 +334,37 @@ void DataChunk::Hash(vector<idx_t> &column_ids, Vector &result) {
324
334
  void DataChunk::Verify() {
325
335
  #ifdef DEBUG
326
336
  D_ASSERT(size() <= capacity);
337
+
327
338
  // verify that all vectors in this chunk have the chunk selection vector
328
339
  for (idx_t i = 0; i < ColumnCount(); i++) {
329
340
  data[i].Verify(size());
330
341
  }
342
+
343
+ if (!ColumnCount()) {
344
+ // don't try to round-trip dummy data chunks with no data
345
+ // e.g., these exist in queries like 'SELECT distinct(col0, col1) FROM tbl', where we have groups, but no
346
+ // payload so the payload will be such an empty data chunk
347
+ return;
348
+ }
349
+
350
+ // verify that we can round-trip chunk serialization
351
+ MemoryStream mem_stream;
352
+ BinarySerializer serializer(mem_stream);
353
+
354
+ serializer.Begin();
355
+ Serialize(serializer);
356
+ serializer.End();
357
+
358
+ mem_stream.Rewind();
359
+
360
+ BinaryDeserializer deserializer(mem_stream);
361
+ DataChunk new_chunk;
362
+
363
+ deserializer.Begin();
364
+ new_chunk.Deserialize(deserializer);
365
+ deserializer.End();
366
+
367
+ D_ASSERT(size() == new_chunk.size());
331
368
  #endif
332
369
  }
333
370
 
@@ -294,7 +294,7 @@ static inline void VerifyStrings(const LogicalTypeId type_id, const data_ptr_t r
294
294
  for (idx_t i = 0; i < count; i++) {
295
295
  const auto &row_location = row_locations[offset + i] + base_col_offset;
296
296
  ValidityBytes row_mask(row_location);
297
- if (row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
297
+ if (row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
298
298
  auto recomputed_string = Load<string_t>(row_location + col_offset);
299
299
  recomputed_string.Verify();
300
300
  }
@@ -328,7 +328,7 @@ void TupleDataAllocator::RecomputeHeapPointers(Vector &old_heap_ptrs, const Sele
328
328
  const auto idx = offset + i;
329
329
  const auto &row_location = row_locations[idx] + base_col_offset;
330
330
  ValidityBytes row_mask(row_location);
331
- if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
331
+ if (!row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
332
332
  continue;
333
333
  }
334
334
 
@@ -352,7 +352,7 @@ void TupleDataAllocator::RecomputeHeapPointers(Vector &old_heap_ptrs, const Sele
352
352
  const auto idx = offset + i;
353
353
  const auto &row_location = row_locations[idx] + base_col_offset;
354
354
  ValidityBytes row_mask(row_location);
355
- if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
355
+ if (!row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
356
356
  continue;
357
357
  }
358
358
 
@@ -37,13 +37,17 @@ void TupleDataCollection::Initialize() {
37
37
  }
38
38
  }
39
39
 
40
- void TupleDataCollection::GetAllColumnIDs(vector<column_t> &column_ids) {
41
- column_ids.reserve(layout.ColumnCount());
42
- for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) {
40
+ void GetAllColumnIDsInternal(vector<column_t> &column_ids, const idx_t column_count) {
41
+ column_ids.reserve(column_count);
42
+ for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
43
43
  column_ids.emplace_back(col_idx);
44
44
  }
45
45
  }
46
46
 
47
+ void TupleDataCollection::GetAllColumnIDs(vector<column_t> &column_ids) {
48
+ GetAllColumnIDsInternal(column_ids, layout.ColumnCount());
49
+ }
50
+
47
51
  const TupleDataLayout &TupleDataCollection::GetLayout() const {
48
52
  return layout;
49
53
  }
@@ -108,7 +112,7 @@ void TupleDataCollection::InitializeAppend(TupleDataAppendState &append_state, v
108
112
  TupleDataPinProperties properties) {
109
113
  VerifyAppendColumns(layout, column_ids);
110
114
  InitializeAppend(append_state.pin_state, properties);
111
- InitializeAppend(append_state.chunk_state, std::move(column_ids));
115
+ InitializeChunkState(append_state.chunk_state, std::move(column_ids));
112
116
  }
113
117
 
114
118
  void TupleDataCollection::InitializeAppend(TupleDataPinState &pin_state, TupleDataPinProperties properties) {
@@ -130,11 +134,11 @@ static void InitializeVectorFormat(vector<TupleDataVectorFormat> &vector_data, c
130
134
  for (const auto &child_entry : child_list) {
131
135
  child_types.emplace_back(child_entry.second);
132
136
  }
133
- InitializeVectorFormat(vector_data[col_idx].child_formats, child_types);
137
+ InitializeVectorFormat(vector_data[col_idx].children, child_types);
134
138
  break;
135
139
  }
136
140
  case PhysicalType::LIST:
137
- InitializeVectorFormat(vector_data[col_idx].child_formats, {ListType::GetChildType(type)});
141
+ InitializeVectorFormat(vector_data[col_idx].children, {ListType::GetChildType(type)});
138
142
  break;
139
143
  default:
140
144
  break;
@@ -142,11 +146,16 @@ static void InitializeVectorFormat(vector<TupleDataVectorFormat> &vector_data, c
142
146
  }
143
147
  }
144
148
 
145
- void TupleDataCollection::InitializeAppend(TupleDataChunkState &chunk_state, vector<column_t> column_ids) {
149
+ void TupleDataCollection::InitializeChunkState(TupleDataChunkState &chunk_state, vector<column_t> column_ids) {
150
+ TupleDataCollection::InitializeChunkState(chunk_state, layout.GetTypes(), std::move(column_ids));
151
+ }
152
+
153
+ void TupleDataCollection::InitializeChunkState(TupleDataChunkState &chunk_state, const vector<LogicalType> &types,
154
+ vector<column_t> column_ids) {
146
155
  if (column_ids.empty()) {
147
- GetAllColumnIDs(column_ids);
156
+ GetAllColumnIDsInternal(column_ids, types.size());
148
157
  }
149
- InitializeVectorFormat(chunk_state.vector_data, layout.GetTypes());
158
+ InitializeVectorFormat(chunk_state.vector_data, types);
150
159
  chunk_state.column_ids = std::move(column_ids);
151
160
  }
152
161
 
@@ -211,21 +220,23 @@ void TupleDataCollection::AppendUnified(TupleDataPinState &pin_state, TupleDataC
211
220
  }
212
221
 
213
222
  static inline void ToUnifiedFormatInternal(TupleDataVectorFormat &format, Vector &vector, const idx_t count) {
214
- vector.ToUnifiedFormat(count, format.data);
215
- format.original_sel = format.data.sel;
216
- format.original_owned_sel.Initialize(format.data.owned_sel);
223
+ vector.ToUnifiedFormat(count, format.unified);
224
+ format.original_sel = format.unified.sel;
225
+ format.original_owned_sel.Initialize(format.unified.owned_sel);
217
226
  switch (vector.GetType().InternalType()) {
218
227
  case PhysicalType::STRUCT: {
219
228
  auto &entries = StructVector::GetEntries(vector);
220
- D_ASSERT(format.child_formats.size() == entries.size());
229
+ D_ASSERT(format.children.size() == entries.size());
221
230
  for (idx_t struct_col_idx = 0; struct_col_idx < entries.size(); struct_col_idx++) {
222
- ToUnifiedFormatInternal(format.child_formats[struct_col_idx], *entries[struct_col_idx], count);
231
+ ToUnifiedFormatInternal(reinterpret_cast<TupleDataVectorFormat &>(format.children[struct_col_idx]),
232
+ *entries[struct_col_idx], count);
223
233
  }
224
234
  break;
225
235
  }
226
236
  case PhysicalType::LIST:
227
- D_ASSERT(format.child_formats.size() == 1);
228
- ToUnifiedFormatInternal(format.child_formats[0], ListVector::GetEntry(vector), ListVector::GetListSize(vector));
237
+ D_ASSERT(format.children.size() == 1);
238
+ ToUnifiedFormatInternal(reinterpret_cast<TupleDataVectorFormat &>(format.children[0]),
239
+ ListVector::GetEntry(vector), ListVector::GetListSize(vector));
229
240
  break;
230
241
  default:
231
242
  break;
@@ -242,7 +253,7 @@ void TupleDataCollection::ToUnifiedFormat(TupleDataChunkState &chunk_state, Data
242
253
  void TupleDataCollection::GetVectorData(const TupleDataChunkState &chunk_state, UnifiedVectorFormat result[]) {
243
254
  const auto &vector_data = chunk_state.vector_data;
244
255
  for (idx_t i = 0; i < vector_data.size(); i++) {
245
- const auto &source = vector_data[i].data;
256
+ const auto &source = vector_data[i].unified;
246
257
  auto &target = result[i];
247
258
  target.sel = source.sel;
248
259
  target.data = source.data;