duckdb 0.8.2-dev4653.0 → 0.8.2-dev4871.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/binding.gyp +0 -1
  2. package/binding.gyp.in +0 -1
  3. package/package.json +1 -1
  4. package/src/connection.cpp +10 -23
  5. package/src/data_chunk.cpp +1 -3
  6. package/src/database.cpp +4 -9
  7. package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
  8. package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
  9. package/src/duckdb/extension/json/json_functions.cpp +4 -6
  10. package/src/duckdb/src/common/enum_util.cpp +10 -5
  11. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  12. package/src/duckdb/src/common/row_operations/row_matcher.cpp +408 -0
  13. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
  14. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +35 -17
  15. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
  16. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
  17. package/src/duckdb/src/core_functions/function_list.cpp +1 -1
  18. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
  19. package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
  20. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
  21. package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
  22. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
  23. package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
  24. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -3
  25. package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +5 -1
  26. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +18 -9
  27. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +11 -27
  28. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +1 -2
  29. package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +4 -0
  30. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +11 -2
  31. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +8 -8
  32. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +7 -6
  33. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +27 -6
  34. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
  35. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
  36. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +49 -41
  37. package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
  38. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
  39. package/src/duckdb/src/function/function_binder.cpp +10 -9
  40. package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
  41. package/src/duckdb/src/function/table/read_csv.cpp +12 -9
  42. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  43. package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
  44. package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
  45. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +8 -2
  46. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
  47. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
  48. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +1 -1
  49. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
  50. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
  51. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +4 -0
  52. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +1 -1
  53. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +4 -0
  54. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +2 -4
  55. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +3 -1
  56. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +1 -0
  58. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +1 -2
  59. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +3 -0
  60. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
  61. package/src/duckdb/src/main/config.cpp +1 -1
  62. package/src/duckdb/src/main/query_result.cpp +16 -10
  63. package/src/duckdb/src/main/relation.cpp +10 -0
  64. package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
  65. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
  66. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
  67. package/src/duckdb/src/storage/data_table.cpp +10 -0
  68. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +42 -44
  69. package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
  70. package/src/statement.cpp +2 -4
  71. package/test/database_fail.test.ts +6 -0
  72. package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
@@ -0,0 +1,408 @@
1
+ #include "duckdb/common/row_operations/row_matcher.hpp"
2
+
3
+ #include "duckdb/common/enum_util.hpp"
4
+ #include "duckdb/common/exception.hpp"
5
+ #include "duckdb/common/types/row/tuple_data_collection.hpp"
6
+
7
+ namespace duckdb {
8
+
9
+ using ValidityBytes = TupleDataLayout::ValidityBytes;
10
+
// Generic wrapper that adds NULL handling around a two-argument comparison OP.
// COMPARE_NULL is false here: if either side is NULL the result is false and OP is
// never invoked; otherwise the result is OP::Operation<T>(left, right).
11
+ template <class OP>
12
+ struct RowMatchOperator {
13
+ static constexpr const bool COMPARE_NULL = false;
14
+
15
+ template <class T>
16
+ static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) {
17
+ if (right_null || left_null) {
18
+ return false;
19
+ }
20
+ return OP::template Operation<T>(left, right);
21
+ }
22
+ };
23
+
// Specialization for DistinctFrom: COMPARE_NULL is true and both NULL flags are
// forwarded to DistinctFrom::Operation<T>, which decides the outcome itself.
24
+ template <>
25
+ struct RowMatchOperator<DistinctFrom> {
26
+ static constexpr const bool COMPARE_NULL = true;
27
+
28
+ template <class T>
29
+ static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) {
30
+ return DistinctFrom::template Operation<T>(left, right, left_null, right_null);
31
+ }
32
+ };
33
+
// Specialization for NotDistinctFrom: COMPARE_NULL is true and both NULL flags are
// forwarded to NotDistinctFrom::Operation<T>, which decides the outcome itself.
34
+ template <>
35
+ struct RowMatchOperator<NotDistinctFrom> {
36
+ static constexpr const bool COMPARE_NULL = true;
37
+
38
+ template <class T>
39
+ static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) {
40
+ return NotDistinctFrom::template Operation<T>(left, right, left_null, right_null);
41
+ }
42
+ };
43
+
// Compares one fixed-size column of type T between the LHS vector (via lhs_format)
// and the row-format RHS (via rhs_row_locations), for the first `count` entries of `sel`.
//
// - sel:            in/out; matching indices are written back to the front of `sel`.
// - col_idx:        column whose row offset and validity bit are read from the RHS row.
// - no_match_sel /
//   no_match_count: only touched when NO_MATCH_SEL is true; non-matching indices are
//                   appended at position no_match_count, which is incremented.
// - The first (Vector &) and the child-function parameters are unused here.
//
// Returns the number of matching entries now stored at the front of `sel`.
44
+ template <bool NO_MATCH_SEL, class T, class OP>
45
+ static idx_t TemplatedMatch(Vector &, const TupleDataVectorFormat &lhs_format, SelectionVector &sel, const idx_t count,
46
+ const TupleDataLayout &rhs_layout, Vector &rhs_row_locations, const idx_t col_idx,
47
+ const vector<MatchFunction> &, SelectionVector *no_match_sel, idx_t &no_match_count) {
48
+ using MATCH_OP = RowMatchOperator<OP>;
49
+
50
+ // LHS
51
+ const auto &lhs_sel = *lhs_format.unified.sel;
52
+ const auto lhs_data = UnifiedVectorFormat::GetData<T>(lhs_format.unified);
53
+ const auto &lhs_validity = lhs_format.unified.validity;
54
+
55
+ // RHS
56
+ const auto rhs_locations = FlatVector::GetData<data_ptr_t>(rhs_row_locations);
57
+ const auto rhs_offset_in_row = rhs_layout.GetOffsets()[col_idx];
// The validity entry/bit position depends only on col_idx, so it is computed once
// outside the loop.
58
+ idx_t entry_idx;
59
+ idx_t idx_in_entry;
60
+ ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
61
+
62
+ idx_t match_count = 0;
63
+ for (idx_t i = 0; i < count; i++) {
64
+ const auto idx = sel.get_index(i);
65
+
66
+ const auto lhs_idx = lhs_sel.get_index(idx);
67
+ const auto lhs_null = lhs_validity.AllValid() ? false : !lhs_validity.RowIsValid(lhs_idx);
68
+
// The RHS validity mask is constructed directly on the row pointer.
69
+ const auto &rhs_location = rhs_locations[idx];
70
+ const ValidityBytes rhs_mask(rhs_location);
71
+ const auto rhs_null = !rhs_mask.RowIsValid(rhs_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry);
72
+
// Writing at match_count (<= i) compacts `sel` in place without clobbering entries
// that have not been read yet.
73
+ if (MATCH_OP::template Operation<T>(lhs_data[lhs_idx], Load<T>(rhs_location + rhs_offset_in_row), lhs_null,
74
+ rhs_null)) {
75
+ sel.set_index(match_count++, idx);
76
+ } else if (NO_MATCH_SEL) {
77
+ no_match_sel->set_index(no_match_count++, idx);
78
+ }
79
+ }
80
+ return match_count;
81
+ }
82
+
// Matches a STRUCT column in three steps:
//   1. Filter `sel` on the struct-level NULL flags only (no value comparison).
//   2. Build a pointer vector addressing the nested struct inside each surviving RHS row.
//   3. Run each child match function in order; each call narrows `sel` / match_count further.
//
// Non-matching indices are appended to no_match_sel only when NO_MATCH_SEL is true.
// Returns the number of entries left at the front of `sel` after all children ran.
83
+ template <bool NO_MATCH_SEL, class OP>
84
+ static idx_t StructMatchEquality(Vector &lhs_vector, const TupleDataVectorFormat &lhs_format, SelectionVector &sel,
85
+ const idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
86
+ const idx_t col_idx, const vector<MatchFunction> &child_functions,
87
+ SelectionVector *no_match_sel, idx_t &no_match_count) {
88
+ using MATCH_OP = RowMatchOperator<OP>;
89
+
90
+ // LHS
91
+ const auto &lhs_sel = *lhs_format.unified.sel;
92
+ const auto &lhs_validity = lhs_format.unified.validity;
93
+
94
+ // RHS
95
+ const auto rhs_locations = FlatVector::GetData<data_ptr_t>(rhs_row_locations);
96
+ idx_t entry_idx;
97
+ idx_t idx_in_entry;
98
+ ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
99
+
100
+ idx_t match_count = 0;
101
+ for (idx_t i = 0; i < count; i++) {
102
+ const auto idx = sel.get_index(i);
103
+
104
+ const auto lhs_idx = lhs_sel.get_index(idx);
105
+ const auto lhs_null = lhs_validity.AllValid() ? false : !lhs_validity.RowIsValid(lhs_idx);
106
+
107
+ const auto &rhs_location = rhs_locations[idx];
108
+ const ValidityBytes rhs_mask(rhs_location);
109
+ const auto rhs_null = !rhs_mask.RowIsValid(rhs_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry);
110
+
111
+ // For structs there is no value to compare, here we match NULLs and let recursion do the rest
112
+ // So we use the comparison only if rhs or LHS is NULL and COMPARE_NULL is true
// The 0, 0 arguments are placeholder values; only the NULL flags differ between rows.
113
+ if (!(lhs_null || rhs_null) ||
114
+ (MATCH_OP::COMPARE_NULL && MATCH_OP::template Operation<uint32_t>(0, 0, lhs_null, rhs_null))) {
115
+ sel.set_index(match_count++, idx);
116
+ } else if (NO_MATCH_SEL) {
117
+ no_match_sel->set_index(no_match_count++, idx);
118
+ }
119
+ }
120
+
121
+ // Create a Vector of pointers to the start of the TupleDataLayout of the STRUCT
// Only slots for indices that survived the NULL filter are written; other slots of
// rhs_struct_row_locations are left unset.
122
+ Vector rhs_struct_row_locations(LogicalType::POINTER);
123
+ const auto rhs_offset_in_row = rhs_layout.GetOffsets()[col_idx];
124
+ auto rhs_struct_locations = FlatVector::GetData<data_ptr_t>(rhs_struct_row_locations);
125
+ for (idx_t i = 0; i < match_count; i++) {
126
+ const auto idx = sel.get_index(i);
127
+ rhs_struct_locations[idx] = rhs_locations[idx] + rhs_offset_in_row;
128
+ }
129
+
130
+ // Get the struct layout and struct entries
131
+ const auto &rhs_struct_layout = rhs_layout.GetStructLayout(col_idx);
132
+ auto &lhs_struct_vectors = StructVector::GetEntries(lhs_vector);
133
+ D_ASSERT(rhs_struct_layout.ColumnCount() == lhs_struct_vectors.size());
134
+
// child_functions is indexed by struct field position; NOTE(review): its size is not
// checked here - presumably it always equals the struct's column count, confirm with
// GetStructMatchFunction.
135
+ for (idx_t struct_col_idx = 0; struct_col_idx < rhs_struct_layout.ColumnCount(); struct_col_idx++) {
136
+ auto &lhs_struct_vector = *lhs_struct_vectors[struct_col_idx];
137
+ auto &lhs_struct_format = lhs_format.children[struct_col_idx];
138
+ const auto &child_function = child_functions[struct_col_idx];
139
+ match_count = child_function.function(lhs_struct_vector, lhs_struct_format, sel, match_count, rhs_struct_layout,
140
+ rhs_struct_row_locations, struct_col_idx, child_function.child_functions,
141
+ no_match_sel, no_match_count);
142
+ }
143
+
144
+ return match_count;
145
+ }
146
+
// SelectComparison<OP> maps a comparison operator type onto the matching
// VectorOperations select routine. The primary template is the fallback for any
// operator without a specialization below and always throws NotImplementedException.
147
+ template <typename OP>
148
+ static idx_t SelectComparison(Vector &, Vector &, const SelectionVector &, idx_t, SelectionVector *,
149
+ SelectionVector *) {
150
+ throw NotImplementedException("Unsupported list comparison operand for RowMatcher::GetMatchFunction");
151
+ }
152
+
// Equals -> VectorOperations::NestedEquals (takes `sel` by reference, unlike the
// Distinct* routines below, which take a pointer).
153
+ template <>
154
+ idx_t SelectComparison<Equals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
155
+ SelectionVector *true_sel, SelectionVector *false_sel) {
156
+ return VectorOperations::NestedEquals(left, right, sel, count, true_sel, false_sel);
157
+ }
158
+
// NotEquals -> VectorOperations::NestedNotEquals.
159
+ template <>
160
+ idx_t SelectComparison<NotEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
161
+ SelectionVector *true_sel, SelectionVector *false_sel) {
162
+ return VectorOperations::NestedNotEquals(left, right, sel, count, true_sel, false_sel);
163
+ }
164
+
// DistinctFrom -> VectorOperations::DistinctFrom.
165
+ template <>
166
+ idx_t SelectComparison<DistinctFrom>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
167
+ SelectionVector *true_sel, SelectionVector *false_sel) {
168
+ return VectorOperations::DistinctFrom(left, right, &sel, count, true_sel, false_sel);
169
+ }
170
+
// NotDistinctFrom -> VectorOperations::NotDistinctFrom.
171
+ template <>
172
+ idx_t SelectComparison<NotDistinctFrom>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
173
+ SelectionVector *true_sel, SelectionVector *false_sel) {
174
+ return VectorOperations::NotDistinctFrom(left, right, &sel, count, true_sel, false_sel);
175
+ }
176
+
// The four ordering operators below are routed to the Distinct* variants of the
// VectorOperations routines (DistinctGreaterThan, ...), not to plain comparisons.
177
+ template <>
178
+ idx_t SelectComparison<GreaterThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
179
+ SelectionVector *true_sel, SelectionVector *false_sel) {
180
+ return VectorOperations::DistinctGreaterThan(left, right, &sel, count, true_sel, false_sel);
181
+ }
182
+
183
+ template <>
184
+ idx_t SelectComparison<GreaterThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
185
+ SelectionVector *true_sel, SelectionVector *false_sel) {
186
+ return VectorOperations::DistinctGreaterThanEquals(left, right, &sel, count, true_sel, false_sel);
187
+ }
188
+
189
+ template <>
190
+ idx_t SelectComparison<LessThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
191
+ SelectionVector *true_sel, SelectionVector *false_sel) {
192
+ return VectorOperations::DistinctLessThan(left, right, &sel, count, true_sel, false_sel);
193
+ }
194
+
195
+ template <>
196
+ idx_t SelectComparison<LessThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
197
+ SelectionVector *true_sel, SelectionVector *false_sel) {
198
+ return VectorOperations::DistinctLessThanEquals(left, right, &sel, count, true_sel, false_sel);
199
+ }
200
+
// Match function for nested columns that are not compared field-by-field: the RHS column
// is gathered out of the row format into a temporary Vector, the LHS vector is sliced by
// `sel`, and the two are compared through SelectComparison<OP>.
//
// `sel` is passed both as the input selection and as the true-selection output.
// When NO_MATCH_SEL is true, false results are written into no_match_sel starting at
// offset no_match_count, and no_match_count grows by (count - match_count).
// Returns the number of matches reported by SelectComparison<OP>.
201
+ template <bool NO_MATCH_SEL, class OP>
202
+ static idx_t GenericNestedMatch(Vector &lhs_vector, const TupleDataVectorFormat &, SelectionVector &sel,
203
+ const idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
204
+ const idx_t col_idx, const vector<MatchFunction> &, SelectionVector *no_match_sel,
205
+ idx_t &no_match_count) {
206
+ const auto &type = rhs_layout.GetTypes()[col_idx];
207
+
208
+ // Gather a dense Vector containing the column values being matched
// NOTE(review): `key` is passed twice to the gather function (as the target and again
// as the argument after the incremental selection vector) - confirm against the gather
// function signature that this is intended.
209
+ Vector key(type);
210
+ const auto gather_function = TupleDataCollection::GetGatherFunction(type);
211
+ gather_function.function(rhs_layout, rhs_row_locations, col_idx, sel, count, key,
212
+ *FlatVector::IncrementalSelectionVector(), key, gather_function.child_functions);
213
+
214
+ // Densify the input column
215
+ Vector sliced(lhs_vector, sel, count);
216
+
217
+ if (NO_MATCH_SEL) {
// A selection vector viewing the tail of no_match_sel, so new non-matches land after
// the entries recorded by earlier columns.
218
+ SelectionVector no_match_sel_offset(no_match_sel->data() + no_match_count);
219
+ auto match_count = SelectComparison<OP>(sliced, key, sel, count, &sel, &no_match_sel_offset);
220
+ no_match_count += count - match_count;
221
+ return match_count;
222
+ }
223
+ return SelectComparison<OP>(sliced, key, sel, count, &sel, nullptr);
224
+ }
225
+
// Builds one match function per predicate and appends it to match_functions.
// predicates[i] is paired with layout.GetTypes()[i], i.e. predicate i applies to layout
// column i. match_functions is appended to, not cleared, by this call.
226
+ void RowMatcher::Initialize(const bool no_match_sel, const TupleDataLayout &layout, const Predicates &predicates) {
227
+ match_functions.reserve(predicates.size());
228
+ for (idx_t col_idx = 0; col_idx < predicates.size(); col_idx++) {
229
+ match_functions.push_back(GetMatchFunction(no_match_sel, layout.GetTypes()[col_idx], predicates[col_idx]));
230
+ }
231
+ }
232
+
// Applies every stored match function in column order. Each call receives the `sel` and
// `count` left by the previous one, so the selection is narrowed column by column.
// Asserts (debug only, via D_ASSERT) that match_functions is non-empty.
// Returns the number of entries remaining in `sel` after the last column.
233
+ idx_t RowMatcher::Match(DataChunk &lhs, const vector<TupleDataVectorFormat> &lhs_formats, SelectionVector &sel,
234
+ idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
235
+ SelectionVector *no_match_sel, idx_t &no_match_count) {
236
+ D_ASSERT(!match_functions.empty());
237
+ for (idx_t col_idx = 0; col_idx < match_functions.size(); col_idx++) {
238
+ const auto &match_function = match_functions[col_idx];
239
+ count =
240
+ match_function.function(lhs.data[col_idx], lhs_formats[col_idx], sel, count, rhs_layout, rhs_row_locations,
241
+ col_idx, match_function.child_functions, no_match_sel, no_match_count);
242
+ }
243
+ return count;
244
+ }
245
+
// Runtime entry point: turns the no_match_sel flag into the NO_MATCH_SEL template
// argument and forwards to the type-dispatching overload.
246
+ MatchFunction RowMatcher::GetMatchFunction(const bool no_match_sel, const LogicalType &type,
247
+ const ExpressionType predicate) {
248
+ return no_match_sel ? GetMatchFunction<true>(type, predicate) : GetMatchFunction<false>(type, predicate);
249
+ }
250
+
// Dispatches on the physical type of `type`: fixed-size types and VARCHAR go to the
// <NO_MATCH_SEL, T> overload with the corresponding C++ type, STRUCT and LIST go to their
// dedicated builders. Any other physical type throws InternalException.
251
+ template <bool NO_MATCH_SEL>
252
+ MatchFunction RowMatcher::GetMatchFunction(const LogicalType &type, const ExpressionType predicate) {
253
+ switch (type.InternalType()) {
254
+ case PhysicalType::BOOL:
255
+ return GetMatchFunction<NO_MATCH_SEL, bool>(predicate);
256
+ case PhysicalType::INT8:
257
+ return GetMatchFunction<NO_MATCH_SEL, int8_t>(predicate);
258
+ case PhysicalType::INT16:
259
+ return GetMatchFunction<NO_MATCH_SEL, int16_t>(predicate);
260
+ case PhysicalType::INT32:
261
+ return GetMatchFunction<NO_MATCH_SEL, int32_t>(predicate);
262
+ case PhysicalType::INT64:
263
+ return GetMatchFunction<NO_MATCH_SEL, int64_t>(predicate);
264
+ case PhysicalType::INT128:
265
+ return GetMatchFunction<NO_MATCH_SEL, hugeint_t>(predicate);
266
+ case PhysicalType::UINT8:
267
+ return GetMatchFunction<NO_MATCH_SEL, uint8_t>(predicate);
268
+ case PhysicalType::UINT16:
269
+ return GetMatchFunction<NO_MATCH_SEL, uint16_t>(predicate);
270
+ case PhysicalType::UINT32:
271
+ return GetMatchFunction<NO_MATCH_SEL, uint32_t>(predicate);
272
+ case PhysicalType::UINT64:
273
+ return GetMatchFunction<NO_MATCH_SEL, uint64_t>(predicate);
274
+ case PhysicalType::FLOAT:
275
+ return GetMatchFunction<NO_MATCH_SEL, float>(predicate);
276
+ case PhysicalType::DOUBLE:
277
+ return GetMatchFunction<NO_MATCH_SEL, double>(predicate);
278
+ case PhysicalType::INTERVAL:
279
+ return GetMatchFunction<NO_MATCH_SEL, interval_t>(predicate);
280
+ case PhysicalType::VARCHAR:
281
+ return GetMatchFunction<NO_MATCH_SEL, string_t>(predicate);
282
+ case PhysicalType::STRUCT:
283
+ return GetStructMatchFunction<NO_MATCH_SEL>(type, predicate);
// Unlike the STRUCT builder, the LIST builder does not receive `type`.
284
+ case PhysicalType::LIST:
285
+ return GetListMatchFunction<NO_MATCH_SEL>(predicate);
286
+ default:
287
+ throw InternalException("Unsupported PhysicalType for RowMatcher::GetMatchFunction: %s",
288
+ EnumUtil::ToString(type.InternalType()));
289
+ }
290
+ }
291
+
// Dispatches on the comparison predicate for a value type T: selects the TemplatedMatch
// instantiation for the corresponding operator. The returned MatchFunction has no child
// functions. Any predicate outside the eight comparisons throws InternalException.
292
+ template <bool NO_MATCH_SEL, class T>
293
+ MatchFunction RowMatcher::GetMatchFunction(const ExpressionType predicate) {
294
+ MatchFunction result;
295
+ switch (predicate) {
296
+ case ExpressionType::COMPARE_EQUAL:
297
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, Equals>;
298
+ break;
299
+ case ExpressionType::COMPARE_NOTEQUAL:
300
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, NotEquals>;
301
+ break;
302
+ case ExpressionType::COMPARE_DISTINCT_FROM:
303
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, DistinctFrom>;
304
+ break;
305
+ case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
306
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, NotDistinctFrom>;
307
+ break;
308
+ case ExpressionType::COMPARE_GREATERTHAN:
309
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, GreaterThan>;
310
+ break;
311
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
312
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, GreaterThanEquals>;
313
+ break;
314
+ case ExpressionType::COMPARE_LESSTHAN:
315
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, LessThan>;
316
+ break;
317
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
318
+ result.function = TemplatedMatch<NO_MATCH_SEL, T, LessThanEquals>;
319
+ break;
320
+ default:
321
+ throw InternalException("Unsupported ExpressionType for RowMatcher::GetMatchFunction: %s",
322
+ EnumUtil::ToString(predicate));
323
+ }
324
+ return result;
325
+ }
326
+
// Builds the match function for a STRUCT column.
//   - COMPARE_EQUAL and COMPARE_NOT_DISTINCT_FROM use StructMatchEquality and get one
//     child match function per struct field (built recursively).
//   - All other comparisons use GenericNestedMatch and return early, without child functions.
// Any predicate outside the eight comparisons throws InternalException.
327
+ template <bool NO_MATCH_SEL>
328
+ MatchFunction RowMatcher::GetStructMatchFunction(const LogicalType &type, const ExpressionType predicate) {
329
+ // We perform equality conditions like it's just a row, but we cannot perform inequality conditions like a row,
330
+ // because for equality conditions we need to always loop through all columns, but for inequality conditions,
331
+ // we need to find the first inequality, so the loop looks very different
332
+ MatchFunction result;
333
+ ExpressionType child_predicate = predicate;
334
+ switch (predicate) {
// For COMPARE_EQUAL the struct level uses Equals, but the fields are compared with
// COMPARE_NOT_DISTINCT_FROM instead of COMPARE_EQUAL.
335
+ case ExpressionType::COMPARE_EQUAL:
336
+ result.function = StructMatchEquality<NO_MATCH_SEL, Equals>;
337
+ child_predicate = ExpressionType::COMPARE_NOT_DISTINCT_FROM;
338
+ break;
339
+ case ExpressionType::COMPARE_NOTEQUAL:
340
+ result.function = GenericNestedMatch<NO_MATCH_SEL, NotEquals>;
341
+ return result;
342
+ case ExpressionType::COMPARE_DISTINCT_FROM:
343
+ result.function = GenericNestedMatch<NO_MATCH_SEL, DistinctFrom>;
344
+ return result;
// child_predicate stays COMPARE_NOT_DISTINCT_FROM for the fields in this case.
345
+ case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
346
+ result.function = StructMatchEquality<NO_MATCH_SEL, NotDistinctFrom>;
347
+ break;
348
+ case ExpressionType::COMPARE_GREATERTHAN:
349
+ result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThan>;
350
+ return result;
351
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
352
+ result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThanEquals>;
353
+ return result;
354
+ case ExpressionType::COMPARE_LESSTHAN:
355
+ result.function = GenericNestedMatch<NO_MATCH_SEL, LessThan>;
356
+ return result;
357
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
358
+ result.function = GenericNestedMatch<NO_MATCH_SEL, LessThanEquals>;
359
+ return result;
360
+ default:
361
+ throw InternalException("Unsupported ExpressionType for RowMatcher::GetStructMatchFunction: %s",
362
+ EnumUtil::ToString(predicate));
363
+ }
364
+
// Only reached for the two StructMatchEquality cases: one child function per field,
// in field order.
365
+ result.child_functions.reserve(StructType::GetChildCount(type));
366
+ for (const auto &child_type : StructType::GetChildTypes(type)) {
367
+ result.child_functions.push_back(GetMatchFunction<NO_MATCH_SEL>(child_type.second, child_predicate));
368
+ }
369
+
370
+ return result;
371
+ }
372
+
// Builds the match function for a LIST column: every supported comparison maps to the
// GenericNestedMatch instantiation for the corresponding operator, with no child
// functions. Any predicate outside the eight comparisons throws InternalException.
373
+ template <bool NO_MATCH_SEL>
374
+ MatchFunction RowMatcher::GetListMatchFunction(const ExpressionType predicate) {
375
+ MatchFunction result;
376
+ switch (predicate) {
377
+ case ExpressionType::COMPARE_EQUAL:
378
+ result.function = GenericNestedMatch<NO_MATCH_SEL, Equals>;
379
+ break;
380
+ case ExpressionType::COMPARE_NOTEQUAL:
381
+ result.function = GenericNestedMatch<NO_MATCH_SEL, NotEquals>;
382
+ break;
383
+ case ExpressionType::COMPARE_DISTINCT_FROM:
384
+ result.function = GenericNestedMatch<NO_MATCH_SEL, DistinctFrom>;
385
+ break;
386
+ case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
387
+ result.function = GenericNestedMatch<NO_MATCH_SEL, NotDistinctFrom>;
388
+ break;
389
+ case ExpressionType::COMPARE_GREATERTHAN:
390
+ result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThan>;
391
+ break;
392
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
393
+ result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThanEquals>;
394
+ break;
395
+ case ExpressionType::COMPARE_LESSTHAN:
396
+ result.function = GenericNestedMatch<NO_MATCH_SEL, LessThan>;
397
+ break;
398
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
399
+ result.function = GenericNestedMatch<NO_MATCH_SEL, LessThanEquals>;
400
+ break;
401
+ default:
402
+ throw InternalException("Unsupported ExpressionType for RowMatcher::GetListMatchFunction: %s",
403
+ EnumUtil::ToString(predicate));
404
+ }
405
+ return result;
406
+ }
407
+
408
+ } // namespace duckdb
@@ -294,7 +294,7 @@ static inline void VerifyStrings(const LogicalTypeId type_id, const data_ptr_t r
294
294
  for (idx_t i = 0; i < count; i++) {
295
295
  const auto &row_location = row_locations[offset + i] + base_col_offset;
296
296
  ValidityBytes row_mask(row_location);
297
- if (row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
297
+ if (row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
298
298
  auto recomputed_string = Load<string_t>(row_location + col_offset);
299
299
  recomputed_string.Verify();
300
300
  }
@@ -328,7 +328,7 @@ void TupleDataAllocator::RecomputeHeapPointers(Vector &old_heap_ptrs, const Sele
328
328
  const auto idx = offset + i;
329
329
  const auto &row_location = row_locations[idx] + base_col_offset;
330
330
  ValidityBytes row_mask(row_location);
331
- if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
331
+ if (!row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
332
332
  continue;
333
333
  }
334
334
 
@@ -352,7 +352,7 @@ void TupleDataAllocator::RecomputeHeapPointers(Vector &old_heap_ptrs, const Sele
352
352
  const auto idx = offset + i;
353
353
  const auto &row_location = row_locations[idx] + base_col_offset;
354
354
  ValidityBytes row_mask(row_location);
355
- if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
355
+ if (!row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
356
356
  continue;
357
357
  }
358
358
 
@@ -37,13 +37,17 @@ void TupleDataCollection::Initialize() {
37
37
  }
38
38
  }
39
39
 
40
- void TupleDataCollection::GetAllColumnIDs(vector<column_t> &column_ids) {
41
- column_ids.reserve(layout.ColumnCount());
42
- for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) {
40
+ void GetAllColumnIDsInternal(vector<column_t> &column_ids, const idx_t column_count) {
41
+ column_ids.reserve(column_count);
42
+ for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
43
43
  column_ids.emplace_back(col_idx);
44
44
  }
45
45
  }
46
46
 
47
+ void TupleDataCollection::GetAllColumnIDs(vector<column_t> &column_ids) {
48
+ GetAllColumnIDsInternal(column_ids, layout.ColumnCount());
49
+ }
50
+
47
51
  const TupleDataLayout &TupleDataCollection::GetLayout() const {
48
52
  return layout;
49
53
  }
@@ -108,7 +112,7 @@ void TupleDataCollection::InitializeAppend(TupleDataAppendState &append_state, v
108
112
  TupleDataPinProperties properties) {
109
113
  VerifyAppendColumns(layout, column_ids);
110
114
  InitializeAppend(append_state.pin_state, properties);
111
- InitializeAppend(append_state.chunk_state, std::move(column_ids));
115
+ InitializeChunkState(append_state.chunk_state, std::move(column_ids));
112
116
  }
113
117
 
114
118
  void TupleDataCollection::InitializeAppend(TupleDataPinState &pin_state, TupleDataPinProperties properties) {
@@ -130,11 +134,11 @@ static void InitializeVectorFormat(vector<TupleDataVectorFormat> &vector_data, c
130
134
  for (const auto &child_entry : child_list) {
131
135
  child_types.emplace_back(child_entry.second);
132
136
  }
133
- InitializeVectorFormat(vector_data[col_idx].child_formats, child_types);
137
+ InitializeVectorFormat(vector_data[col_idx].children, child_types);
134
138
  break;
135
139
  }
136
140
  case PhysicalType::LIST:
137
- InitializeVectorFormat(vector_data[col_idx].child_formats, {ListType::GetChildType(type)});
141
+ InitializeVectorFormat(vector_data[col_idx].children, {ListType::GetChildType(type)});
138
142
  break;
139
143
  default:
140
144
  break;
@@ -142,11 +146,16 @@ static void InitializeVectorFormat(vector<TupleDataVectorFormat> &vector_data, c
142
146
  }
143
147
  }
144
148
 
145
- void TupleDataCollection::InitializeAppend(TupleDataChunkState &chunk_state, vector<column_t> column_ids) {
149
+ void TupleDataCollection::InitializeChunkState(TupleDataChunkState &chunk_state, vector<column_t> column_ids) {
150
+ TupleDataCollection::InitializeChunkState(chunk_state, layout.GetTypes(), std::move(column_ids));
151
+ }
152
+
153
+ void TupleDataCollection::InitializeChunkState(TupleDataChunkState &chunk_state, const vector<LogicalType> &types,
154
+ vector<column_t> column_ids) {
146
155
  if (column_ids.empty()) {
147
- GetAllColumnIDs(column_ids);
156
+ GetAllColumnIDsInternal(column_ids, types.size());
148
157
  }
149
- InitializeVectorFormat(chunk_state.vector_data, layout.GetTypes());
158
+ InitializeVectorFormat(chunk_state.vector_data, types);
150
159
  chunk_state.column_ids = std::move(column_ids);
151
160
  }
152
161
 
@@ -211,21 +220,23 @@ void TupleDataCollection::AppendUnified(TupleDataPinState &pin_state, TupleDataC
211
220
  }
212
221
 
213
222
  static inline void ToUnifiedFormatInternal(TupleDataVectorFormat &format, Vector &vector, const idx_t count) {
214
- vector.ToUnifiedFormat(count, format.data);
215
- format.original_sel = format.data.sel;
216
- format.original_owned_sel.Initialize(format.data.owned_sel);
223
+ vector.ToUnifiedFormat(count, format.unified);
224
+ format.original_sel = format.unified.sel;
225
+ format.original_owned_sel.Initialize(format.unified.owned_sel);
217
226
  switch (vector.GetType().InternalType()) {
218
227
  case PhysicalType::STRUCT: {
219
228
  auto &entries = StructVector::GetEntries(vector);
220
- D_ASSERT(format.child_formats.size() == entries.size());
229
+ D_ASSERT(format.children.size() == entries.size());
221
230
  for (idx_t struct_col_idx = 0; struct_col_idx < entries.size(); struct_col_idx++) {
222
- ToUnifiedFormatInternal(format.child_formats[struct_col_idx], *entries[struct_col_idx], count);
231
+ ToUnifiedFormatInternal(reinterpret_cast<TupleDataVectorFormat &>(format.children[struct_col_idx]),
232
+ *entries[struct_col_idx], count);
223
233
  }
224
234
  break;
225
235
  }
226
236
  case PhysicalType::LIST:
227
- D_ASSERT(format.child_formats.size() == 1);
228
- ToUnifiedFormatInternal(format.child_formats[0], ListVector::GetEntry(vector), ListVector::GetListSize(vector));
237
+ D_ASSERT(format.children.size() == 1);
238
+ ToUnifiedFormatInternal(reinterpret_cast<TupleDataVectorFormat &>(format.children[0]),
239
+ ListVector::GetEntry(vector), ListVector::GetListSize(vector));
229
240
  break;
230
241
  default:
231
242
  break;
@@ -242,7 +253,7 @@ void TupleDataCollection::ToUnifiedFormat(TupleDataChunkState &chunk_state, Data
242
253
  void TupleDataCollection::GetVectorData(const TupleDataChunkState &chunk_state, UnifiedVectorFormat result[]) {
243
254
  const auto &vector_data = chunk_state.vector_data;
244
255
  for (idx_t i = 0; i < vector_data.size(); i++) {
245
- const auto &source = vector_data[i].data;
256
+ const auto &source = vector_data[i].unified;
246
257
  auto &target = result[i];
247
258
  target.sel = source.sel;
248
259
  target.data = source.data;
@@ -433,6 +444,13 @@ bool TupleDataCollection::Scan(TupleDataParallelScanState &gstate, TupleDataLoca
433
444
  return true;
434
445
  }
435
446
 
// Reports whether the scan described by `state` has consumed the whole collection.
// An empty collection (Count() == 0) is always complete. Otherwise the scan is complete
// when state.segment_index is the index of the last segment and state.chunk_index equals
// that segment's ChunkCount(). The Count() guard also keeps segments.size() - 1 and
// segments.back() from being evaluated for an empty collection, presumably because
// `segments` would then be empty - TODO confirm.
447
+ bool TupleDataCollection::ScanComplete(const TupleDataScanState &state) const {
448
+ if (Count() == 0) {
449
+ return true;
450
+ }
451
+ return state.segment_index == segments.size() - 1 && state.chunk_index == segments.back().ChunkCount();
452
+ }
453
+
436
454
  void TupleDataCollection::FinalizePinState(TupleDataPinState &pin_state, TupleDataSegment &segment) {
437
455
  segment.allocator->ReleaseOrStoreHandles(pin_state, segment);
438
456
  }