duckdb 0.8.2-dev4711.0 → 0.8.2-dev4871.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +0 -1
- package/binding.gyp.in +0 -1
- package/package.json +1 -1
- package/src/connection.cpp +10 -23
- package/src/data_chunk.cpp +1 -3
- package/src/database.cpp +4 -9
- package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
- package/src/duckdb/extension/json/json_functions.cpp +4 -6
- package/src/duckdb/src/common/enum_util.cpp +10 -5
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +408 -0
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +28 -17
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
- package/src/duckdb/src/core_functions/function_list.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
- package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
- package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
- package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
- package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
- package/src/duckdb/src/function/function_binder.cpp +10 -9
- package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
- package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
- package/src/duckdb/src/main/config.cpp +1 -1
- package/src/duckdb/src/main/relation.cpp +10 -0
- package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
- package/src/duckdb/src/storage/data_table.cpp +10 -0
- package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
- package/src/statement.cpp +2 -4
- package/test/database_fail.test.ts +6 -0
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
@@ -0,0 +1,408 @@
|
|
1
|
+
#include "duckdb/common/row_operations/row_matcher.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/enum_util.hpp"
|
4
|
+
#include "duckdb/common/exception.hpp"
|
5
|
+
#include "duckdb/common/types/row/tuple_data_collection.hpp"
|
6
|
+
|
7
|
+
namespace duckdb {
|
8
|
+
|
9
|
+
using ValidityBytes = TupleDataLayout::ValidityBytes;
|
10
|
+
|
11
|
+
template <class OP>
|
12
|
+
struct RowMatchOperator {
|
13
|
+
static constexpr const bool COMPARE_NULL = false;
|
14
|
+
|
15
|
+
template <class T>
|
16
|
+
static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) {
|
17
|
+
if (right_null || left_null) {
|
18
|
+
return false;
|
19
|
+
}
|
20
|
+
return OP::template Operation<T>(left, right);
|
21
|
+
}
|
22
|
+
};
|
23
|
+
|
24
|
+
template <>
|
25
|
+
struct RowMatchOperator<DistinctFrom> {
|
26
|
+
static constexpr const bool COMPARE_NULL = true;
|
27
|
+
|
28
|
+
template <class T>
|
29
|
+
static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) {
|
30
|
+
return DistinctFrom::template Operation<T>(left, right, left_null, right_null);
|
31
|
+
}
|
32
|
+
};
|
33
|
+
|
34
|
+
template <>
|
35
|
+
struct RowMatchOperator<NotDistinctFrom> {
|
36
|
+
static constexpr const bool COMPARE_NULL = true;
|
37
|
+
|
38
|
+
template <class T>
|
39
|
+
static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) {
|
40
|
+
return NotDistinctFrom::template Operation<T>(left, right, left_null, right_null);
|
41
|
+
}
|
42
|
+
};
|
43
|
+
|
44
|
+
template <bool NO_MATCH_SEL, class T, class OP>
|
45
|
+
static idx_t TemplatedMatch(Vector &, const TupleDataVectorFormat &lhs_format, SelectionVector &sel, const idx_t count,
|
46
|
+
const TupleDataLayout &rhs_layout, Vector &rhs_row_locations, const idx_t col_idx,
|
47
|
+
const vector<MatchFunction> &, SelectionVector *no_match_sel, idx_t &no_match_count) {
|
48
|
+
using MATCH_OP = RowMatchOperator<OP>;
|
49
|
+
|
50
|
+
// LHS
|
51
|
+
const auto &lhs_sel = *lhs_format.unified.sel;
|
52
|
+
const auto lhs_data = UnifiedVectorFormat::GetData<T>(lhs_format.unified);
|
53
|
+
const auto &lhs_validity = lhs_format.unified.validity;
|
54
|
+
|
55
|
+
// RHS
|
56
|
+
const auto rhs_locations = FlatVector::GetData<data_ptr_t>(rhs_row_locations);
|
57
|
+
const auto rhs_offset_in_row = rhs_layout.GetOffsets()[col_idx];
|
58
|
+
idx_t entry_idx;
|
59
|
+
idx_t idx_in_entry;
|
60
|
+
ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
|
61
|
+
|
62
|
+
idx_t match_count = 0;
|
63
|
+
for (idx_t i = 0; i < count; i++) {
|
64
|
+
const auto idx = sel.get_index(i);
|
65
|
+
|
66
|
+
const auto lhs_idx = lhs_sel.get_index(idx);
|
67
|
+
const auto lhs_null = lhs_validity.AllValid() ? false : !lhs_validity.RowIsValid(lhs_idx);
|
68
|
+
|
69
|
+
const auto &rhs_location = rhs_locations[idx];
|
70
|
+
const ValidityBytes rhs_mask(rhs_location);
|
71
|
+
const auto rhs_null = !rhs_mask.RowIsValid(rhs_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry);
|
72
|
+
|
73
|
+
if (MATCH_OP::template Operation<T>(lhs_data[lhs_idx], Load<T>(rhs_location + rhs_offset_in_row), lhs_null,
|
74
|
+
rhs_null)) {
|
75
|
+
sel.set_index(match_count++, idx);
|
76
|
+
} else if (NO_MATCH_SEL) {
|
77
|
+
no_match_sel->set_index(no_match_count++, idx);
|
78
|
+
}
|
79
|
+
}
|
80
|
+
return match_count;
|
81
|
+
}
|
82
|
+
|
83
|
+
template <bool NO_MATCH_SEL, class OP>
|
84
|
+
static idx_t StructMatchEquality(Vector &lhs_vector, const TupleDataVectorFormat &lhs_format, SelectionVector &sel,
|
85
|
+
const idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
|
86
|
+
const idx_t col_idx, const vector<MatchFunction> &child_functions,
|
87
|
+
SelectionVector *no_match_sel, idx_t &no_match_count) {
|
88
|
+
using MATCH_OP = RowMatchOperator<OP>;
|
89
|
+
|
90
|
+
// LHS
|
91
|
+
const auto &lhs_sel = *lhs_format.unified.sel;
|
92
|
+
const auto &lhs_validity = lhs_format.unified.validity;
|
93
|
+
|
94
|
+
// RHS
|
95
|
+
const auto rhs_locations = FlatVector::GetData<data_ptr_t>(rhs_row_locations);
|
96
|
+
idx_t entry_idx;
|
97
|
+
idx_t idx_in_entry;
|
98
|
+
ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
|
99
|
+
|
100
|
+
idx_t match_count = 0;
|
101
|
+
for (idx_t i = 0; i < count; i++) {
|
102
|
+
const auto idx = sel.get_index(i);
|
103
|
+
|
104
|
+
const auto lhs_idx = lhs_sel.get_index(idx);
|
105
|
+
const auto lhs_null = lhs_validity.AllValid() ? false : !lhs_validity.RowIsValid(lhs_idx);
|
106
|
+
|
107
|
+
const auto &rhs_location = rhs_locations[idx];
|
108
|
+
const ValidityBytes rhs_mask(rhs_location);
|
109
|
+
const auto rhs_null = !rhs_mask.RowIsValid(rhs_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry);
|
110
|
+
|
111
|
+
// For structs there is no value to compare, here we match NULLs and let recursion do the rest
|
112
|
+
// So we use the comparison only if rhs or LHS is NULL and COMPARE_NULL is true
|
113
|
+
if (!(lhs_null || rhs_null) ||
|
114
|
+
(MATCH_OP::COMPARE_NULL && MATCH_OP::template Operation<uint32_t>(0, 0, lhs_null, rhs_null))) {
|
115
|
+
sel.set_index(match_count++, idx);
|
116
|
+
} else if (NO_MATCH_SEL) {
|
117
|
+
no_match_sel->set_index(no_match_count++, idx);
|
118
|
+
}
|
119
|
+
}
|
120
|
+
|
121
|
+
// Create a Vector of pointers to the start of the TupleDataLayout of the STRUCT
|
122
|
+
Vector rhs_struct_row_locations(LogicalType::POINTER);
|
123
|
+
const auto rhs_offset_in_row = rhs_layout.GetOffsets()[col_idx];
|
124
|
+
auto rhs_struct_locations = FlatVector::GetData<data_ptr_t>(rhs_struct_row_locations);
|
125
|
+
for (idx_t i = 0; i < match_count; i++) {
|
126
|
+
const auto idx = sel.get_index(i);
|
127
|
+
rhs_struct_locations[idx] = rhs_locations[idx] + rhs_offset_in_row;
|
128
|
+
}
|
129
|
+
|
130
|
+
// Get the struct layout and struct entries
|
131
|
+
const auto &rhs_struct_layout = rhs_layout.GetStructLayout(col_idx);
|
132
|
+
auto &lhs_struct_vectors = StructVector::GetEntries(lhs_vector);
|
133
|
+
D_ASSERT(rhs_struct_layout.ColumnCount() == lhs_struct_vectors.size());
|
134
|
+
|
135
|
+
for (idx_t struct_col_idx = 0; struct_col_idx < rhs_struct_layout.ColumnCount(); struct_col_idx++) {
|
136
|
+
auto &lhs_struct_vector = *lhs_struct_vectors[struct_col_idx];
|
137
|
+
auto &lhs_struct_format = lhs_format.children[struct_col_idx];
|
138
|
+
const auto &child_function = child_functions[struct_col_idx];
|
139
|
+
match_count = child_function.function(lhs_struct_vector, lhs_struct_format, sel, match_count, rhs_struct_layout,
|
140
|
+
rhs_struct_row_locations, struct_col_idx, child_function.child_functions,
|
141
|
+
no_match_sel, no_match_count);
|
142
|
+
}
|
143
|
+
|
144
|
+
return match_count;
|
145
|
+
}
|
146
|
+
|
147
|
+
template <typename OP>
|
148
|
+
static idx_t SelectComparison(Vector &, Vector &, const SelectionVector &, idx_t, SelectionVector *,
|
149
|
+
SelectionVector *) {
|
150
|
+
throw NotImplementedException("Unsupported list comparison operand for RowMatcher::GetMatchFunction");
|
151
|
+
}
|
152
|
+
|
153
|
+
template <>
|
154
|
+
idx_t SelectComparison<Equals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
155
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
156
|
+
return VectorOperations::NestedEquals(left, right, sel, count, true_sel, false_sel);
|
157
|
+
}
|
158
|
+
|
159
|
+
template <>
|
160
|
+
idx_t SelectComparison<NotEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
161
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
162
|
+
return VectorOperations::NestedNotEquals(left, right, sel, count, true_sel, false_sel);
|
163
|
+
}
|
164
|
+
|
165
|
+
template <>
|
166
|
+
idx_t SelectComparison<DistinctFrom>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
167
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
168
|
+
return VectorOperations::DistinctFrom(left, right, &sel, count, true_sel, false_sel);
|
169
|
+
}
|
170
|
+
|
171
|
+
template <>
|
172
|
+
idx_t SelectComparison<NotDistinctFrom>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
173
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
174
|
+
return VectorOperations::NotDistinctFrom(left, right, &sel, count, true_sel, false_sel);
|
175
|
+
}
|
176
|
+
|
177
|
+
template <>
|
178
|
+
idx_t SelectComparison<GreaterThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
179
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
180
|
+
return VectorOperations::DistinctGreaterThan(left, right, &sel, count, true_sel, false_sel);
|
181
|
+
}
|
182
|
+
|
183
|
+
template <>
|
184
|
+
idx_t SelectComparison<GreaterThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
185
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
186
|
+
return VectorOperations::DistinctGreaterThanEquals(left, right, &sel, count, true_sel, false_sel);
|
187
|
+
}
|
188
|
+
|
189
|
+
template <>
|
190
|
+
idx_t SelectComparison<LessThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
191
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
192
|
+
return VectorOperations::DistinctLessThan(left, right, &sel, count, true_sel, false_sel);
|
193
|
+
}
|
194
|
+
|
195
|
+
template <>
|
196
|
+
idx_t SelectComparison<LessThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
197
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
198
|
+
return VectorOperations::DistinctLessThanEquals(left, right, &sel, count, true_sel, false_sel);
|
199
|
+
}
|
200
|
+
|
201
|
+
template <bool NO_MATCH_SEL, class OP>
|
202
|
+
static idx_t GenericNestedMatch(Vector &lhs_vector, const TupleDataVectorFormat &, SelectionVector &sel,
|
203
|
+
const idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
|
204
|
+
const idx_t col_idx, const vector<MatchFunction> &, SelectionVector *no_match_sel,
|
205
|
+
idx_t &no_match_count) {
|
206
|
+
const auto &type = rhs_layout.GetTypes()[col_idx];
|
207
|
+
|
208
|
+
// Gather a dense Vector containing the column values being matched
|
209
|
+
Vector key(type);
|
210
|
+
const auto gather_function = TupleDataCollection::GetGatherFunction(type);
|
211
|
+
gather_function.function(rhs_layout, rhs_row_locations, col_idx, sel, count, key,
|
212
|
+
*FlatVector::IncrementalSelectionVector(), key, gather_function.child_functions);
|
213
|
+
|
214
|
+
// Densify the input column
|
215
|
+
Vector sliced(lhs_vector, sel, count);
|
216
|
+
|
217
|
+
if (NO_MATCH_SEL) {
|
218
|
+
SelectionVector no_match_sel_offset(no_match_sel->data() + no_match_count);
|
219
|
+
auto match_count = SelectComparison<OP>(sliced, key, sel, count, &sel, &no_match_sel_offset);
|
220
|
+
no_match_count += count - match_count;
|
221
|
+
return match_count;
|
222
|
+
}
|
223
|
+
return SelectComparison<OP>(sliced, key, sel, count, &sel, nullptr);
|
224
|
+
}
|
225
|
+
|
226
|
+
void RowMatcher::Initialize(const bool no_match_sel, const TupleDataLayout &layout, const Predicates &predicates) {
|
227
|
+
match_functions.reserve(predicates.size());
|
228
|
+
for (idx_t col_idx = 0; col_idx < predicates.size(); col_idx++) {
|
229
|
+
match_functions.push_back(GetMatchFunction(no_match_sel, layout.GetTypes()[col_idx], predicates[col_idx]));
|
230
|
+
}
|
231
|
+
}
|
232
|
+
|
233
|
+
//! Runs the match functions column by column. Each function receives the count left over by the previous one,
//! so "sel" shrinks as columns are processed. Returns the count produced by the last function.
idx_t RowMatcher::Match(DataChunk &lhs, const vector<TupleDataVectorFormat> &lhs_formats, SelectionVector &sel,
                        idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
                        SelectionVector *no_match_sel, idx_t &no_match_count) {
	D_ASSERT(!match_functions.empty());

	idx_t col_idx = 0;
	for (const auto &match_function : match_functions) {
		auto &lhs_column = lhs.data[col_idx];
		const auto &lhs_column_format = lhs_formats[col_idx];
		count = match_function.function(lhs_column, lhs_column_format, sel, count, rhs_layout, rhs_row_locations,
		                                col_idx, match_function.child_functions, no_match_sel, no_match_count);
		col_idx++;
	}
	return count;
}
|
245
|
+
|
246
|
+
//! Turns the runtime "no_match_sel" flag into the NO_MATCH_SEL template argument.
MatchFunction RowMatcher::GetMatchFunction(const bool no_match_sel, const LogicalType &type,
                                           const ExpressionType predicate) {
	if (no_match_sel) {
		return GetMatchFunction<true>(type, predicate);
	}
	return GetMatchFunction<false>(type, predicate);
}
|
250
|
+
|
251
|
+
template <bool NO_MATCH_SEL>
|
252
|
+
MatchFunction RowMatcher::GetMatchFunction(const LogicalType &type, const ExpressionType predicate) {
|
253
|
+
switch (type.InternalType()) {
|
254
|
+
case PhysicalType::BOOL:
|
255
|
+
return GetMatchFunction<NO_MATCH_SEL, bool>(predicate);
|
256
|
+
case PhysicalType::INT8:
|
257
|
+
return GetMatchFunction<NO_MATCH_SEL, int8_t>(predicate);
|
258
|
+
case PhysicalType::INT16:
|
259
|
+
return GetMatchFunction<NO_MATCH_SEL, int16_t>(predicate);
|
260
|
+
case PhysicalType::INT32:
|
261
|
+
return GetMatchFunction<NO_MATCH_SEL, int32_t>(predicate);
|
262
|
+
case PhysicalType::INT64:
|
263
|
+
return GetMatchFunction<NO_MATCH_SEL, int64_t>(predicate);
|
264
|
+
case PhysicalType::INT128:
|
265
|
+
return GetMatchFunction<NO_MATCH_SEL, hugeint_t>(predicate);
|
266
|
+
case PhysicalType::UINT8:
|
267
|
+
return GetMatchFunction<NO_MATCH_SEL, uint8_t>(predicate);
|
268
|
+
case PhysicalType::UINT16:
|
269
|
+
return GetMatchFunction<NO_MATCH_SEL, uint16_t>(predicate);
|
270
|
+
case PhysicalType::UINT32:
|
271
|
+
return GetMatchFunction<NO_MATCH_SEL, uint32_t>(predicate);
|
272
|
+
case PhysicalType::UINT64:
|
273
|
+
return GetMatchFunction<NO_MATCH_SEL, uint64_t>(predicate);
|
274
|
+
case PhysicalType::FLOAT:
|
275
|
+
return GetMatchFunction<NO_MATCH_SEL, float>(predicate);
|
276
|
+
case PhysicalType::DOUBLE:
|
277
|
+
return GetMatchFunction<NO_MATCH_SEL, double>(predicate);
|
278
|
+
case PhysicalType::INTERVAL:
|
279
|
+
return GetMatchFunction<NO_MATCH_SEL, interval_t>(predicate);
|
280
|
+
case PhysicalType::VARCHAR:
|
281
|
+
return GetMatchFunction<NO_MATCH_SEL, string_t>(predicate);
|
282
|
+
case PhysicalType::STRUCT:
|
283
|
+
return GetStructMatchFunction<NO_MATCH_SEL>(type, predicate);
|
284
|
+
case PhysicalType::LIST:
|
285
|
+
return GetListMatchFunction<NO_MATCH_SEL>(predicate);
|
286
|
+
default:
|
287
|
+
throw InternalException("Unsupported PhysicalType for RowMatcher::GetMatchFunction: %s",
|
288
|
+
EnumUtil::ToString(type.InternalType()));
|
289
|
+
}
|
290
|
+
}
|
291
|
+
|
292
|
+
template <bool NO_MATCH_SEL, class T>
|
293
|
+
MatchFunction RowMatcher::GetMatchFunction(const ExpressionType predicate) {
|
294
|
+
MatchFunction result;
|
295
|
+
switch (predicate) {
|
296
|
+
case ExpressionType::COMPARE_EQUAL:
|
297
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, Equals>;
|
298
|
+
break;
|
299
|
+
case ExpressionType::COMPARE_NOTEQUAL:
|
300
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, NotEquals>;
|
301
|
+
break;
|
302
|
+
case ExpressionType::COMPARE_DISTINCT_FROM:
|
303
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, DistinctFrom>;
|
304
|
+
break;
|
305
|
+
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
306
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, NotDistinctFrom>;
|
307
|
+
break;
|
308
|
+
case ExpressionType::COMPARE_GREATERTHAN:
|
309
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, GreaterThan>;
|
310
|
+
break;
|
311
|
+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
312
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, GreaterThanEquals>;
|
313
|
+
break;
|
314
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
315
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, LessThan>;
|
316
|
+
break;
|
317
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
318
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, LessThanEquals>;
|
319
|
+
break;
|
320
|
+
default:
|
321
|
+
throw InternalException("Unsupported ExpressionType for RowMatcher::GetMatchFunction: %s",
|
322
|
+
EnumUtil::ToString(predicate));
|
323
|
+
}
|
324
|
+
return result;
|
325
|
+
}
|
326
|
+
|
327
|
+
template <bool NO_MATCH_SEL>
|
328
|
+
MatchFunction RowMatcher::GetStructMatchFunction(const LogicalType &type, const ExpressionType predicate) {
|
329
|
+
// We perform equality conditions like it's just a row, but we cannot perform inequality conditions like a row,
|
330
|
+
// because for equality conditions we need to always loop through all columns, but for inequality conditions,
|
331
|
+
// we need to find the first inequality, so the loop looks very different
|
332
|
+
MatchFunction result;
|
333
|
+
ExpressionType child_predicate = predicate;
|
334
|
+
switch (predicate) {
|
335
|
+
case ExpressionType::COMPARE_EQUAL:
|
336
|
+
result.function = StructMatchEquality<NO_MATCH_SEL, Equals>;
|
337
|
+
child_predicate = ExpressionType::COMPARE_NOT_DISTINCT_FROM;
|
338
|
+
break;
|
339
|
+
case ExpressionType::COMPARE_NOTEQUAL:
|
340
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, NotEquals>;
|
341
|
+
return result;
|
342
|
+
case ExpressionType::COMPARE_DISTINCT_FROM:
|
343
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, DistinctFrom>;
|
344
|
+
return result;
|
345
|
+
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
346
|
+
result.function = StructMatchEquality<NO_MATCH_SEL, NotDistinctFrom>;
|
347
|
+
break;
|
348
|
+
case ExpressionType::COMPARE_GREATERTHAN:
|
349
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThan>;
|
350
|
+
return result;
|
351
|
+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
352
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThanEquals>;
|
353
|
+
return result;
|
354
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
355
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, LessThan>;
|
356
|
+
return result;
|
357
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
358
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, LessThanEquals>;
|
359
|
+
return result;
|
360
|
+
default:
|
361
|
+
throw InternalException("Unsupported ExpressionType for RowMatcher::GetStructMatchFunction: %s",
|
362
|
+
EnumUtil::ToString(predicate));
|
363
|
+
}
|
364
|
+
|
365
|
+
result.child_functions.reserve(StructType::GetChildCount(type));
|
366
|
+
for (const auto &child_type : StructType::GetChildTypes(type)) {
|
367
|
+
result.child_functions.push_back(GetMatchFunction<NO_MATCH_SEL>(child_type.second, child_predicate));
|
368
|
+
}
|
369
|
+
|
370
|
+
return result;
|
371
|
+
}
|
372
|
+
|
373
|
+
template <bool NO_MATCH_SEL>
|
374
|
+
MatchFunction RowMatcher::GetListMatchFunction(const ExpressionType predicate) {
|
375
|
+
MatchFunction result;
|
376
|
+
switch (predicate) {
|
377
|
+
case ExpressionType::COMPARE_EQUAL:
|
378
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, Equals>;
|
379
|
+
break;
|
380
|
+
case ExpressionType::COMPARE_NOTEQUAL:
|
381
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, NotEquals>;
|
382
|
+
break;
|
383
|
+
case ExpressionType::COMPARE_DISTINCT_FROM:
|
384
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, DistinctFrom>;
|
385
|
+
break;
|
386
|
+
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
387
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, NotDistinctFrom>;
|
388
|
+
break;
|
389
|
+
case ExpressionType::COMPARE_GREATERTHAN:
|
390
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThan>;
|
391
|
+
break;
|
392
|
+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
393
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThanEquals>;
|
394
|
+
break;
|
395
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
396
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, LessThan>;
|
397
|
+
break;
|
398
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
399
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, LessThanEquals>;
|
400
|
+
break;
|
401
|
+
default:
|
402
|
+
throw InternalException("Unsupported ExpressionType for RowMatcher::GetListMatchFunction: %s",
|
403
|
+
EnumUtil::ToString(predicate));
|
404
|
+
}
|
405
|
+
return result;
|
406
|
+
}
|
407
|
+
|
408
|
+
} // namespace duckdb
|
@@ -294,7 +294,7 @@ static inline void VerifyStrings(const LogicalTypeId type_id, const data_ptr_t r
|
|
294
294
|
for (idx_t i = 0; i < count; i++) {
|
295
295
|
const auto &row_location = row_locations[offset + i] + base_col_offset;
|
296
296
|
ValidityBytes row_mask(row_location);
|
297
|
-
if (row_mask.RowIsValid(row_mask.
|
297
|
+
if (row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
|
298
298
|
auto recomputed_string = Load<string_t>(row_location + col_offset);
|
299
299
|
recomputed_string.Verify();
|
300
300
|
}
|
@@ -328,7 +328,7 @@ void TupleDataAllocator::RecomputeHeapPointers(Vector &old_heap_ptrs, const Sele
|
|
328
328
|
const auto idx = offset + i;
|
329
329
|
const auto &row_location = row_locations[idx] + base_col_offset;
|
330
330
|
ValidityBytes row_mask(row_location);
|
331
|
-
if (!row_mask.RowIsValid(row_mask.
|
331
|
+
if (!row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
|
332
332
|
continue;
|
333
333
|
}
|
334
334
|
|
@@ -352,7 +352,7 @@ void TupleDataAllocator::RecomputeHeapPointers(Vector &old_heap_ptrs, const Sele
|
|
352
352
|
const auto idx = offset + i;
|
353
353
|
const auto &row_location = row_locations[idx] + base_col_offset;
|
354
354
|
ValidityBytes row_mask(row_location);
|
355
|
-
if (!row_mask.RowIsValid(row_mask.
|
355
|
+
if (!row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
|
356
356
|
continue;
|
357
357
|
}
|
358
358
|
|
@@ -37,13 +37,17 @@ void TupleDataCollection::Initialize() {
|
|
37
37
|
}
|
38
38
|
}
|
39
39
|
|
40
|
-
void
|
41
|
-
column_ids.reserve(
|
42
|
-
for (idx_t col_idx = 0; col_idx <
|
40
|
+
//! Appends the column ids 0 .. column_count - 1 to "column_ids".
void GetAllColumnIDsInternal(vector<column_t> &column_ids, const idx_t column_count) {
	column_ids.reserve(column_count);
	idx_t next_id = 0;
	while (next_id < column_count) {
		column_ids.emplace_back(next_id);
		next_id++;
	}
}
|
46
46
|
|
47
|
+
//! Appends one column id for every column of this collection's layout to "column_ids".
void TupleDataCollection::GetAllColumnIDs(vector<column_t> &column_ids) {
	const auto column_count = layout.ColumnCount();
	GetAllColumnIDsInternal(column_ids, column_count);
}
|
50
|
+
|
47
51
|
const TupleDataLayout &TupleDataCollection::GetLayout() const {
|
48
52
|
return layout;
|
49
53
|
}
|
@@ -108,7 +112,7 @@ void TupleDataCollection::InitializeAppend(TupleDataAppendState &append_state, v
|
|
108
112
|
TupleDataPinProperties properties) {
|
109
113
|
VerifyAppendColumns(layout, column_ids);
|
110
114
|
InitializeAppend(append_state.pin_state, properties);
|
111
|
-
|
115
|
+
InitializeChunkState(append_state.chunk_state, std::move(column_ids));
|
112
116
|
}
|
113
117
|
|
114
118
|
void TupleDataCollection::InitializeAppend(TupleDataPinState &pin_state, TupleDataPinProperties properties) {
|
@@ -130,11 +134,11 @@ static void InitializeVectorFormat(vector<TupleDataVectorFormat> &vector_data, c
|
|
130
134
|
for (const auto &child_entry : child_list) {
|
131
135
|
child_types.emplace_back(child_entry.second);
|
132
136
|
}
|
133
|
-
InitializeVectorFormat(vector_data[col_idx].
|
137
|
+
InitializeVectorFormat(vector_data[col_idx].children, child_types);
|
134
138
|
break;
|
135
139
|
}
|
136
140
|
case PhysicalType::LIST:
|
137
|
-
InitializeVectorFormat(vector_data[col_idx].
|
141
|
+
InitializeVectorFormat(vector_data[col_idx].children, {ListType::GetChildType(type)});
|
138
142
|
break;
|
139
143
|
default:
|
140
144
|
break;
|
@@ -142,11 +146,16 @@ static void InitializeVectorFormat(vector<TupleDataVectorFormat> &vector_data, c
|
|
142
146
|
}
|
143
147
|
}
|
144
148
|
|
145
|
-
void TupleDataCollection::
|
149
|
+
//! Convenience overload: initializes the chunk state using the column types of this collection's layout.
void TupleDataCollection::InitializeChunkState(TupleDataChunkState &chunk_state, vector<column_t> column_ids) {
	const auto &layout_types = layout.GetTypes();
	TupleDataCollection::InitializeChunkState(chunk_state, layout_types, std::move(column_ids));
}
|
152
|
+
|
153
|
+
void TupleDataCollection::InitializeChunkState(TupleDataChunkState &chunk_state, const vector<LogicalType> &types,
|
154
|
+
vector<column_t> column_ids) {
|
146
155
|
if (column_ids.empty()) {
|
147
|
-
|
156
|
+
GetAllColumnIDsInternal(column_ids, types.size());
|
148
157
|
}
|
149
|
-
InitializeVectorFormat(chunk_state.vector_data,
|
158
|
+
InitializeVectorFormat(chunk_state.vector_data, types);
|
150
159
|
chunk_state.column_ids = std::move(column_ids);
|
151
160
|
}
|
152
161
|
|
@@ -211,21 +220,23 @@ void TupleDataCollection::AppendUnified(TupleDataPinState &pin_state, TupleDataC
|
|
211
220
|
}
|
212
221
|
|
213
222
|
static inline void ToUnifiedFormatInternal(TupleDataVectorFormat &format, Vector &vector, const idx_t count) {
|
214
|
-
vector.ToUnifiedFormat(count, format.
|
215
|
-
format.original_sel = format.
|
216
|
-
format.original_owned_sel.Initialize(format.
|
223
|
+
vector.ToUnifiedFormat(count, format.unified);
|
224
|
+
format.original_sel = format.unified.sel;
|
225
|
+
format.original_owned_sel.Initialize(format.unified.owned_sel);
|
217
226
|
switch (vector.GetType().InternalType()) {
|
218
227
|
case PhysicalType::STRUCT: {
|
219
228
|
auto &entries = StructVector::GetEntries(vector);
|
220
|
-
D_ASSERT(format.
|
229
|
+
D_ASSERT(format.children.size() == entries.size());
|
221
230
|
for (idx_t struct_col_idx = 0; struct_col_idx < entries.size(); struct_col_idx++) {
|
222
|
-
ToUnifiedFormatInternal(format.
|
231
|
+
ToUnifiedFormatInternal(reinterpret_cast<TupleDataVectorFormat &>(format.children[struct_col_idx]),
|
232
|
+
*entries[struct_col_idx], count);
|
223
233
|
}
|
224
234
|
break;
|
225
235
|
}
|
226
236
|
case PhysicalType::LIST:
|
227
|
-
D_ASSERT(format.
|
228
|
-
ToUnifiedFormatInternal(format.
|
237
|
+
D_ASSERT(format.children.size() == 1);
|
238
|
+
ToUnifiedFormatInternal(reinterpret_cast<TupleDataVectorFormat &>(format.children[0]),
|
239
|
+
ListVector::GetEntry(vector), ListVector::GetListSize(vector));
|
229
240
|
break;
|
230
241
|
default:
|
231
242
|
break;
|
@@ -242,7 +253,7 @@ void TupleDataCollection::ToUnifiedFormat(TupleDataChunkState &chunk_state, Data
|
|
242
253
|
void TupleDataCollection::GetVectorData(const TupleDataChunkState &chunk_state, UnifiedVectorFormat result[]) {
|
243
254
|
const auto &vector_data = chunk_state.vector_data;
|
244
255
|
for (idx_t i = 0; i < vector_data.size(); i++) {
|
245
|
-
const auto &source = vector_data[i].
|
256
|
+
const auto &source = vector_data[i].unified;
|
246
257
|
auto &target = result[i];
|
247
258
|
target.sel = source.sel;
|
248
259
|
target.data = source.data;
|