duckdb 0.8.2-dev4711.0 → 0.8.2-dev5002.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +0 -1
- package/binding.gyp.in +0 -1
- package/package.json +1 -1
- package/src/connection.cpp +10 -23
- package/src/data_chunk.cpp +1 -3
- package/src/database.cpp +4 -9
- package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
- package/src/duckdb/extension/json/json_functions.cpp +4 -6
- package/src/duckdb/src/common/enum_util.cpp +10 -5
- package/src/duckdb/src/common/operator/cast_operators.cpp +18 -0
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +375 -0
- package/src/duckdb/src/common/types/data_chunk.cpp +48 -11
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +28 -17
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
- package/src/duckdb/src/common/types/vector.cpp +0 -1
- package/src/duckdb/src/common/types.cpp +1 -1
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
- package/src/duckdb/src/core_functions/function_list.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
- package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +5 -1
- package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +10 -1
- package/src/duckdb/src/core_functions/scalar/map/map_concat.cpp +0 -2
- package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
- package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
- package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_inner.cpp +20 -27
- package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_mark.cpp +21 -9
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
- package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
- package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
- package/src/duckdb/src/function/cast/time_casts.cpp +12 -0
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
- package/src/duckdb/src/function/function_binder.cpp +10 -9
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +3 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +38 -2
- package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +5 -5
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +7 -7
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +12 -12
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +12 -12
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +33 -33
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +13 -13
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create_table.hpp +1 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -1
- package/src/duckdb/src/main/config.cpp +1 -1
- package/src/duckdb/src/main/relation.cpp +10 -0
- package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +28 -6
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +3 -0
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +14 -6
- package/src/duckdb/src/planner/operator/logical_create_table.cpp +3 -3
- package/src/duckdb/src/planner/operator/logical_delete.cpp +3 -2
- package/src/duckdb/src/planner/operator/logical_insert.cpp +3 -2
- package/src/duckdb/src/planner/operator/logical_update.cpp +3 -2
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
- package/src/duckdb/src/storage/data_table.cpp +18 -8
- package/src/duckdb/src/storage/local_storage.cpp +2 -3
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +64 -80
- package/src/duckdb/src/storage/storage_manager.cpp +6 -2
- package/src/duckdb/src/storage/table/row_group.cpp +6 -0
- package/src/duckdb/src/storage/table/row_group_collection.cpp +4 -3
- package/src/duckdb/src/storage/table/struct_column_data.cpp +2 -0
- package/src/duckdb/src/transaction/duck_transaction.cpp +1 -0
- package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
- package/src/statement.cpp +2 -4
- package/test/database_fail.test.ts +6 -0
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
@@ -0,0 +1,375 @@
|
|
1
|
+
#include "duckdb/common/row_operations/row_matcher.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/enum_util.hpp"
|
4
|
+
#include "duckdb/common/exception.hpp"
|
5
|
+
#include "duckdb/common/types/row/tuple_data_collection.hpp"
|
6
|
+
|
7
|
+
namespace duckdb {
|
8
|
+
|
9
|
+
using ValidityBytes = TupleDataLayout::ValidityBytes;
|
10
|
+
|
11
|
+
template <bool NO_MATCH_SEL, class T, class OP>
|
12
|
+
static idx_t TemplatedMatch(Vector &, const TupleDataVectorFormat &lhs_format, SelectionVector &sel, const idx_t count,
|
13
|
+
const TupleDataLayout &rhs_layout, Vector &rhs_row_locations, const idx_t col_idx,
|
14
|
+
const vector<MatchFunction> &, SelectionVector *no_match_sel, idx_t &no_match_count) {
|
15
|
+
using COMPARISON_OP = ComparisonOperationWrapper<OP>;
|
16
|
+
|
17
|
+
// LHS
|
18
|
+
const auto &lhs_sel = *lhs_format.unified.sel;
|
19
|
+
const auto lhs_data = UnifiedVectorFormat::GetData<T>(lhs_format.unified);
|
20
|
+
const auto &lhs_validity = lhs_format.unified.validity;
|
21
|
+
|
22
|
+
// RHS
|
23
|
+
const auto rhs_locations = FlatVector::GetData<data_ptr_t>(rhs_row_locations);
|
24
|
+
const auto rhs_offset_in_row = rhs_layout.GetOffsets()[col_idx];
|
25
|
+
idx_t entry_idx;
|
26
|
+
idx_t idx_in_entry;
|
27
|
+
ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
|
28
|
+
|
29
|
+
idx_t match_count = 0;
|
30
|
+
for (idx_t i = 0; i < count; i++) {
|
31
|
+
const auto idx = sel.get_index(i);
|
32
|
+
|
33
|
+
const auto lhs_idx = lhs_sel.get_index(idx);
|
34
|
+
const auto lhs_null = lhs_validity.AllValid() ? false : !lhs_validity.RowIsValid(lhs_idx);
|
35
|
+
|
36
|
+
const auto &rhs_location = rhs_locations[idx];
|
37
|
+
const ValidityBytes rhs_mask(rhs_location);
|
38
|
+
const auto rhs_null = !rhs_mask.RowIsValid(rhs_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry);
|
39
|
+
|
40
|
+
if (COMPARISON_OP::template Operation<T>(lhs_data[lhs_idx], Load<T>(rhs_location + rhs_offset_in_row), lhs_null,
|
41
|
+
rhs_null)) {
|
42
|
+
sel.set_index(match_count++, idx);
|
43
|
+
} else if (NO_MATCH_SEL) {
|
44
|
+
no_match_sel->set_index(no_match_count++, idx);
|
45
|
+
}
|
46
|
+
}
|
47
|
+
return match_count;
|
48
|
+
}
|
49
|
+
|
50
|
+
template <bool NO_MATCH_SEL, class OP>
|
51
|
+
static idx_t StructMatchEquality(Vector &lhs_vector, const TupleDataVectorFormat &lhs_format, SelectionVector &sel,
|
52
|
+
const idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
|
53
|
+
const idx_t col_idx, const vector<MatchFunction> &child_functions,
|
54
|
+
SelectionVector *no_match_sel, idx_t &no_match_count) {
|
55
|
+
using COMPARISON_OP = ComparisonOperationWrapper<OP>;
|
56
|
+
|
57
|
+
// LHS
|
58
|
+
const auto &lhs_sel = *lhs_format.unified.sel;
|
59
|
+
const auto &lhs_validity = lhs_format.unified.validity;
|
60
|
+
|
61
|
+
// RHS
|
62
|
+
const auto rhs_locations = FlatVector::GetData<data_ptr_t>(rhs_row_locations);
|
63
|
+
idx_t entry_idx;
|
64
|
+
idx_t idx_in_entry;
|
65
|
+
ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
|
66
|
+
|
67
|
+
idx_t match_count = 0;
|
68
|
+
for (idx_t i = 0; i < count; i++) {
|
69
|
+
const auto idx = sel.get_index(i);
|
70
|
+
|
71
|
+
const auto lhs_idx = lhs_sel.get_index(idx);
|
72
|
+
const auto lhs_null = lhs_validity.AllValid() ? false : !lhs_validity.RowIsValid(lhs_idx);
|
73
|
+
|
74
|
+
const auto &rhs_location = rhs_locations[idx];
|
75
|
+
const ValidityBytes rhs_mask(rhs_location);
|
76
|
+
const auto rhs_null = !rhs_mask.RowIsValid(rhs_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry);
|
77
|
+
|
78
|
+
// For structs there is no value to compare, here we match NULLs and let recursion do the rest
|
79
|
+
// So we use the comparison only if rhs or LHS is NULL and COMPARE_NULL is true
|
80
|
+
if (!(lhs_null || rhs_null) ||
|
81
|
+
(COMPARISON_OP::COMPARE_NULL && COMPARISON_OP::template Operation<uint32_t>(0, 0, lhs_null, rhs_null))) {
|
82
|
+
sel.set_index(match_count++, idx);
|
83
|
+
} else if (NO_MATCH_SEL) {
|
84
|
+
no_match_sel->set_index(no_match_count++, idx);
|
85
|
+
}
|
86
|
+
}
|
87
|
+
|
88
|
+
// Create a Vector of pointers to the start of the TupleDataLayout of the STRUCT
|
89
|
+
Vector rhs_struct_row_locations(LogicalType::POINTER);
|
90
|
+
const auto rhs_offset_in_row = rhs_layout.GetOffsets()[col_idx];
|
91
|
+
auto rhs_struct_locations = FlatVector::GetData<data_ptr_t>(rhs_struct_row_locations);
|
92
|
+
for (idx_t i = 0; i < match_count; i++) {
|
93
|
+
const auto idx = sel.get_index(i);
|
94
|
+
rhs_struct_locations[idx] = rhs_locations[idx] + rhs_offset_in_row;
|
95
|
+
}
|
96
|
+
|
97
|
+
// Get the struct layout and struct entries
|
98
|
+
const auto &rhs_struct_layout = rhs_layout.GetStructLayout(col_idx);
|
99
|
+
auto &lhs_struct_vectors = StructVector::GetEntries(lhs_vector);
|
100
|
+
D_ASSERT(rhs_struct_layout.ColumnCount() == lhs_struct_vectors.size());
|
101
|
+
|
102
|
+
for (idx_t struct_col_idx = 0; struct_col_idx < rhs_struct_layout.ColumnCount(); struct_col_idx++) {
|
103
|
+
auto &lhs_struct_vector = *lhs_struct_vectors[struct_col_idx];
|
104
|
+
auto &lhs_struct_format = lhs_format.children[struct_col_idx];
|
105
|
+
const auto &child_function = child_functions[struct_col_idx];
|
106
|
+
match_count = child_function.function(lhs_struct_vector, lhs_struct_format, sel, match_count, rhs_struct_layout,
|
107
|
+
rhs_struct_row_locations, struct_col_idx, child_function.child_functions,
|
108
|
+
no_match_sel, no_match_count);
|
109
|
+
}
|
110
|
+
|
111
|
+
return match_count;
|
112
|
+
}
|
113
|
+
|
114
|
+
template <typename OP>
|
115
|
+
static idx_t SelectComparison(Vector &, Vector &, const SelectionVector &, idx_t, SelectionVector *,
|
116
|
+
SelectionVector *) {
|
117
|
+
throw NotImplementedException("Unsupported list comparison operand for RowMatcher::GetMatchFunction");
|
118
|
+
}
|
119
|
+
|
120
|
+
template <>
|
121
|
+
idx_t SelectComparison<Equals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
122
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
123
|
+
return VectorOperations::NestedEquals(left, right, sel, count, true_sel, false_sel);
|
124
|
+
}
|
125
|
+
|
126
|
+
template <>
|
127
|
+
idx_t SelectComparison<NotEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
128
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
129
|
+
return VectorOperations::NestedNotEquals(left, right, sel, count, true_sel, false_sel);
|
130
|
+
}
|
131
|
+
|
132
|
+
template <>
|
133
|
+
idx_t SelectComparison<DistinctFrom>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
134
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
135
|
+
return VectorOperations::DistinctFrom(left, right, &sel, count, true_sel, false_sel);
|
136
|
+
}
|
137
|
+
|
138
|
+
template <>
|
139
|
+
idx_t SelectComparison<NotDistinctFrom>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
140
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
141
|
+
return VectorOperations::NotDistinctFrom(left, right, &sel, count, true_sel, false_sel);
|
142
|
+
}
|
143
|
+
|
144
|
+
template <>
|
145
|
+
idx_t SelectComparison<GreaterThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
146
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
147
|
+
return VectorOperations::DistinctGreaterThan(left, right, &sel, count, true_sel, false_sel);
|
148
|
+
}
|
149
|
+
|
150
|
+
template <>
|
151
|
+
idx_t SelectComparison<GreaterThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
152
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
153
|
+
return VectorOperations::DistinctGreaterThanEquals(left, right, &sel, count, true_sel, false_sel);
|
154
|
+
}
|
155
|
+
|
156
|
+
template <>
|
157
|
+
idx_t SelectComparison<LessThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
158
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
159
|
+
return VectorOperations::DistinctLessThan(left, right, &sel, count, true_sel, false_sel);
|
160
|
+
}
|
161
|
+
|
162
|
+
template <>
|
163
|
+
idx_t SelectComparison<LessThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
164
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
165
|
+
return VectorOperations::DistinctLessThanEquals(left, right, &sel, count, true_sel, false_sel);
|
166
|
+
}
|
167
|
+
|
168
|
+
template <bool NO_MATCH_SEL, class OP>
|
169
|
+
static idx_t GenericNestedMatch(Vector &lhs_vector, const TupleDataVectorFormat &, SelectionVector &sel,
|
170
|
+
const idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
|
171
|
+
const idx_t col_idx, const vector<MatchFunction> &, SelectionVector *no_match_sel,
|
172
|
+
idx_t &no_match_count) {
|
173
|
+
const auto &type = rhs_layout.GetTypes()[col_idx];
|
174
|
+
|
175
|
+
// Gather a dense Vector containing the column values being matched
|
176
|
+
Vector key(type);
|
177
|
+
const auto gather_function = TupleDataCollection::GetGatherFunction(type);
|
178
|
+
gather_function.function(rhs_layout, rhs_row_locations, col_idx, sel, count, key,
|
179
|
+
*FlatVector::IncrementalSelectionVector(), key, gather_function.child_functions);
|
180
|
+
|
181
|
+
// Densify the input column
|
182
|
+
Vector sliced(lhs_vector, sel, count);
|
183
|
+
|
184
|
+
if (NO_MATCH_SEL) {
|
185
|
+
SelectionVector no_match_sel_offset(no_match_sel->data() + no_match_count);
|
186
|
+
auto match_count = SelectComparison<OP>(sliced, key, sel, count, &sel, &no_match_sel_offset);
|
187
|
+
no_match_count += count - match_count;
|
188
|
+
return match_count;
|
189
|
+
}
|
190
|
+
return SelectComparison<OP>(sliced, key, sel, count, &sel, nullptr);
|
191
|
+
}
|
192
|
+
|
193
|
+
void RowMatcher::Initialize(const bool no_match_sel, const TupleDataLayout &layout, const Predicates &predicates) {
|
194
|
+
match_functions.reserve(predicates.size());
|
195
|
+
for (idx_t col_idx = 0; col_idx < predicates.size(); col_idx++) {
|
196
|
+
match_functions.push_back(GetMatchFunction(no_match_sel, layout.GetTypes()[col_idx], predicates[col_idx]));
|
197
|
+
}
|
198
|
+
}
|
199
|
+
|
200
|
+
idx_t RowMatcher::Match(DataChunk &lhs, const vector<TupleDataVectorFormat> &lhs_formats, SelectionVector &sel,
|
201
|
+
idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
|
202
|
+
SelectionVector *no_match_sel, idx_t &no_match_count) {
|
203
|
+
D_ASSERT(!match_functions.empty());
|
204
|
+
for (idx_t col_idx = 0; col_idx < match_functions.size(); col_idx++) {
|
205
|
+
const auto &match_function = match_functions[col_idx];
|
206
|
+
count =
|
207
|
+
match_function.function(lhs.data[col_idx], lhs_formats[col_idx], sel, count, rhs_layout, rhs_row_locations,
|
208
|
+
col_idx, match_function.child_functions, no_match_sel, no_match_count);
|
209
|
+
}
|
210
|
+
return count;
|
211
|
+
}
|
212
|
+
|
213
|
+
MatchFunction RowMatcher::GetMatchFunction(const bool no_match_sel, const LogicalType &type,
|
214
|
+
const ExpressionType predicate) {
|
215
|
+
return no_match_sel ? GetMatchFunction<true>(type, predicate) : GetMatchFunction<false>(type, predicate);
|
216
|
+
}
|
217
|
+
|
218
|
+
template <bool NO_MATCH_SEL>
|
219
|
+
MatchFunction RowMatcher::GetMatchFunction(const LogicalType &type, const ExpressionType predicate) {
|
220
|
+
switch (type.InternalType()) {
|
221
|
+
case PhysicalType::BOOL:
|
222
|
+
return GetMatchFunction<NO_MATCH_SEL, bool>(predicate);
|
223
|
+
case PhysicalType::INT8:
|
224
|
+
return GetMatchFunction<NO_MATCH_SEL, int8_t>(predicate);
|
225
|
+
case PhysicalType::INT16:
|
226
|
+
return GetMatchFunction<NO_MATCH_SEL, int16_t>(predicate);
|
227
|
+
case PhysicalType::INT32:
|
228
|
+
return GetMatchFunction<NO_MATCH_SEL, int32_t>(predicate);
|
229
|
+
case PhysicalType::INT64:
|
230
|
+
return GetMatchFunction<NO_MATCH_SEL, int64_t>(predicate);
|
231
|
+
case PhysicalType::INT128:
|
232
|
+
return GetMatchFunction<NO_MATCH_SEL, hugeint_t>(predicate);
|
233
|
+
case PhysicalType::UINT8:
|
234
|
+
return GetMatchFunction<NO_MATCH_SEL, uint8_t>(predicate);
|
235
|
+
case PhysicalType::UINT16:
|
236
|
+
return GetMatchFunction<NO_MATCH_SEL, uint16_t>(predicate);
|
237
|
+
case PhysicalType::UINT32:
|
238
|
+
return GetMatchFunction<NO_MATCH_SEL, uint32_t>(predicate);
|
239
|
+
case PhysicalType::UINT64:
|
240
|
+
return GetMatchFunction<NO_MATCH_SEL, uint64_t>(predicate);
|
241
|
+
case PhysicalType::FLOAT:
|
242
|
+
return GetMatchFunction<NO_MATCH_SEL, float>(predicate);
|
243
|
+
case PhysicalType::DOUBLE:
|
244
|
+
return GetMatchFunction<NO_MATCH_SEL, double>(predicate);
|
245
|
+
case PhysicalType::INTERVAL:
|
246
|
+
return GetMatchFunction<NO_MATCH_SEL, interval_t>(predicate);
|
247
|
+
case PhysicalType::VARCHAR:
|
248
|
+
return GetMatchFunction<NO_MATCH_SEL, string_t>(predicate);
|
249
|
+
case PhysicalType::STRUCT:
|
250
|
+
return GetStructMatchFunction<NO_MATCH_SEL>(type, predicate);
|
251
|
+
case PhysicalType::LIST:
|
252
|
+
return GetListMatchFunction<NO_MATCH_SEL>(predicate);
|
253
|
+
default:
|
254
|
+
throw InternalException("Unsupported PhysicalType for RowMatcher::GetMatchFunction: %s",
|
255
|
+
EnumUtil::ToString(type.InternalType()));
|
256
|
+
}
|
257
|
+
}
|
258
|
+
|
259
|
+
template <bool NO_MATCH_SEL, class T>
|
260
|
+
MatchFunction RowMatcher::GetMatchFunction(const ExpressionType predicate) {
|
261
|
+
MatchFunction result;
|
262
|
+
switch (predicate) {
|
263
|
+
case ExpressionType::COMPARE_EQUAL:
|
264
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, Equals>;
|
265
|
+
break;
|
266
|
+
case ExpressionType::COMPARE_NOTEQUAL:
|
267
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, NotEquals>;
|
268
|
+
break;
|
269
|
+
case ExpressionType::COMPARE_DISTINCT_FROM:
|
270
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, DistinctFrom>;
|
271
|
+
break;
|
272
|
+
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
273
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, NotDistinctFrom>;
|
274
|
+
break;
|
275
|
+
case ExpressionType::COMPARE_GREATERTHAN:
|
276
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, GreaterThan>;
|
277
|
+
break;
|
278
|
+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
279
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, GreaterThanEquals>;
|
280
|
+
break;
|
281
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
282
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, LessThan>;
|
283
|
+
break;
|
284
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
285
|
+
result.function = TemplatedMatch<NO_MATCH_SEL, T, LessThanEquals>;
|
286
|
+
break;
|
287
|
+
default:
|
288
|
+
throw InternalException("Unsupported ExpressionType for RowMatcher::GetMatchFunction: %s",
|
289
|
+
EnumUtil::ToString(predicate));
|
290
|
+
}
|
291
|
+
return result;
|
292
|
+
}
|
293
|
+
|
294
|
+
template <bool NO_MATCH_SEL>
|
295
|
+
MatchFunction RowMatcher::GetStructMatchFunction(const LogicalType &type, const ExpressionType predicate) {
|
296
|
+
// We perform equality conditions like it's just a row, but we cannot perform inequality conditions like a row,
|
297
|
+
// because for equality conditions we need to always loop through all columns, but for inequality conditions,
|
298
|
+
// we need to find the first inequality, so the loop looks very different
|
299
|
+
MatchFunction result;
|
300
|
+
ExpressionType child_predicate = predicate;
|
301
|
+
switch (predicate) {
|
302
|
+
case ExpressionType::COMPARE_EQUAL:
|
303
|
+
result.function = StructMatchEquality<NO_MATCH_SEL, Equals>;
|
304
|
+
child_predicate = ExpressionType::COMPARE_NOT_DISTINCT_FROM;
|
305
|
+
break;
|
306
|
+
case ExpressionType::COMPARE_NOTEQUAL:
|
307
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, NotEquals>;
|
308
|
+
return result;
|
309
|
+
case ExpressionType::COMPARE_DISTINCT_FROM:
|
310
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, DistinctFrom>;
|
311
|
+
return result;
|
312
|
+
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
313
|
+
result.function = StructMatchEquality<NO_MATCH_SEL, NotDistinctFrom>;
|
314
|
+
break;
|
315
|
+
case ExpressionType::COMPARE_GREATERTHAN:
|
316
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThan>;
|
317
|
+
return result;
|
318
|
+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
319
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThanEquals>;
|
320
|
+
return result;
|
321
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
322
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, LessThan>;
|
323
|
+
return result;
|
324
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
325
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, LessThanEquals>;
|
326
|
+
return result;
|
327
|
+
default:
|
328
|
+
throw InternalException("Unsupported ExpressionType for RowMatcher::GetStructMatchFunction: %s",
|
329
|
+
EnumUtil::ToString(predicate));
|
330
|
+
}
|
331
|
+
|
332
|
+
result.child_functions.reserve(StructType::GetChildCount(type));
|
333
|
+
for (const auto &child_type : StructType::GetChildTypes(type)) {
|
334
|
+
result.child_functions.push_back(GetMatchFunction<NO_MATCH_SEL>(child_type.second, child_predicate));
|
335
|
+
}
|
336
|
+
|
337
|
+
return result;
|
338
|
+
}
|
339
|
+
|
340
|
+
template <bool NO_MATCH_SEL>
|
341
|
+
MatchFunction RowMatcher::GetListMatchFunction(const ExpressionType predicate) {
|
342
|
+
MatchFunction result;
|
343
|
+
switch (predicate) {
|
344
|
+
case ExpressionType::COMPARE_EQUAL:
|
345
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, Equals>;
|
346
|
+
break;
|
347
|
+
case ExpressionType::COMPARE_NOTEQUAL:
|
348
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, NotEquals>;
|
349
|
+
break;
|
350
|
+
case ExpressionType::COMPARE_DISTINCT_FROM:
|
351
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, DistinctFrom>;
|
352
|
+
break;
|
353
|
+
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
354
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, NotDistinctFrom>;
|
355
|
+
break;
|
356
|
+
case ExpressionType::COMPARE_GREATERTHAN:
|
357
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThan>;
|
358
|
+
break;
|
359
|
+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
360
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, GreaterThanEquals>;
|
361
|
+
break;
|
362
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
363
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, LessThan>;
|
364
|
+
break;
|
365
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
366
|
+
result.function = GenericNestedMatch<NO_MATCH_SEL, LessThanEquals>;
|
367
|
+
break;
|
368
|
+
default:
|
369
|
+
throw InternalException("Unsupported ExpressionType for RowMatcher::GetListMatchFunction: %s",
|
370
|
+
EnumUtil::ToString(predicate));
|
371
|
+
}
|
372
|
+
return result;
|
373
|
+
}
|
374
|
+
|
375
|
+
} // namespace duckdb
|
@@ -13,6 +13,10 @@
|
|
13
13
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
14
14
|
#include "duckdb/execution/execution_context.hpp"
|
15
15
|
|
16
|
+
#include "duckdb/common/serializer/memory_stream.hpp"
|
17
|
+
#include "duckdb/common/serializer/binary_serializer.hpp"
|
18
|
+
#include "duckdb/common/serializer/binary_deserializer.hpp"
|
19
|
+
|
16
20
|
namespace duckdb {
|
17
21
|
|
18
22
|
DataChunk::DataChunk() : count(0), capacity(STANDARD_VECTOR_SIZE) {
|
@@ -231,16 +235,20 @@ string DataChunk::ToString() const {
|
|
231
235
|
}
|
232
236
|
|
233
237
|
void DataChunk::Serialize(Serializer &serializer) const {
|
238
|
+
|
234
239
|
// write the count
|
235
240
|
auto row_count = size();
|
236
241
|
serializer.WriteProperty<sel_t>(100, "rows", row_count);
|
242
|
+
|
243
|
+
// we should never try to serialize empty data chunks
|
237
244
|
auto column_count = ColumnCount();
|
245
|
+
D_ASSERT(column_count);
|
238
246
|
|
239
|
-
//
|
247
|
+
// write the types
|
240
248
|
serializer.WriteList(101, "types", column_count,
|
241
249
|
[&](Serializer::List &list, idx_t i) { list.WriteElement(data[i].GetType()); });
|
242
250
|
|
243
|
-
//
|
251
|
+
// write the data
|
244
252
|
serializer.WriteList(102, "columns", column_count, [&](Serializer::List &list, idx_t i) {
|
245
253
|
list.WriteObject([&](Serializer &object) {
|
246
254
|
// Reference the vector to avoid potentially mutating it during serialization
|
@@ -252,21 +260,23 @@ void DataChunk::Serialize(Serializer &serializer) const {
|
|
252
260
|
}
|
253
261
|
|
254
262
|
void DataChunk::Deserialize(Deserializer &deserializer) {
|
255
|
-
|
263
|
+
|
264
|
+
// read and set the row count
|
256
265
|
auto row_count = deserializer.ReadProperty<sel_t>(100, "rows");
|
266
|
+
SetCardinality(row_count);
|
257
267
|
|
258
|
-
//
|
268
|
+
// read the types
|
259
269
|
vector<LogicalType> types;
|
260
270
|
deserializer.ReadList(101, "types", [&](Deserializer::List &list, idx_t i) {
|
261
271
|
auto type = list.ReadElement<LogicalType>();
|
262
272
|
types.push_back(type);
|
263
273
|
});
|
264
|
-
Initialize(Allocator::DefaultAllocator(), types);
|
265
274
|
|
266
|
-
//
|
267
|
-
|
275
|
+
// initialize the data chunk
|
276
|
+
D_ASSERT(!types.empty());
|
277
|
+
Initialize(Allocator::DefaultAllocator(), types);
|
268
278
|
|
269
|
-
//
|
279
|
+
// read the data
|
270
280
|
deserializer.ReadList(102, "columns", [&](Deserializer::List &list, idx_t i) {
|
271
281
|
list.ReadObject([&](Deserializer &object) { data[i].Deserialize(object, row_count); });
|
272
282
|
});
|
@@ -296,11 +306,11 @@ void DataChunk::Slice(DataChunk &other, const SelectionVector &sel, idx_t count_
|
|
296
306
|
}
|
297
307
|
|
298
308
|
unsafe_unique_array<UnifiedVectorFormat> DataChunk::ToUnifiedFormat() {
|
299
|
-
auto
|
309
|
+
auto unified_data = make_unsafe_uniq_array<UnifiedVectorFormat>(ColumnCount());
|
300
310
|
for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) {
|
301
|
-
data[col_idx].ToUnifiedFormat(size(),
|
311
|
+
data[col_idx].ToUnifiedFormat(size(), unified_data[col_idx]);
|
302
312
|
}
|
303
|
-
return
|
313
|
+
return unified_data;
|
304
314
|
}
|
305
315
|
|
306
316
|
void DataChunk::Hash(Vector &result) {
|
@@ -324,10 +334,37 @@ void DataChunk::Hash(vector<idx_t> &column_ids, Vector &result) {
|
|
324
334
|
void DataChunk::Verify() {
|
325
335
|
#ifdef DEBUG
|
326
336
|
D_ASSERT(size() <= capacity);
|
337
|
+
|
327
338
|
// verify that all vectors in this chunk have the chunk selection vector
|
328
339
|
for (idx_t i = 0; i < ColumnCount(); i++) {
|
329
340
|
data[i].Verify(size());
|
330
341
|
}
|
342
|
+
|
343
|
+
if (!ColumnCount()) {
|
344
|
+
// don't try to round-trip dummy data chunks with no data
|
345
|
+
// e.g., these exist in queries like 'SELECT distinct(col0, col1) FROM tbl', where we have groups, but no
|
346
|
+
// payload so the payload will be such an empty data chunk
|
347
|
+
return;
|
348
|
+
}
|
349
|
+
|
350
|
+
// verify that we can round-trip chunk serialization
|
351
|
+
MemoryStream mem_stream;
|
352
|
+
BinarySerializer serializer(mem_stream);
|
353
|
+
|
354
|
+
serializer.Begin();
|
355
|
+
Serialize(serializer);
|
356
|
+
serializer.End();
|
357
|
+
|
358
|
+
mem_stream.Rewind();
|
359
|
+
|
360
|
+
BinaryDeserializer deserializer(mem_stream);
|
361
|
+
DataChunk new_chunk;
|
362
|
+
|
363
|
+
deserializer.Begin();
|
364
|
+
new_chunk.Deserialize(deserializer);
|
365
|
+
deserializer.End();
|
366
|
+
|
367
|
+
D_ASSERT(size() == new_chunk.size());
|
331
368
|
#endif
|
332
369
|
}
|
333
370
|
|
@@ -294,7 +294,7 @@ static inline void VerifyStrings(const LogicalTypeId type_id, const data_ptr_t r
|
|
294
294
|
for (idx_t i = 0; i < count; i++) {
|
295
295
|
const auto &row_location = row_locations[offset + i] + base_col_offset;
|
296
296
|
ValidityBytes row_mask(row_location);
|
297
|
-
if (row_mask.RowIsValid(row_mask.
|
297
|
+
if (row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
|
298
298
|
auto recomputed_string = Load<string_t>(row_location + col_offset);
|
299
299
|
recomputed_string.Verify();
|
300
300
|
}
|
@@ -328,7 +328,7 @@ void TupleDataAllocator::RecomputeHeapPointers(Vector &old_heap_ptrs, const Sele
|
|
328
328
|
const auto idx = offset + i;
|
329
329
|
const auto &row_location = row_locations[idx] + base_col_offset;
|
330
330
|
ValidityBytes row_mask(row_location);
|
331
|
-
if (!row_mask.RowIsValid(row_mask.
|
331
|
+
if (!row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
|
332
332
|
continue;
|
333
333
|
}
|
334
334
|
|
@@ -352,7 +352,7 @@ void TupleDataAllocator::RecomputeHeapPointers(Vector &old_heap_ptrs, const Sele
|
|
352
352
|
const auto idx = offset + i;
|
353
353
|
const auto &row_location = row_locations[idx] + base_col_offset;
|
354
354
|
ValidityBytes row_mask(row_location);
|
355
|
-
if (!row_mask.RowIsValid(row_mask.
|
355
|
+
if (!row_mask.RowIsValid(row_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry)) {
|
356
356
|
continue;
|
357
357
|
}
|
358
358
|
|
@@ -37,13 +37,17 @@ void TupleDataCollection::Initialize() {
|
|
37
37
|
}
|
38
38
|
}
|
39
39
|
|
40
|
-
void
|
41
|
-
column_ids.reserve(
|
42
|
-
for (idx_t col_idx = 0; col_idx <
|
40
|
+
void GetAllColumnIDsInternal(vector<column_t> &column_ids, const idx_t column_count) {
|
41
|
+
column_ids.reserve(column_count);
|
42
|
+
for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
|
43
43
|
column_ids.emplace_back(col_idx);
|
44
44
|
}
|
45
45
|
}
|
46
46
|
|
47
|
+
void TupleDataCollection::GetAllColumnIDs(vector<column_t> &column_ids) {
|
48
|
+
GetAllColumnIDsInternal(column_ids, layout.ColumnCount());
|
49
|
+
}
|
50
|
+
|
47
51
|
const TupleDataLayout &TupleDataCollection::GetLayout() const {
|
48
52
|
return layout;
|
49
53
|
}
|
@@ -108,7 +112,7 @@ void TupleDataCollection::InitializeAppend(TupleDataAppendState &append_state, v
|
|
108
112
|
TupleDataPinProperties properties) {
|
109
113
|
VerifyAppendColumns(layout, column_ids);
|
110
114
|
InitializeAppend(append_state.pin_state, properties);
|
111
|
-
|
115
|
+
InitializeChunkState(append_state.chunk_state, std::move(column_ids));
|
112
116
|
}
|
113
117
|
|
114
118
|
void TupleDataCollection::InitializeAppend(TupleDataPinState &pin_state, TupleDataPinProperties properties) {
|
@@ -130,11 +134,11 @@ static void InitializeVectorFormat(vector<TupleDataVectorFormat> &vector_data, c
|
|
130
134
|
for (const auto &child_entry : child_list) {
|
131
135
|
child_types.emplace_back(child_entry.second);
|
132
136
|
}
|
133
|
-
InitializeVectorFormat(vector_data[col_idx].
|
137
|
+
InitializeVectorFormat(vector_data[col_idx].children, child_types);
|
134
138
|
break;
|
135
139
|
}
|
136
140
|
case PhysicalType::LIST:
|
137
|
-
InitializeVectorFormat(vector_data[col_idx].
|
141
|
+
InitializeVectorFormat(vector_data[col_idx].children, {ListType::GetChildType(type)});
|
138
142
|
break;
|
139
143
|
default:
|
140
144
|
break;
|
@@ -142,11 +146,16 @@ static void InitializeVectorFormat(vector<TupleDataVectorFormat> &vector_data, c
|
|
142
146
|
}
|
143
147
|
}
|
144
148
|
|
145
|
-
void TupleDataCollection::
|
149
|
+
void TupleDataCollection::InitializeChunkState(TupleDataChunkState &chunk_state, vector<column_t> column_ids) {
|
150
|
+
TupleDataCollection::InitializeChunkState(chunk_state, layout.GetTypes(), std::move(column_ids));
|
151
|
+
}
|
152
|
+
|
153
|
+
void TupleDataCollection::InitializeChunkState(TupleDataChunkState &chunk_state, const vector<LogicalType> &types,
|
154
|
+
vector<column_t> column_ids) {
|
146
155
|
if (column_ids.empty()) {
|
147
|
-
|
156
|
+
GetAllColumnIDsInternal(column_ids, types.size());
|
148
157
|
}
|
149
|
-
InitializeVectorFormat(chunk_state.vector_data,
|
158
|
+
InitializeVectorFormat(chunk_state.vector_data, types);
|
150
159
|
chunk_state.column_ids = std::move(column_ids);
|
151
160
|
}
|
152
161
|
|
@@ -211,21 +220,23 @@ void TupleDataCollection::AppendUnified(TupleDataPinState &pin_state, TupleDataC
|
|
211
220
|
}
|
212
221
|
|
213
222
|
static inline void ToUnifiedFormatInternal(TupleDataVectorFormat &format, Vector &vector, const idx_t count) {
|
214
|
-
vector.ToUnifiedFormat(count, format.
|
215
|
-
format.original_sel = format.
|
216
|
-
format.original_owned_sel.Initialize(format.
|
223
|
+
vector.ToUnifiedFormat(count, format.unified);
|
224
|
+
format.original_sel = format.unified.sel;
|
225
|
+
format.original_owned_sel.Initialize(format.unified.owned_sel);
|
217
226
|
switch (vector.GetType().InternalType()) {
|
218
227
|
case PhysicalType::STRUCT: {
|
219
228
|
auto &entries = StructVector::GetEntries(vector);
|
220
|
-
D_ASSERT(format.
|
229
|
+
D_ASSERT(format.children.size() == entries.size());
|
221
230
|
for (idx_t struct_col_idx = 0; struct_col_idx < entries.size(); struct_col_idx++) {
|
222
|
-
ToUnifiedFormatInternal(format.
|
231
|
+
ToUnifiedFormatInternal(reinterpret_cast<TupleDataVectorFormat &>(format.children[struct_col_idx]),
|
232
|
+
*entries[struct_col_idx], count);
|
223
233
|
}
|
224
234
|
break;
|
225
235
|
}
|
226
236
|
case PhysicalType::LIST:
|
227
|
-
D_ASSERT(format.
|
228
|
-
ToUnifiedFormatInternal(format.
|
237
|
+
D_ASSERT(format.children.size() == 1);
|
238
|
+
ToUnifiedFormatInternal(reinterpret_cast<TupleDataVectorFormat &>(format.children[0]),
|
239
|
+
ListVector::GetEntry(vector), ListVector::GetListSize(vector));
|
229
240
|
break;
|
230
241
|
default:
|
231
242
|
break;
|
@@ -242,7 +253,7 @@ void TupleDataCollection::ToUnifiedFormat(TupleDataChunkState &chunk_state, Data
|
|
242
253
|
void TupleDataCollection::GetVectorData(const TupleDataChunkState &chunk_state, UnifiedVectorFormat result[]) {
|
243
254
|
const auto &vector_data = chunk_state.vector_data;
|
244
255
|
for (idx_t i = 0; i < vector_data.size(); i++) {
|
245
|
-
const auto &source = vector_data[i].
|
256
|
+
const auto &source = vector_data[i].unified;
|
246
257
|
auto &target = result[i];
|
247
258
|
target.sel = source.sel;
|
248
259
|
target.data = source.data;
|