duckdb 0.8.2-dev4711.0 → 0.8.2-dev5002.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +0 -1
- package/binding.gyp.in +0 -1
- package/package.json +1 -1
- package/src/connection.cpp +10 -23
- package/src/data_chunk.cpp +1 -3
- package/src/database.cpp +4 -9
- package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
- package/src/duckdb/extension/json/json_functions.cpp +4 -6
- package/src/duckdb/src/common/enum_util.cpp +10 -5
- package/src/duckdb/src/common/operator/cast_operators.cpp +18 -0
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +375 -0
- package/src/duckdb/src/common/types/data_chunk.cpp +48 -11
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +28 -17
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
- package/src/duckdb/src/common/types/vector.cpp +0 -1
- package/src/duckdb/src/common/types.cpp +1 -1
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
- package/src/duckdb/src/core_functions/function_list.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
- package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +5 -1
- package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +10 -1
- package/src/duckdb/src/core_functions/scalar/map/map_concat.cpp +0 -2
- package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
- package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
- package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_inner.cpp +20 -27
- package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_mark.cpp +21 -9
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
- package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
- package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
- package/src/duckdb/src/function/cast/time_casts.cpp +12 -0
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
- package/src/duckdb/src/function/function_binder.cpp +10 -9
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +3 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +38 -2
- package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +5 -5
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +7 -7
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +12 -12
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +12 -12
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +33 -33
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +13 -13
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create_table.hpp +1 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -1
- package/src/duckdb/src/main/config.cpp +1 -1
- package/src/duckdb/src/main/relation.cpp +10 -0
- package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +28 -6
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +3 -0
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +14 -6
- package/src/duckdb/src/planner/operator/logical_create_table.cpp +3 -3
- package/src/duckdb/src/planner/operator/logical_delete.cpp +3 -2
- package/src/duckdb/src/planner/operator/logical_insert.cpp +3 -2
- package/src/duckdb/src/planner/operator/logical_update.cpp +3 -2
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
- package/src/duckdb/src/storage/data_table.cpp +18 -8
- package/src/duckdb/src/storage/local_storage.cpp +2 -3
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +64 -80
- package/src/duckdb/src/storage/storage_manager.cpp +6 -2
- package/src/duckdb/src/storage/table/row_group.cpp +6 -0
- package/src/duckdb/src/storage/table/row_group_collection.cpp +4 -3
- package/src/duckdb/src/storage/table/struct_column_data.cpp +2 -0
- package/src/duckdb/src/transaction/duck_transaction.cpp +1 -0
- package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
- package/src/statement.cpp +2 -4
- package/test/database_fail.test.ts +6 -0
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
@@ -236,9 +236,18 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
|
|
236
236
|
static unique_ptr<FunctionData> ListSortBind(ClientContext &context, ScalarFunction &bound_function,
|
237
237
|
vector<unique_ptr<Expression>> &arguments, OrderType &order,
|
238
238
|
OrderByNullType &null_order) {
|
239
|
+
|
240
|
+
LogicalType child_type;
|
241
|
+
if (arguments[0]->return_type == LogicalTypeId::UNKNOWN) {
|
242
|
+
bound_function.arguments[0] = LogicalTypeId::UNKNOWN;
|
243
|
+
bound_function.return_type = LogicalType::SQLNULL;
|
244
|
+
child_type = bound_function.return_type;
|
245
|
+
return make_uniq<ListSortBindData>(order, null_order, bound_function.return_type, child_type, context);
|
246
|
+
}
|
247
|
+
|
239
248
|
bound_function.arguments[0] = arguments[0]->return_type;
|
240
249
|
bound_function.return_type = arguments[0]->return_type;
|
241
|
-
|
250
|
+
child_type = ListType::GetChildType(arguments[0]->return_type);
|
242
251
|
|
243
252
|
return make_uniq<ListSortBindData>(order, null_order, bound_function.return_type, child_type, context);
|
244
253
|
}
|
@@ -97,7 +97,6 @@ static void MapConcatFunction(DataChunk &args, ExpressionState &state, Vector &r
|
|
97
97
|
auto &values = MapVector::GetValues(map);
|
98
98
|
values_list.push_back(values.GetValue(mapping.key_index));
|
99
99
|
}
|
100
|
-
idx_t entries_count = keys_list.size();
|
101
100
|
D_ASSERT(values_list.size() == keys_list.size());
|
102
101
|
result_entry.offset = ListVector::GetListSize(result);
|
103
102
|
result_entry.length = values_list.size();
|
@@ -105,7 +104,6 @@ static void MapConcatFunction(DataChunk &args, ExpressionState &state, Vector &r
|
|
105
104
|
for (auto &list_entry : list_entries) {
|
106
105
|
ListVector::PushBack(result, list_entry);
|
107
106
|
}
|
108
|
-
ListVector::SetListSize(result, ListVector::GetListSize(result) + entries_count);
|
109
107
|
}
|
110
108
|
|
111
109
|
if (args.AllConstant()) {
|
@@ -1,10 +1,9 @@
|
|
1
|
-
#include "duckdb/core_functions/scalar/string_functions.hpp"
|
2
|
-
|
3
1
|
#include "duckdb/common/exception.hpp"
|
4
2
|
#include "duckdb/common/vector_operations/binary_executor.hpp"
|
3
|
+
#include "duckdb/core_functions/scalar/string_functions.hpp"
|
5
4
|
|
6
|
-
#include <string.h>
|
7
5
|
#include <ctype.h>
|
6
|
+
#include <string.h>
|
8
7
|
|
9
8
|
namespace duckdb {
|
10
9
|
|
@@ -33,8 +32,12 @@ static void RepeatFunction(DataChunk &args, ExpressionState &state, Vector &resu
|
|
33
32
|
});
|
34
33
|
}
|
35
34
|
|
36
|
-
|
37
|
-
|
35
|
+
ScalarFunctionSet RepeatFun::GetFunctions() {
|
36
|
+
ScalarFunctionSet repeat;
|
37
|
+
for (const auto &type : {LogicalType::VARCHAR, LogicalType::BLOB}) {
|
38
|
+
repeat.AddFunction(ScalarFunction({type, LogicalType::BIGINT}, type, RepeatFunction));
|
39
|
+
}
|
40
|
+
return repeat;
|
38
41
|
}
|
39
42
|
|
40
43
|
} // namespace duckdb
|
@@ -45,6 +45,7 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
|
|
45
45
|
// Append hash column to the end and initialise the row layout
|
46
46
|
group_types_p.emplace_back(LogicalType::HASH);
|
47
47
|
layout.Initialize(std::move(group_types_p), std::move(aggregate_objects_p));
|
48
|
+
|
48
49
|
hash_offset = layout.GetOffsets()[layout.ColumnCount() - 1];
|
49
50
|
|
50
51
|
// Partitioned data and pointer table
|
@@ -52,7 +53,8 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
|
|
52
53
|
Resize(initial_capacity);
|
53
54
|
|
54
55
|
// Predicates
|
55
|
-
predicates.resize(layout.ColumnCount() - 1, ExpressionType::
|
56
|
+
predicates.resize(layout.ColumnCount() - 1, ExpressionType::COMPARE_NOT_DISTINCT_FROM);
|
57
|
+
row_matcher.Initialize(true, layout, predicates);
|
56
58
|
}
|
57
59
|
|
58
60
|
void GroupedAggregateHashTable::InitializePartitionedData() {
|
@@ -414,9 +416,8 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
|
|
414
416
|
}
|
415
417
|
|
416
418
|
// Perform group comparisons
|
417
|
-
|
418
|
-
|
419
|
-
no_match_count);
|
419
|
+
row_matcher.Match(state.group_chunk, chunk_state.vector_data, state.group_compare_vector,
|
420
|
+
need_compare_count, layout, addresses_v, &state.no_match_vector, no_match_count);
|
420
421
|
}
|
421
422
|
|
422
423
|
// Linear probing: each of the entries that do not match move to the next entry in the HT
|
@@ -173,6 +173,19 @@ bool FixedSizeAllocator::InitializeVacuum() {
|
|
173
173
|
return false;
|
174
174
|
}
|
175
175
|
|
176
|
+
// remove all empty buffers
|
177
|
+
auto buffer_it = buffers.begin();
|
178
|
+
while (buffer_it != buffers.end()) {
|
179
|
+
if (!buffer_it->second.segment_count) {
|
180
|
+
buffers_with_free_space.erase(buffer_it->first);
|
181
|
+
buffer_it->second.Destroy();
|
182
|
+
buffer_it = buffers.erase(buffer_it);
|
183
|
+
} else {
|
184
|
+
buffer_it++;
|
185
|
+
}
|
186
|
+
}
|
187
|
+
|
188
|
+
// determine if a vacuum is necessary
|
176
189
|
multimap<idx_t, idx_t> temporary_vacuum_buffers;
|
177
190
|
D_ASSERT(vacuum_buffers.empty());
|
178
191
|
idx_t available_segments_in_memory = 0;
|
@@ -19,15 +19,15 @@ JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector<JoinC
|
|
19
19
|
: buffer_manager(buffer_manager_p), conditions(conditions_p), build_types(std::move(btypes)), entry_size(0),
|
20
20
|
tuple_size(0), vfound(Value::BOOLEAN(false)), join_type(type_p), finalized(false), has_null(false),
|
21
21
|
external(false), radix_bits(4), partition_start(0), partition_end(0) {
|
22
|
+
|
22
23
|
for (auto &condition : conditions) {
|
23
24
|
D_ASSERT(condition.left->return_type == condition.right->return_type);
|
24
25
|
auto type = condition.left->return_type;
|
25
26
|
if (condition.comparison == ExpressionType::COMPARE_EQUAL ||
|
26
|
-
condition.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM
|
27
|
-
|
28
|
-
// all equality conditions
|
29
|
-
// all other conditions at the back
|
30
|
-
// this assert checks that
|
27
|
+
condition.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
|
28
|
+
|
29
|
+
// ensure that all equality conditions are at the front,
|
30
|
+
// and that all other conditions are at the back
|
31
31
|
D_ASSERT(equality_types.size() == condition_types.size());
|
32
32
|
equality_types.push_back(type);
|
33
33
|
}
|
@@ -51,6 +51,8 @@ JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector<JoinC
|
|
51
51
|
}
|
52
52
|
layout_types.emplace_back(LogicalType::HASH);
|
53
53
|
layout.Initialize(layout_types, false);
|
54
|
+
row_matcher.Initialize(false, layout, predicates);
|
55
|
+
row_matcher_no_match_sel.Initialize(true, layout, predicates);
|
54
56
|
|
55
57
|
const auto &offsets = layout.GetOffsets();
|
56
58
|
tuple_size = offsets[condition_types.size() + build_types.size()];
|
@@ -142,30 +144,6 @@ static idx_t FilterNullValues(UnifiedVectorFormat &vdata, const SelectionVector
|
|
142
144
|
return result_count;
|
143
145
|
}
|
144
146
|
|
145
|
-
idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unsafe_unique_array<UnifiedVectorFormat> &key_data,
|
146
|
-
const SelectionVector *&current_sel, SelectionVector &sel, bool build_side) {
|
147
|
-
key_data = keys.ToUnifiedFormat();
|
148
|
-
|
149
|
-
// figure out which keys are NULL, and create a selection vector out of them
|
150
|
-
current_sel = FlatVector::IncrementalSelectionVector();
|
151
|
-
idx_t added_count = keys.size();
|
152
|
-
if (build_side && IsRightOuterJoin(join_type)) {
|
153
|
-
// in case of a right or full outer join, we cannot remove NULL keys from the build side
|
154
|
-
return added_count;
|
155
|
-
}
|
156
|
-
for (idx_t i = 0; i < keys.ColumnCount(); i++) {
|
157
|
-
if (!null_values_are_equal[i]) {
|
158
|
-
if (key_data[i].validity.AllValid()) {
|
159
|
-
continue;
|
160
|
-
}
|
161
|
-
added_count = FilterNullValues(key_data[i], *current_sel, added_count, sel);
|
162
|
-
// null values are NOT equal for this column, filter them out
|
163
|
-
current_sel = &sel;
|
164
|
-
}
|
165
|
-
}
|
166
|
-
return added_count;
|
167
|
-
}
|
168
|
-
|
169
147
|
void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChunk &keys, DataChunk &payload) {
|
170
148
|
D_ASSERT(!finalized);
|
171
149
|
D_ASSERT(keys.size() == payload.size());
|
@@ -194,23 +172,6 @@ void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChu
|
|
194
172
|
info.correlated_counts->AddChunk(info.group_chunk, info.correlated_payload, AggregateType::NON_DISTINCT);
|
195
173
|
}
|
196
174
|
|
197
|
-
// prepare the keys for processing
|
198
|
-
unsafe_unique_array<UnifiedVectorFormat> key_data;
|
199
|
-
const SelectionVector *current_sel;
|
200
|
-
SelectionVector sel(STANDARD_VECTOR_SIZE);
|
201
|
-
idx_t added_count = PrepareKeys(keys, key_data, current_sel, sel, true);
|
202
|
-
if (added_count < keys.size()) {
|
203
|
-
has_null = true;
|
204
|
-
}
|
205
|
-
if (added_count == 0) {
|
206
|
-
return;
|
207
|
-
}
|
208
|
-
|
209
|
-
// hash the keys and obtain an entry in the list
|
210
|
-
// note that we only hash the keys used in the equality comparison
|
211
|
-
Vector hash_values(LogicalType::HASH);
|
212
|
-
Hash(keys, *current_sel, added_count, hash_values);
|
213
|
-
|
214
175
|
// build a chunk to append to the data collection [keys, payload, (optional "found" boolean), hash]
|
215
176
|
DataChunk source_chunk;
|
216
177
|
source_chunk.InitializeEmpty(layout.GetTypes());
|
@@ -228,13 +189,58 @@ void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChu
|
|
228
189
|
source_chunk.data[col_offset].Reference(vfound);
|
229
190
|
col_offset++;
|
230
191
|
}
|
192
|
+
Vector hash_values(LogicalType::HASH);
|
231
193
|
source_chunk.data[col_offset].Reference(hash_values);
|
232
194
|
source_chunk.SetCardinality(keys);
|
233
195
|
|
196
|
+
// ToUnifiedFormat the source chunk
|
197
|
+
TupleDataCollection::ToUnifiedFormat(append_state.chunk_state, source_chunk);
|
198
|
+
|
199
|
+
// prepare the keys for processing
|
200
|
+
const SelectionVector *current_sel;
|
201
|
+
SelectionVector sel(STANDARD_VECTOR_SIZE);
|
202
|
+
idx_t added_count = PrepareKeys(keys, append_state.chunk_state.vector_data, current_sel, sel, true);
|
234
203
|
if (added_count < keys.size()) {
|
235
|
-
|
204
|
+
has_null = true;
|
205
|
+
}
|
206
|
+
if (added_count == 0) {
|
207
|
+
return;
|
236
208
|
}
|
237
|
-
|
209
|
+
|
210
|
+
// hash the keys and obtain an entry in the list
|
211
|
+
// note that we only hash the keys used in the equality comparison
|
212
|
+
Hash(keys, *current_sel, added_count, hash_values);
|
213
|
+
|
214
|
+
// Re-reference and ToUnifiedFormat the hash column after computing it
|
215
|
+
source_chunk.data[col_offset].Reference(hash_values);
|
216
|
+
hash_values.ToUnifiedFormat(source_chunk.size(), append_state.chunk_state.vector_data.back().unified);
|
217
|
+
|
218
|
+
// We already called TupleDataCollection::ToUnifiedFormat, so we can AppendUnified here
|
219
|
+
sink_collection->AppendUnified(append_state, source_chunk, *current_sel, added_count);
|
220
|
+
}
|
221
|
+
|
222
|
+
idx_t JoinHashTable::PrepareKeys(DataChunk &keys, vector<TupleDataVectorFormat> &vector_data,
|
223
|
+
const SelectionVector *&current_sel, SelectionVector &sel, bool build_side) {
|
224
|
+
// figure out which keys are NULL, and create a selection vector out of them
|
225
|
+
current_sel = FlatVector::IncrementalSelectionVector();
|
226
|
+
idx_t added_count = keys.size();
|
227
|
+
if (build_side && IsRightOuterJoin(join_type)) {
|
228
|
+
// in case of a right or full outer join, we cannot remove NULL keys from the build side
|
229
|
+
return added_count;
|
230
|
+
}
|
231
|
+
|
232
|
+
for (idx_t col_idx = 0; col_idx < keys.ColumnCount(); col_idx++) {
|
233
|
+
if (!null_values_are_equal[col_idx]) {
|
234
|
+
auto &col_key_data = vector_data[col_idx].unified;
|
235
|
+
if (col_key_data.validity.AllValid()) {
|
236
|
+
continue;
|
237
|
+
}
|
238
|
+
added_count = FilterNullValues(col_key_data, *current_sel, added_count, sel);
|
239
|
+
// null values are NOT equal for this column, filter them out
|
240
|
+
current_sel = &sel;
|
241
|
+
}
|
242
|
+
}
|
243
|
+
return added_count;
|
238
244
|
}
|
239
245
|
|
240
246
|
template <bool PARALLEL>
|
@@ -322,12 +328,13 @@ void JoinHashTable::Finalize(idx_t chunk_idx_from, idx_t chunk_idx_to, bool para
|
|
322
328
|
} while (iterator.Next());
|
323
329
|
}
|
324
330
|
|
325
|
-
unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys,
|
331
|
+
unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys, TupleDataChunkState &key_state,
|
332
|
+
const SelectionVector *&current_sel) {
|
326
333
|
D_ASSERT(Count() > 0); // should be handled before
|
327
334
|
D_ASSERT(finalized);
|
328
335
|
|
329
336
|
// set up the scan structure
|
330
|
-
auto ss = make_uniq<ScanStructure>(*this);
|
337
|
+
auto ss = make_uniq<ScanStructure>(*this, key_state);
|
331
338
|
|
332
339
|
if (join_type != JoinType::INNER) {
|
333
340
|
ss->found_match = make_unsafe_uniq_array<bool>(STANDARD_VECTOR_SIZE);
|
@@ -335,13 +342,15 @@ unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys
|
|
335
342
|
}
|
336
343
|
|
337
344
|
// first prepare the keys for probing
|
338
|
-
|
345
|
+
TupleDataCollection::ToUnifiedFormat(key_state, keys);
|
346
|
+
ss->count = PrepareKeys(keys, key_state.vector_data, current_sel, ss->sel_vector, false);
|
339
347
|
return ss;
|
340
348
|
}
|
341
349
|
|
342
|
-
unique_ptr<ScanStructure> JoinHashTable::Probe(DataChunk &keys,
|
350
|
+
unique_ptr<ScanStructure> JoinHashTable::Probe(DataChunk &keys, TupleDataChunkState &key_state,
|
351
|
+
Vector *precomputed_hashes) {
|
343
352
|
const SelectionVector *current_sel;
|
344
|
-
auto ss = InitializeScanStructure(keys, current_sel);
|
353
|
+
auto ss = InitializeScanStructure(keys, key_state, current_sel);
|
345
354
|
if (ss->count == 0) {
|
346
355
|
return ss;
|
347
356
|
}
|
@@ -363,8 +372,9 @@ unique_ptr<ScanStructure> JoinHashTable::Probe(DataChunk &keys, Vector *precompu
|
|
363
372
|
return ss;
|
364
373
|
}
|
365
374
|
|
366
|
-
ScanStructure::ScanStructure(JoinHashTable &
|
367
|
-
: pointers(LogicalType::POINTER), sel_vector(STANDARD_VECTOR_SIZE), ht(
|
375
|
+
ScanStructure::ScanStructure(JoinHashTable &ht_p, TupleDataChunkState &key_state_p)
|
376
|
+
: key_state(key_state_p), pointers(LogicalType::POINTER), sel_vector(STANDARD_VECTOR_SIZE), ht(ht_p),
|
377
|
+
finished(false) {
|
368
378
|
}
|
369
379
|
|
370
380
|
void ScanStructure::Next(DataChunk &keys, DataChunk &left, DataChunk &result) {
|
@@ -404,8 +414,9 @@ idx_t ScanStructure::ResolvePredicates(DataChunk &keys, SelectionVector &match_s
|
|
404
414
|
}
|
405
415
|
idx_t no_match_count = 0;
|
406
416
|
|
407
|
-
|
408
|
-
|
417
|
+
auto &matcher = no_match_sel ? ht.row_matcher_no_match_sel : ht.row_matcher;
|
418
|
+
return matcher.Match(keys, key_state.vector_data, match_sel, this->count, ht.layout, pointers, no_match_sel,
|
419
|
+
no_match_count);
|
409
420
|
}
|
410
421
|
|
411
422
|
idx_t ScanStructure::ScanInnerJoin(DataChunk &keys, SelectionVector &result_vector) {
|
@@ -990,7 +1001,8 @@ static void CreateSpillChunk(DataChunk &spill_chunk, DataChunk &keys, DataChunk
|
|
990
1001
|
spill_chunk.data[spill_col_idx].Reference(hashes);
|
991
1002
|
}
|
992
1003
|
|
993
|
-
unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys,
|
1004
|
+
unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys, TupleDataChunkState &key_state,
|
1005
|
+
DataChunk &payload, ProbeSpill &probe_spill,
|
994
1006
|
ProbeSpillLocalAppendState &spill_state,
|
995
1007
|
DataChunk &spill_chunk) {
|
996
1008
|
// hash all the keys
|
@@ -1019,7 +1031,7 @@ unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys, DataChun
|
|
1019
1031
|
payload.Slice(true_sel, true_count);
|
1020
1032
|
|
1021
1033
|
const SelectionVector *current_sel;
|
1022
|
-
auto ss = InitializeScanStructure(keys, current_sel);
|
1034
|
+
auto ss = InitializeScanStructure(keys, key_state, current_sel);
|
1023
1035
|
if (ss->count == 0) {
|
1024
1036
|
return ss;
|
1025
1037
|
}
|
@@ -3,21 +3,12 @@
|
|
3
3
|
|
4
4
|
namespace duckdb {
|
5
5
|
|
6
|
-
template <class OP>
|
7
|
-
struct ComparisonOperationWrapper {
|
8
|
-
template <class T>
|
9
|
-
static inline bool Operation(T left, T right, bool left_is_null, bool right_is_null) {
|
10
|
-
if (left_is_null || right_is_null) {
|
11
|
-
return false;
|
12
|
-
}
|
13
|
-
return OP::Operation(left, right);
|
14
|
-
}
|
15
|
-
};
|
16
|
-
|
17
6
|
struct InitialNestedLoopJoin {
|
18
7
|
template <class T, class OP>
|
19
8
|
static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos,
|
20
9
|
SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) {
|
10
|
+
using MATCH_OP = ComparisonOperationWrapper<OP>;
|
11
|
+
|
21
12
|
// initialize phase of nested loop join
|
22
13
|
// fill lvector and rvector with matches from the base vectors
|
23
14
|
UnifiedVectorFormat left_data, right_data;
|
@@ -37,7 +28,7 @@ struct InitialNestedLoopJoin {
|
|
37
28
|
}
|
38
29
|
idx_t left_position = left_data.sel->get_index(lpos);
|
39
30
|
bool left_is_valid = left_data.validity.RowIsValid(left_position);
|
40
|
-
if (
|
31
|
+
if (MATCH_OP::Operation(ldata[left_position], rdata[right_position], !left_is_valid, !right_is_valid)) {
|
41
32
|
// emit tuple
|
42
33
|
lvector.set_index(result_count, lpos);
|
43
34
|
rvector.set_index(result_count, rpos);
|
@@ -54,6 +45,8 @@ struct RefineNestedLoopJoin {
|
|
54
45
|
template <class T, class OP>
|
55
46
|
static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos,
|
56
47
|
SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) {
|
48
|
+
using MATCH_OP = ComparisonOperationWrapper<OP>;
|
49
|
+
|
57
50
|
UnifiedVectorFormat left_data, right_data;
|
58
51
|
left.ToUnifiedFormat(left_size, left_data);
|
59
52
|
right.ToUnifiedFormat(right_size, right_data);
|
@@ -72,7 +65,7 @@ struct RefineNestedLoopJoin {
|
|
72
65
|
auto right_idx = right_data.sel->get_index(ridx);
|
73
66
|
bool left_is_valid = left_data.validity.RowIsValid(left_idx);
|
74
67
|
bool right_is_valid = right_data.validity.RowIsValid(right_idx);
|
75
|
-
if (
|
68
|
+
if (MATCH_OP::Operation(ldata[left_idx], rdata[right_idx], !left_is_valid, !right_is_valid)) {
|
76
69
|
lvector.set_index(result_count, lidx);
|
77
70
|
rvector.set_index(result_count, ridx);
|
78
71
|
result_count++;
|
@@ -139,26 +132,26 @@ idx_t NestedLoopJoinComparisonSwitch(Vector &left, Vector &right, idx_t left_siz
|
|
139
132
|
D_ASSERT(left.GetType() == right.GetType());
|
140
133
|
switch (comparison_type) {
|
141
134
|
case ExpressionType::COMPARE_EQUAL:
|
142
|
-
return NestedLoopJoinTypeSwitch<NLTYPE,
|
143
|
-
|
135
|
+
return NestedLoopJoinTypeSwitch<NLTYPE, Equals>(left, right, left_size, right_size, lpos, rpos, lvector,
|
136
|
+
rvector, current_match_count);
|
144
137
|
case ExpressionType::COMPARE_NOTEQUAL:
|
145
|
-
return NestedLoopJoinTypeSwitch<NLTYPE,
|
146
|
-
|
138
|
+
return NestedLoopJoinTypeSwitch<NLTYPE, NotEquals>(left, right, left_size, right_size, lpos, rpos, lvector,
|
139
|
+
rvector, current_match_count);
|
147
140
|
case ExpressionType::COMPARE_LESSTHAN:
|
148
|
-
return NestedLoopJoinTypeSwitch<NLTYPE,
|
149
|
-
|
141
|
+
return NestedLoopJoinTypeSwitch<NLTYPE, LessThan>(left, right, left_size, right_size, lpos, rpos, lvector,
|
142
|
+
rvector, current_match_count);
|
150
143
|
case ExpressionType::COMPARE_GREATERTHAN:
|
151
|
-
return NestedLoopJoinTypeSwitch<NLTYPE,
|
152
|
-
|
144
|
+
return NestedLoopJoinTypeSwitch<NLTYPE, GreaterThan>(left, right, left_size, right_size, lpos, rpos, lvector,
|
145
|
+
rvector, current_match_count);
|
153
146
|
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
154
|
-
return NestedLoopJoinTypeSwitch<NLTYPE,
|
155
|
-
|
147
|
+
return NestedLoopJoinTypeSwitch<NLTYPE, LessThanEquals>(left, right, left_size, right_size, lpos, rpos, lvector,
|
148
|
+
rvector, current_match_count);
|
156
149
|
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
157
|
-
return NestedLoopJoinTypeSwitch<NLTYPE,
|
158
|
-
|
150
|
+
return NestedLoopJoinTypeSwitch<NLTYPE, GreaterThanEquals>(left, right, left_size, right_size, lpos, rpos,
|
151
|
+
lvector, rvector, current_match_count);
|
159
152
|
case ExpressionType::COMPARE_DISTINCT_FROM:
|
160
|
-
return NestedLoopJoinTypeSwitch<NLTYPE,
|
161
|
-
|
153
|
+
return NestedLoopJoinTypeSwitch<NLTYPE, DistinctFrom>(left, right, left_size, right_size, lpos, rpos, lvector,
|
154
|
+
rvector, current_match_count);
|
162
155
|
default:
|
163
156
|
throw NotImplementedException("Unimplemented comparison type for join!");
|
164
157
|
}
|
@@ -6,6 +6,8 @@ namespace duckdb {
|
|
6
6
|
|
7
7
|
template <class T, class OP>
|
8
8
|
static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[]) {
|
9
|
+
using MATCH_OP = ComparisonOperationWrapper<OP>;
|
10
|
+
|
9
11
|
UnifiedVectorFormat left_data, right_data;
|
10
12
|
left.ToUnifiedFormat(lcount, left_data);
|
11
13
|
right.ToUnifiedFormat(rcount, right_data);
|
@@ -17,15 +19,17 @@ static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t r
|
|
17
19
|
continue;
|
18
20
|
}
|
19
21
|
auto lidx = left_data.sel->get_index(i);
|
20
|
-
|
22
|
+
const auto left_null = !left_data.validity.RowIsValid(lidx);
|
23
|
+
if (!MATCH_OP::COMPARE_NULL && left_null) {
|
21
24
|
continue;
|
22
25
|
}
|
23
26
|
for (idx_t j = 0; j < rcount; j++) {
|
24
27
|
auto ridx = right_data.sel->get_index(j);
|
25
|
-
|
28
|
+
const auto right_null = !right_data.validity.RowIsValid(ridx);
|
29
|
+
if (!MATCH_OP::COMPARE_NULL && right_null) {
|
26
30
|
continue;
|
27
31
|
}
|
28
|
-
if (
|
32
|
+
if (MATCH_OP::template Operation<T>(ldata[lidx], rdata[ridx], left_null, right_null)) {
|
29
33
|
found_match[i] = true;
|
30
34
|
break;
|
31
35
|
}
|
@@ -62,6 +66,12 @@ static void MarkJoinNested(Vector &left, Vector &right, idx_t lcount, idx_t rcou
|
|
62
66
|
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
63
67
|
count = VectorOperations::GreaterThanEquals(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
64
68
|
break;
|
69
|
+
case ExpressionType::COMPARE_DISTINCT_FROM:
|
70
|
+
count = VectorOperations::DistinctFrom(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
71
|
+
break;
|
72
|
+
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
73
|
+
count = VectorOperations::NotDistinctFrom(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
74
|
+
break;
|
65
75
|
default:
|
66
76
|
throw InternalException("Unsupported comparison type for MarkJoinNested");
|
67
77
|
}
|
@@ -116,17 +126,19 @@ static void MarkJoinComparisonSwitch(Vector &left, Vector &right, idx_t lcount,
|
|
116
126
|
D_ASSERT(left.GetType() == right.GetType());
|
117
127
|
switch (comparison_type) {
|
118
128
|
case ExpressionType::COMPARE_EQUAL:
|
119
|
-
return MarkJoinSwitch<
|
129
|
+
return MarkJoinSwitch<Equals>(left, right, lcount, rcount, found_match);
|
120
130
|
case ExpressionType::COMPARE_NOTEQUAL:
|
121
|
-
return MarkJoinSwitch<
|
131
|
+
return MarkJoinSwitch<NotEquals>(left, right, lcount, rcount, found_match);
|
122
132
|
case ExpressionType::COMPARE_LESSTHAN:
|
123
|
-
return MarkJoinSwitch<
|
133
|
+
return MarkJoinSwitch<LessThan>(left, right, lcount, rcount, found_match);
|
124
134
|
case ExpressionType::COMPARE_GREATERTHAN:
|
125
|
-
return MarkJoinSwitch<
|
135
|
+
return MarkJoinSwitch<GreaterThan>(left, right, lcount, rcount, found_match);
|
126
136
|
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
127
|
-
return MarkJoinSwitch<
|
137
|
+
return MarkJoinSwitch<LessThanEquals>(left, right, lcount, rcount, found_match);
|
128
138
|
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
129
|
-
return MarkJoinSwitch<
|
139
|
+
return MarkJoinSwitch<GreaterThanEquals>(left, right, lcount, rcount, found_match);
|
140
|
+
case ExpressionType::COMPARE_DISTINCT_FROM:
|
141
|
+
return MarkJoinSwitch<DistinctFrom>(left, right, lcount, rcount, found_match);
|
130
142
|
default:
|
131
143
|
throw NotImplementedException("Unimplemented comparison type for join!");
|
132
144
|
}
|
@@ -343,8 +343,8 @@ void PhysicalHashAggregate::SinkDistinct(ExecutionContext &context, DataChunk &c
|
|
343
343
|
|
344
344
|
SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk &chunk,
|
345
345
|
OperatorSinkInput &input) const {
|
346
|
-
auto &
|
347
|
-
auto &
|
346
|
+
auto &local_state = input.local_state.Cast<HashAggregateLocalSinkState>();
|
347
|
+
auto &global_state = input.global_state.Cast<HashAggregateGlobalSinkState>();
|
348
348
|
|
349
349
|
if (distinct_collection_info) {
|
350
350
|
SinkDistinct(context, chunk, input);
|
@@ -354,8 +354,7 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk
|
|
354
354
|
return SinkResultType::NEED_MORE_INPUT;
|
355
355
|
}
|
356
356
|
|
357
|
-
DataChunk &aggregate_input_chunk =
|
358
|
-
|
357
|
+
DataChunk &aggregate_input_chunk = local_state.aggregate_input_chunk;
|
359
358
|
auto &aggregates = grouped_aggregate_data.aggregates;
|
360
359
|
idx_t aggregate_input_idx = 0;
|
361
360
|
|
@@ -385,10 +384,11 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk
|
|
385
384
|
|
386
385
|
// For every grouping set there is one radix_table
|
387
386
|
for (idx_t i = 0; i < groupings.size(); i++) {
|
388
|
-
auto &
|
389
|
-
auto &
|
387
|
+
auto &grouping_local_state = global_state.grouping_states[i];
|
388
|
+
auto &grouping_global_state = local_state.grouping_states[i];
|
390
389
|
InterruptState interrupt_state;
|
391
|
-
OperatorSinkInput sink_input {*
|
390
|
+
OperatorSinkInput sink_input {*grouping_local_state.table_state, *grouping_global_state.table_state,
|
391
|
+
interrupt_state};
|
392
392
|
|
393
393
|
auto &grouping = groupings[i];
|
394
394
|
auto &table = grouping.table_data;
|
@@ -437,7 +437,7 @@ void CSVReaderOptions::FromNamedParameters(named_parameter_map_t &in, ClientCont
|
|
437
437
|
}
|
438
438
|
sql_type_list.reserve(sql_type_names.size());
|
439
439
|
for (auto &sql_type : sql_type_names) {
|
440
|
-
auto def_type = TransformStringToLogicalType(sql_type);
|
440
|
+
auto def_type = TransformStringToLogicalType(sql_type, context);
|
441
441
|
if (def_type.id() == LogicalTypeId::USER) {
|
442
442
|
throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
|
443
443
|
kv.first);
|
@@ -420,6 +420,8 @@ public:
|
|
420
420
|
}
|
421
421
|
|
422
422
|
DataChunk join_keys;
|
423
|
+
TupleDataChunkState join_key_state;
|
424
|
+
|
423
425
|
ExpressionExecutor probe_executor;
|
424
426
|
unique_ptr<JoinHashTable::ScanStructure> scan_structure;
|
425
427
|
unique_ptr<OperatorState> perfect_hash_join_state;
|
@@ -446,6 +448,7 @@ unique_ptr<OperatorState> PhysicalHashJoin::GetOperatorState(ExecutionContext &c
|
|
446
448
|
for (auto &cond : conditions) {
|
447
449
|
state->probe_executor.AddExpression(*cond.left);
|
448
450
|
}
|
451
|
+
TupleDataCollection::InitializeChunkState(state->join_key_state, condition_types);
|
449
452
|
}
|
450
453
|
if (sink.external) {
|
451
454
|
state->spill_chunk.Initialize(allocator, sink.probe_types);
|
@@ -502,10 +505,10 @@ OperatorResultType PhysicalHashJoin::ExecuteInternal(ExecutionContext &context,
|
|
502
505
|
|
503
506
|
// perform the actual probe
|
504
507
|
if (sink.external) {
|
505
|
-
state.scan_structure = sink.hash_table->ProbeAndSpill(state.join_keys,
|
506
|
-
state.spill_state, state.spill_chunk);
|
508
|
+
state.scan_structure = sink.hash_table->ProbeAndSpill(state.join_keys, state.join_key_state, input,
|
509
|
+
*sink.probe_spill, state.spill_state, state.spill_chunk);
|
507
510
|
} else {
|
508
|
-
state.scan_structure = sink.hash_table->Probe(state.join_keys);
|
511
|
+
state.scan_structure = sink.hash_table->Probe(state.join_keys, state.join_key_state);
|
509
512
|
}
|
510
513
|
state.scan_structure->Next(state.join_keys, input, chunk);
|
511
514
|
return OperatorResultType::HAVE_MORE_OUTPUT;
|
@@ -605,6 +608,7 @@ public:
|
|
605
608
|
DataChunk probe_chunk;
|
606
609
|
DataChunk join_keys;
|
607
610
|
DataChunk payload;
|
611
|
+
TupleDataChunkState join_key_state;
|
608
612
|
//! Column indices to easily reference the join keys/payload columns in probe_chunk
|
609
613
|
vector<idx_t> join_key_indices;
|
610
614
|
vector<idx_t> payload_indices;
|
@@ -782,6 +786,7 @@ HashJoinLocalSourceState::HashJoinLocalSourceState(const PhysicalHashJoin &op, A
|
|
782
786
|
probe_chunk.Initialize(allocator, sink.probe_types);
|
783
787
|
join_keys.Initialize(allocator, op.condition_types);
|
784
788
|
payload.Initialize(allocator, op.children[0]->types);
|
789
|
+
TupleDataCollection::InitializeChunkState(join_key_state, op.condition_types);
|
785
790
|
|
786
791
|
// Store the indices of the columns to reference them easily
|
787
792
|
idx_t col_idx = 0;
|
@@ -871,7 +876,7 @@ void HashJoinLocalSourceState::ExternalProbe(HashJoinGlobalSinkState &sink, Hash
|
|
871
876
|
}
|
872
877
|
|
873
878
|
// Perform the probe
|
874
|
-
scan_structure = sink.hash_table->Probe(join_keys, precomputed_hashes);
|
879
|
+
scan_structure = sink.hash_table->Probe(join_keys, join_key_state, precomputed_hashes);
|
875
880
|
scan_structure->Next(join_keys, payload, chunk);
|
876
881
|
}
|
877
882
|
|
@@ -254,7 +254,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::PlanComparisonJoin(LogicalCo
|
|
254
254
|
}
|
255
255
|
|
256
256
|
bool has_equality = false;
|
257
|
-
// bool has_inequality = false;
|
258
257
|
size_t has_range = 0;
|
259
258
|
for (size_t c = 0; c < op.conditions.size(); ++c) {
|
260
259
|
auto &cond = op.conditions[c];
|
@@ -271,7 +270,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::PlanComparisonJoin(LogicalCo
|
|
271
270
|
break;
|
272
271
|
case ExpressionType::COMPARE_NOTEQUAL:
|
273
272
|
case ExpressionType::COMPARE_DISTINCT_FROM:
|
274
|
-
// has_inequality = true;
|
275
273
|
break;
|
276
274
|
default:
|
277
275
|
throw NotImplementedException("Unimplemented comparison join");
|
@@ -107,25 +107,19 @@ void ReservoirSamplePercentage::AddToReservoir(DataChunk &input) {
|
|
107
107
|
if (append_to_next_sample > 0) {
|
108
108
|
// we need to also add to the next sample
|
109
109
|
DataChunk new_chunk;
|
110
|
-
new_chunk.
|
111
|
-
|
112
|
-
for (idx_t r = 0; r < append_to_current_sample_count; r++) {
|
113
|
-
sel.set_index(r, r);
|
114
|
-
}
|
115
|
-
new_chunk.Slice(sel, append_to_current_sample_count);
|
110
|
+
new_chunk.InitializeEmpty(input.GetTypes());
|
111
|
+
new_chunk.Slice(input, *FlatVector::IncrementalSelectionVector(), append_to_current_sample_count);
|
116
112
|
new_chunk.Flatten();
|
117
|
-
|
118
113
|
current_sample->AddToReservoir(new_chunk);
|
119
114
|
} else {
|
120
115
|
input.Flatten();
|
121
|
-
|
122
116
|
input.SetCardinality(append_to_current_sample_count);
|
123
117
|
current_sample->AddToReservoir(input);
|
124
118
|
}
|
125
119
|
}
|
126
120
|
if (append_to_next_sample > 0) {
|
127
121
|
// slice the input for the remainder
|
128
|
-
SelectionVector sel(
|
122
|
+
SelectionVector sel(append_to_next_sample);
|
129
123
|
for (idx_t i = 0; i < append_to_next_sample; i++) {
|
130
124
|
sel.set_index(i, append_to_current_sample_count + i);
|
131
125
|
}
|