duckdb 0.8.2-dev4711.0 → 0.8.2-dev4871.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +0 -1
- package/binding.gyp.in +0 -1
- package/package.json +1 -1
- package/src/connection.cpp +10 -23
- package/src/data_chunk.cpp +1 -3
- package/src/database.cpp +4 -9
- package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
- package/src/duckdb/extension/json/json_functions.cpp +4 -6
- package/src/duckdb/src/common/enum_util.cpp +10 -5
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +408 -0
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +28 -17
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
- package/src/duckdb/src/core_functions/function_list.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
- package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
- package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
- package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
- package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
- package/src/duckdb/src/function/function_binder.cpp +10 -9
- package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
- package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
- package/src/duckdb/src/main/config.cpp +1 -1
- package/src/duckdb/src/main/relation.cpp +10 -0
- package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
- package/src/duckdb/src/storage/data_table.cpp +10 -0
- package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
- package/src/statement.cpp +2 -4
- package/test/database_fail.test.ts +6 -0
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
package/src/duckdb/src/execution/aggregate_hashtable.cpp

@@ -45,6 +45,7 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
 	// Append hash column to the end and initialise the row layout
 	group_types_p.emplace_back(LogicalType::HASH);
 	layout.Initialize(std::move(group_types_p), std::move(aggregate_objects_p));
+
 	hash_offset = layout.GetOffsets()[layout.ColumnCount() - 1];
 
 	// Partitioned data and pointer table
@@ -52,7 +53,8 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
 	Resize(initial_capacity);
 
 	// Predicates
-	predicates.resize(layout.ColumnCount() - 1, ExpressionType::COMPARE_EQUAL);
+	predicates.resize(layout.ColumnCount() - 1, ExpressionType::COMPARE_NOT_DISTINCT_FROM);
+	row_matcher.Initialize(true, layout, predicates);
 }
 
 void GroupedAggregateHashTable::InitializePartitionedData() {
@@ -414,9 +416,8 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
 		}
 
 		// Perform group comparisons
-
-
-		                     no_match_count);
+		row_matcher.Match(state.group_chunk, chunk_state.vector_data, state.group_compare_vector,
+		                  need_compare_count, layout, addresses_v, &state.no_match_vector, no_match_count);
 	}
 
 	// Linear probing: each of the entries that do not match move to the next entry in the HT
package/src/duckdb/src/execution/index/fixed_size_allocator.cpp

@@ -173,6 +173,19 @@ bool FixedSizeAllocator::InitializeVacuum() {
        return false;
    }
 
+   // remove all empty buffers
+   auto buffer_it = buffers.begin();
+   while (buffer_it != buffers.end()) {
+       if (!buffer_it->second.segment_count) {
+           buffers_with_free_space.erase(buffer_it->first);
+           buffer_it->second.Destroy();
+           buffer_it = buffers.erase(buffer_it);
+       } else {
+           buffer_it++;
+       }
+   }
+
+   // determine if a vacuum is necessary
    multimap<idx_t, idx_t> temporary_vacuum_buffers;
    D_ASSERT(vacuum_buffers.empty());
    idx_t available_segments_in_memory = 0;
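The vacuum block above deletes map entries while iterating over the map. A minimal standalone sketch of that erase-while-iterating idiom (toy data, not DuckDB's actual buffer bookkeeping):

#include <cstdio>
#include <map>

int main() {
    // Toy stand-in for the allocator's buffer map: buffer id -> segment count.
    std::map<int, int> buffers {{0, 3}, {1, 0}, {2, 5}, {3, 0}};

    // std::map::erase returns the iterator to the next element, so the loop
    // remains well-defined while entries are removed mid-iteration.
    auto buffer_it = buffers.begin();
    while (buffer_it != buffers.end()) {
        if (buffer_it->second == 0) {
            buffer_it = buffers.erase(buffer_it); // empty buffer: drop it
        } else {
            ++buffer_it;
        }
    }

    for (const auto &kv : buffers) {
        std::printf("buffer %d: %d segments\n", kv.first, kv.second);
    }
    return 0;
}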
package/src/duckdb/src/execution/join_hashtable.cpp

@@ -19,15 +19,15 @@ JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector<JoinC
     : buffer_manager(buffer_manager_p), conditions(conditions_p), build_types(std::move(btypes)), entry_size(0),
       tuple_size(0), vfound(Value::BOOLEAN(false)), join_type(type_p), finalized(false), has_null(false),
       external(false), radix_bits(4), partition_start(0), partition_end(0) {
+
    for (auto &condition : conditions) {
        D_ASSERT(condition.left->return_type == condition.right->return_type);
        auto type = condition.left->return_type;
        if (condition.comparison == ExpressionType::COMPARE_EQUAL ||
-           condition.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM
-
-           // all equality conditions
-           // all other conditions at the back
-           // this assert checks that
+           condition.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
+
+           // ensure that all equality conditions are at the front,
+           // and that all other conditions are at the back
            D_ASSERT(equality_types.size() == condition_types.size());
            equality_types.push_back(type);
        }
@@ -51,6 +51,8 @@ JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector<JoinC
    }
    layout_types.emplace_back(LogicalType::HASH);
    layout.Initialize(layout_types, false);
+   row_matcher.Initialize(false, layout, predicates);
+   row_matcher_no_match_sel.Initialize(true, layout, predicates);
 
    const auto &offsets = layout.GetOffsets();
    tuple_size = offsets[condition_types.size() + build_types.size()];
@@ -142,30 +144,6 @@ static idx_t FilterNullValues(UnifiedVectorFormat &vdata, const SelectionVector
    return result_count;
 }
 
-idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unsafe_unique_array<UnifiedVectorFormat> &key_data,
-                                 const SelectionVector *&current_sel, SelectionVector &sel, bool build_side) {
-   key_data = keys.ToUnifiedFormat();
-
-   // figure out which keys are NULL, and create a selection vector out of them
-   current_sel = FlatVector::IncrementalSelectionVector();
-   idx_t added_count = keys.size();
-   if (build_side && IsRightOuterJoin(join_type)) {
-       // in case of a right or full outer join, we cannot remove NULL keys from the build side
-       return added_count;
-   }
-   for (idx_t i = 0; i < keys.ColumnCount(); i++) {
-       if (!null_values_are_equal[i]) {
-           if (key_data[i].validity.AllValid()) {
-               continue;
-           }
-           added_count = FilterNullValues(key_data[i], *current_sel, added_count, sel);
-           // null values are NOT equal for this column, filter them out
-           current_sel = &sel;
-       }
-   }
-   return added_count;
-}
-
 void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChunk &keys, DataChunk &payload) {
    D_ASSERT(!finalized);
    D_ASSERT(keys.size() == payload.size());
@@ -194,23 +172,6 @@ void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChu
        info.correlated_counts->AddChunk(info.group_chunk, info.correlated_payload, AggregateType::NON_DISTINCT);
    }
 
-   // prepare the keys for processing
-   unsafe_unique_array<UnifiedVectorFormat> key_data;
-   const SelectionVector *current_sel;
-   SelectionVector sel(STANDARD_VECTOR_SIZE);
-   idx_t added_count = PrepareKeys(keys, key_data, current_sel, sel, true);
-   if (added_count < keys.size()) {
-       has_null = true;
-   }
-   if (added_count == 0) {
-       return;
-   }
-
-   // hash the keys and obtain an entry in the list
-   // note that we only hash the keys used in the equality comparison
-   Vector hash_values(LogicalType::HASH);
-   Hash(keys, *current_sel, added_count, hash_values);
-
    // build a chunk to append to the data collection [keys, payload, (optional "found" boolean), hash]
    DataChunk source_chunk;
    source_chunk.InitializeEmpty(layout.GetTypes());
@@ -228,13 +189,58 @@ void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChu
        source_chunk.data[col_offset].Reference(vfound);
        col_offset++;
    }
+   Vector hash_values(LogicalType::HASH);
    source_chunk.data[col_offset].Reference(hash_values);
    source_chunk.SetCardinality(keys);
 
+   // ToUnifiedFormat the source chunk
+   TupleDataCollection::ToUnifiedFormat(append_state.chunk_state, source_chunk);
+
+   // prepare the keys for processing
+   const SelectionVector *current_sel;
+   SelectionVector sel(STANDARD_VECTOR_SIZE);
+   idx_t added_count = PrepareKeys(keys, append_state.chunk_state.vector_data, current_sel, sel, true);
    if (added_count < keys.size()) {
-
+       has_null = true;
+   }
+   if (added_count == 0) {
+       return;
    }
-
+
+   // hash the keys and obtain an entry in the list
+   // note that we only hash the keys used in the equality comparison
+   Hash(keys, *current_sel, added_count, hash_values);
+
+   // Re-reference and ToUnifiedFormat the hash column after computing it
+   source_chunk.data[col_offset].Reference(hash_values);
+   hash_values.ToUnifiedFormat(source_chunk.size(), append_state.chunk_state.vector_data.back().unified);
+
+   // We already called TupleDataCollection::ToUnifiedFormat, so we can AppendUnified here
+   sink_collection->AppendUnified(append_state, source_chunk, *current_sel, added_count);
+}
+
+idx_t JoinHashTable::PrepareKeys(DataChunk &keys, vector<TupleDataVectorFormat> &vector_data,
+                                 const SelectionVector *&current_sel, SelectionVector &sel, bool build_side) {
+   // figure out which keys are NULL, and create a selection vector out of them
+   current_sel = FlatVector::IncrementalSelectionVector();
+   idx_t added_count = keys.size();
+   if (build_side && IsRightOuterJoin(join_type)) {
+       // in case of a right or full outer join, we cannot remove NULL keys from the build side
+       return added_count;
+   }
+
+   for (idx_t col_idx = 0; col_idx < keys.ColumnCount(); col_idx++) {
+       if (!null_values_are_equal[col_idx]) {
+           auto &col_key_data = vector_data[col_idx].unified;
+           if (col_key_data.validity.AllValid()) {
+               continue;
+           }
+           added_count = FilterNullValues(col_key_data, *current_sel, added_count, sel);
+           // null values are NOT equal for this column, filter them out
+           current_sel = &sel;
+       }
+   }
+   return added_count;
 }
 
 template <bool PARALLEL>
@@ -322,12 +328,13 @@ void JoinHashTable::Finalize(idx_t chunk_idx_from, idx_t chunk_idx_to, bool para
    } while (iterator.Next());
 }
 
-unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys, const SelectionVector *&current_sel) {
+unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys, TupleDataChunkState &key_state,
+                                                                 const SelectionVector *&current_sel) {
    D_ASSERT(Count() > 0); // should be handled before
    D_ASSERT(finalized);
 
    // set up the scan structure
-   auto ss = make_uniq<ScanStructure>(*this);
+   auto ss = make_uniq<ScanStructure>(*this, key_state);
 
    if (join_type != JoinType::INNER) {
        ss->found_match = make_unsafe_uniq_array<bool>(STANDARD_VECTOR_SIZE);
@@ -335,13 +342,15 @@ unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys
    }
 
    // first prepare the keys for probing
-
+   TupleDataCollection::ToUnifiedFormat(key_state, keys);
+   ss->count = PrepareKeys(keys, key_state.vector_data, current_sel, ss->sel_vector, false);
    return ss;
 }
 
-unique_ptr<ScanStructure> JoinHashTable::Probe(DataChunk &keys, Vector *precomputed_hashes) {
+unique_ptr<ScanStructure> JoinHashTable::Probe(DataChunk &keys, TupleDataChunkState &key_state,
+                                               Vector *precomputed_hashes) {
    const SelectionVector *current_sel;
-   auto ss = InitializeScanStructure(keys, current_sel);
+   auto ss = InitializeScanStructure(keys, key_state, current_sel);
    if (ss->count == 0) {
        return ss;
    }
@@ -363,8 +372,9 @@ unique_ptr<ScanStructure> JoinHashTable::Probe(DataChunk &keys, Vector *precompu
    return ss;
 }
 
-ScanStructure::ScanStructure(JoinHashTable &ht_p)
-    : pointers(LogicalType::POINTER), sel_vector(STANDARD_VECTOR_SIZE), ht(ht_p), finished(false) {
+ScanStructure::ScanStructure(JoinHashTable &ht_p, TupleDataChunkState &key_state_p)
+    : key_state(key_state_p), pointers(LogicalType::POINTER), sel_vector(STANDARD_VECTOR_SIZE), ht(ht_p),
+      finished(false) {
 }
 
 void ScanStructure::Next(DataChunk &keys, DataChunk &left, DataChunk &result) {
@@ -404,8 +414,9 @@ idx_t ScanStructure::ResolvePredicates(DataChunk &keys, SelectionVector &match_s
    }
    idx_t no_match_count = 0;
 
-
-
+   auto &matcher = no_match_sel ? ht.row_matcher_no_match_sel : ht.row_matcher;
+   return matcher.Match(keys, key_state.vector_data, match_sel, this->count, ht.layout, pointers, no_match_sel,
+                        no_match_count);
 }
 
 idx_t ScanStructure::ScanInnerJoin(DataChunk &keys, SelectionVector &result_vector) {
@@ -990,7 +1001,8 @@ static void CreateSpillChunk(DataChunk &spill_chunk, DataChunk &keys, DataChunk
    spill_chunk.data[spill_col_idx].Reference(hashes);
 }
 
-unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys, DataChunk &payload, ProbeSpill &probe_spill,
+unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys, TupleDataChunkState &key_state,
+                                                       DataChunk &payload, ProbeSpill &probe_spill,
                                                        ProbeSpillLocalAppendState &spill_state,
                                                        DataChunk &spill_chunk) {
    // hash all the keys
@@ -1019,7 +1031,7 @@ unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys, DataChun
    payload.Slice(true_sel, true_count);
 
    const SelectionVector *current_sel;
-   auto ss = InitializeScanStructure(keys, current_sel);
+   auto ss = InitializeScanStructure(keys, key_state, current_sel);
    if (ss->count == 0) {
        return ss;
    }
package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp

@@ -420,6 +420,8 @@ public:
    }
 
    DataChunk join_keys;
+   TupleDataChunkState join_key_state;
+
    ExpressionExecutor probe_executor;
    unique_ptr<JoinHashTable::ScanStructure> scan_structure;
    unique_ptr<OperatorState> perfect_hash_join_state;
@@ -446,6 +448,7 @@ unique_ptr<OperatorState> PhysicalHashJoin::GetOperatorState(ExecutionContext &c
        for (auto &cond : conditions) {
            state->probe_executor.AddExpression(*cond.left);
        }
+       TupleDataCollection::InitializeChunkState(state->join_key_state, condition_types);
    }
    if (sink.external) {
        state->spill_chunk.Initialize(allocator, sink.probe_types);
@@ -502,10 +505,10 @@ OperatorResultType PhysicalHashJoin::ExecuteInternal(ExecutionContext &context,
 
    // perform the actual probe
    if (sink.external) {
-       state.scan_structure = sink.hash_table->ProbeAndSpill(state.join_keys, input, *sink.probe_spill,
-                                                             state.spill_state, state.spill_chunk);
+       state.scan_structure = sink.hash_table->ProbeAndSpill(state.join_keys, state.join_key_state, input,
+                                                             *sink.probe_spill, state.spill_state, state.spill_chunk);
    } else {
-       state.scan_structure = sink.hash_table->Probe(state.join_keys);
+       state.scan_structure = sink.hash_table->Probe(state.join_keys, state.join_key_state);
    }
    state.scan_structure->Next(state.join_keys, input, chunk);
    return OperatorResultType::HAVE_MORE_OUTPUT;
@@ -605,6 +608,7 @@ public:
    DataChunk probe_chunk;
    DataChunk join_keys;
    DataChunk payload;
+   TupleDataChunkState join_key_state;
    //! Column indices to easily reference the join keys/payload columns in probe_chunk
    vector<idx_t> join_key_indices;
    vector<idx_t> payload_indices;
@@ -782,6 +786,7 @@ HashJoinLocalSourceState::HashJoinLocalSourceState(const PhysicalHashJoin &op, A
    probe_chunk.Initialize(allocator, sink.probe_types);
    join_keys.Initialize(allocator, op.condition_types);
    payload.Initialize(allocator, op.children[0]->types);
+   TupleDataCollection::InitializeChunkState(join_key_state, op.condition_types);
 
    // Store the indices of the columns to reference them easily
    idx_t col_idx = 0;
@@ -871,7 +876,7 @@ void HashJoinLocalSourceState::ExternalProbe(HashJoinGlobalSinkState &sink, Hash
    }
 
    // Perform the probe
-   scan_structure = sink.hash_table->Probe(join_keys, precomputed_hashes);
+   scan_structure = sink.hash_table->Probe(join_keys, join_key_state, precomputed_hashes);
    scan_structure->Next(join_keys, payload, chunk);
 }
package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp

@@ -254,7 +254,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::PlanComparisonJoin(LogicalCo
    }
 
    bool has_equality = false;
-   // bool has_inequality = false;
    size_t has_range = 0;
    for (size_t c = 0; c < op.conditions.size(); ++c) {
        auto &cond = op.conditions[c];
@@ -271,7 +270,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::PlanComparisonJoin(LogicalCo
            break;
        case ExpressionType::COMPARE_NOTEQUAL:
        case ExpressionType::COMPARE_DISTINCT_FROM:
-           // has_inequality = true;
            break;
        default:
            throw NotImplementedException("Unimplemented comparison join");
package/src/duckdb/src/execution/reservoir_sample.cpp

@@ -107,25 +107,19 @@ void ReservoirSamplePercentage::AddToReservoir(DataChunk &input) {
        if (append_to_next_sample > 0) {
            // we need to also add to the next sample
            DataChunk new_chunk;
-           new_chunk.
-
-           for (idx_t r = 0; r < append_to_current_sample_count; r++) {
-               sel.set_index(r, r);
-           }
-           new_chunk.Slice(sel, append_to_current_sample_count);
+           new_chunk.InitializeEmpty(input.GetTypes());
+           new_chunk.Slice(input, *FlatVector::IncrementalSelectionVector(), append_to_current_sample_count);
            new_chunk.Flatten();
-
            current_sample->AddToReservoir(new_chunk);
        } else {
            input.Flatten();
-
            input.SetCardinality(append_to_current_sample_count);
            current_sample->AddToReservoir(input);
        }
    }
    if (append_to_next_sample > 0) {
        // slice the input for the remainder
-       SelectionVector sel(
+       SelectionVector sel(append_to_next_sample);
        for (idx_t i = 0; i < append_to_next_sample; i++) {
            sel.set_index(i, append_to_current_sample_count + i);
        }
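The fix above drops a hand-built 0..n-1 selection vector in favour of the shared FlatVector::IncrementalSelectionVector() and slices the input chunk directly. A toy illustration of why an "incremental" selection vector is just the identity mapping (standard-library stand-ins, not DuckDB types):

#include <cstdio>
#include <numeric>
#include <vector>

int main() {
    std::vector<int> column {10, 20, 30, 40, 50};

    // An incremental selection vector is simply 0, 1, 2, ...: selecting
    // through it yields a prefix of the rows unchanged, which is why one
    // shared constant vector can replace the per-call loop.
    std::vector<std::size_t> sel(3);
    std::iota(sel.begin(), sel.end(), 0);

    for (std::size_t i : sel) {
        std::printf("%d\n", column[i]); // prints 10, 20, 30
    }
    return 0;
}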
package/src/duckdb/src/function/cast/vector_cast_helpers.cpp

@@ -20,10 +20,16 @@ inline static void SkipWhitespace(const char *buf, idx_t &pos, idx_t len) {
 static bool SkipToCloseQuotes(idx_t &pos, const char *buf, idx_t &len) {
    char quote = buf[pos];
    pos++;
+   bool escaped = false;
 
    while (pos < len) {
-       if (buf[pos] ==
-
+       if (buf[pos] == '\\') {
+           escaped = !escaped;
+       } else {
+           if (buf[pos] == quote && !escaped) {
+               return true;
+           }
+           escaped = false;
        }
        pos++;
    }
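The patched loop above makes quote scanning escape-aware: a backslash-escaped quote no longer terminates the string, and a double backslash toggles back to the unescaped state. A standalone rendering of the same logic (simplified signature, with idx_t replaced by size_t; the return-true-on-close behaviour is read off the diff):

#include <cstddef>
#include <cstdio>

static bool SkipToCloseQuotes(size_t &pos, const char *buf, size_t len) {
    char quote = buf[pos];
    pos++;
    bool escaped = false;
    while (pos < len) {
        if (buf[pos] == '\\') {
            escaped = !escaped; // "\\\\" toggles back to unescaped
        } else {
            if (buf[pos] == quote && !escaped) {
                return true; // unescaped closing quote found
            }
            escaped = false;
        }
        pos++;
    }
    return false; // ran off the end without closing the string
}

int main() {
    const char input[] = "'it\\'s'"; // the literal: 'it\'s'
    size_t pos = 0;
    bool closed = SkipToCloseQuotes(pos, input, sizeof(input) - 1);
    std::printf("closed=%d, pos=%zu\n", closed, pos); // closed=1, pos=6
    return 0;
}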
package/src/duckdb/src/function/function_binder.cpp

@@ -1,16 +1,16 @@
 #include "duckdb/function/function_binder.hpp"
-#include "duckdb/common/limits.hpp"
 
-#include "duckdb/
-#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
-#include "duckdb/planner/expression/bound_function_expression.hpp"
-#include "duckdb/planner/expression/bound_constant_expression.hpp"
+#include "duckdb/catalog/catalog.hpp"
 #include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp"
-
-#include "duckdb/
+#include "duckdb/common/limits.hpp"
+#include "duckdb/execution/expression_executor.hpp"
 #include "duckdb/function/aggregate_function.hpp"
 #include "duckdb/function/cast_rules.hpp"
-#include "duckdb/
+#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
+#include "duckdb/planner/expression/bound_cast_expression.hpp"
+#include "duckdb/planner/expression/bound_constant_expression.hpp"
+#include "duckdb/planner/expression/bound_function_expression.hpp"
+#include "duckdb/planner/expression_binder.hpp"
 
 namespace duckdb {
 
@@ -268,7 +268,8 @@ unique_ptr<Expression> FunctionBinder::BindScalarFunction(ScalarFunctionCatalogE
 
    if (bound_function.null_handling == FunctionNullHandling::DEFAULT_NULL_HANDLING) {
        for (auto &child : children) {
-           if (child->return_type == LogicalTypeId::SQLNULL) {
+           if (child->return_type == LogicalTypeId::SQLNULL ||
+               (child->IsFoldable() && ExpressionExecutor::EvaluateScalar(context, *child).IsNull())) {
                return make_uniq<BoundConstantExpression>(Value(LogicalType::SQLNULL));
            }
        }
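The binder change above widens constant NULL folding: previously only a child statically typed as SQLNULL collapsed the whole call to NULL; now any foldable child that evaluates to NULL at bind time does too. A toy model of the new condition, using hypothetical stand-in types rather than DuckDB's expression classes:

#include <cstdio>
#include <vector>

// Hypothetical stand-in for a bound child expression.
struct Expr {
    bool is_sqlnull;    // return type is statically SQLNULL
    bool foldable;      // can be evaluated at bind time
    bool folds_to_null; // bind-time evaluation result is NULL
};

// Mirrors the patched condition: fold the whole call to NULL if any child is
// a NULL literal or a constant expression that evaluates to NULL.
static bool FoldsCallToNull(const std::vector<Expr> &children) {
    for (const auto &child : children) {
        if (child.is_sqlnull || (child.foldable && child.folds_to_null)) {
            return true;
        }
    }
    return false;
}

int main() {
    std::vector<Expr> children {{false, true, true}}; // e.g. CAST(NULL AS INTEGER)
    std::printf("fold to NULL: %d\n", FoldsCallToNull(children));
    return 0;
}

This also explains the like.cpp hunk below: the explicit NULL-pattern guard in LikeBindFunction becomes redundant once the binder folds such calls away.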
package/src/duckdb/src/function/scalar/string/like.cpp

@@ -196,9 +196,6 @@ static unique_ptr<FunctionData> LikeBindFunction(ClientContext &context, ScalarF
    D_ASSERT(arguments.size() == 2 || arguments.size() == 3);
    if (arguments[1]->IsFoldable()) {
        Value pattern_str = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
-       if (pattern_str.IsNull()) {
-           return nullptr;
-       }
        return LikeMatcher::CreateLikeMatcher(pattern_str.ToString());
    }
    return nullptr;
package/src/duckdb/src/function/table/version/pragma_version.cpp

@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.8.2-dev4711"
+#define DUCKDB_VERSION "0.8.2-dev4871"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "
+#define DUCKDB_SOURCE_ID "5a29c99891"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp

@@ -25,7 +25,6 @@ enum class DatePartSpecifier : uint8_t {
    SECOND,
    MINUTE,
    HOUR,
-   EPOCH,
    DOW,
    ISODOW,
    WEEK,
@@ -39,11 +38,20 @@ enum class DatePartSpecifier : uint8_t {
    TIMEZONE_MINUTE,
 
    // DOUBLE values
-
+   EPOCH,
+   JULIAN_DAY,
+
+   // Invalid
+   INVALID,
+
+   // Type ranges
+   BEGIN_BIGINT = YEAR,
+   BEGIN_DOUBLE = EPOCH,
+   BEGIN_INVALID = INVALID,
 };
 
 inline bool IsBigintDatepart(DatePartSpecifier part_code) {
-   return size_t(part_code) < size_t(DatePartSpecifier::
+   return size_t(part_code) < size_t(DatePartSpecifier::BEGIN_DOUBLE);
 }
 
 DUCKDB_API bool TryGetDatePartSpecifier(const string &specifier, DatePartSpecifier &result);
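The reshuffled enum above groups specifiers by result type so a single unsigned comparison classifies them: EPOCH moves from the BIGINT block into the DOUBLE block, and the BEGIN_* aliases mark where each range starts. A compressed standalone illustration of the technique (fewer enumerators than the real header, which this sketch is not a substitute for):

#include <cstddef>
#include <cstdio>

// Abbreviated stand-in: BIGINT producers first, DOUBLE producers from
// BEGIN_DOUBLE onward, INVALID last. The aliases share values with the
// first enumerator of each range.
enum class DatePartSpecifier : unsigned char {
    YEAR,
    MONTH,
    DAY,
    // DOUBLE values
    EPOCH,
    JULIAN_DAY,
    // Invalid
    INVALID,
    // Type ranges
    BEGIN_BIGINT = YEAR,
    BEGIN_DOUBLE = EPOCH,
    BEGIN_INVALID = INVALID,
};

// One comparison classifies any specifier, as in the patched header.
inline bool IsBigintDatepart(DatePartSpecifier part_code) {
    return static_cast<std::size_t>(part_code) < static_cast<std::size_t>(DatePartSpecifier::BEGIN_DOUBLE);
}

int main() {
    std::printf("YEAR -> BIGINT? %d\n", IsBigintDatepart(DatePartSpecifier::YEAR));   // 1
    std::printf("EPOCH -> BIGINT? %d\n", IsBigintDatepart(DatePartSpecifier::EPOCH)); // 0
    return 0;
}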
package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp

@@ -0,0 +1,63 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// duckdb/common/row_operations/row_matcher.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "duckdb/common/enums/expression_type.hpp"
+#include "duckdb/common/types.hpp"
+
+namespace duckdb {
+
+class Vector;
+class DataChunk;
+class TupleDataLayout;
+struct TupleDataVectorFormat;
+struct SelectionVector;
+struct MatchFunction;
+
+typedef idx_t (*match_function_t)(Vector &lhs_vector, const TupleDataVectorFormat &lhs_format, SelectionVector &sel,
+                                  const idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
+                                  const idx_t col_idx, const vector<MatchFunction> &child_functions,
+                                  SelectionVector *no_match_sel, idx_t &no_match_count);
+
+struct MatchFunction {
+   match_function_t function;
+   vector<MatchFunction> child_functions;
+};
+
+struct RowMatcher {
+public:
+   using Predicates = vector<ExpressionType>;
+
+   //! Initializes the RowMatcher, filling match_functions using layout and predicates
+   void Initialize(const bool no_match_sel, const TupleDataLayout &layout, const Predicates &predicates);
+   //! Given a DataChunk on the LHS, on which we've called TupleDataCollection::ToUnifiedFormat,
+   //! we match it with rows on the RHS, according to the given layout and locations.
+   //! Initially, 'sel' has 'count' entries which point to what needs to be compared.
+   //! After matching is done, this returns how many matching entries there are, which 'sel' is modified to point to
+   idx_t Match(DataChunk &lhs, const vector<TupleDataVectorFormat> &lhs_formats, SelectionVector &sel, idx_t count,
+               const TupleDataLayout &rhs_layout, Vector &rhs_row_locations, SelectionVector *no_match_sel,
+               idx_t &no_match_count);
+
+private:
+   //! Gets the templated match function for a given column
+   MatchFunction GetMatchFunction(const bool no_match_sel, const LogicalType &type, const ExpressionType predicate);
+   template <bool NO_MATCH_SEL>
+   MatchFunction GetMatchFunction(const LogicalType &type, const ExpressionType predicate);
+   template <bool NO_MATCH_SEL, class T>
+   MatchFunction GetMatchFunction(const ExpressionType predicate);
+   template <bool NO_MATCH_SEL>
+   MatchFunction GetStructMatchFunction(const LogicalType &type, const ExpressionType predicate);
+   template <bool NO_MATCH_SEL>
+   MatchFunction GetListMatchFunction(const ExpressionType predicate);
+
+private:
+   vector<MatchFunction> match_functions;
+};
+
+} // namespace duckdb
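The new header above resolves one match_function_t per column at Initialize time, with child_functions recursing into nested types, so Match dispatches per chunk without re-inspecting types row by row. A toy version of that function-pointer-table pattern (simplified to int columns; not DuckDB's actual API):

#include <cstdio>
#include <vector>

struct MatchFunction;
// Simplified analogue of match_function_t: compare one lhs/rhs pair.
typedef bool (*match_function_t)(int lhs, int rhs, const std::vector<MatchFunction> &child_functions);

struct MatchFunction {
    match_function_t function;
    std::vector<MatchFunction> child_functions; // filled for nested (STRUCT/LIST) columns
};

static bool MatchNotDistinctFrom(int lhs, int rhs, const std::vector<MatchFunction> &) {
    return lhs == rhs; // toy predicate; the real code also compares validity
}

int main() {
    // "Initialize": pick one function per column from its type and predicate.
    std::vector<MatchFunction> match_functions;
    match_functions.push_back({MatchNotDistinctFrom, {}});

    // "Match": per-column dispatch is now a single indirect call.
    const MatchFunction &m = match_functions[0];
    std::printf("match=%d\n", m.function(42, 42, m.child_functions));
    return 0;
}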
package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp

@@ -21,7 +21,7 @@ struct RowOperationsState;
 
 typedef void (*tuple_data_scatter_function_t)(const Vector &source, const TupleDataVectorFormat &source_format,
                                               const SelectionVector &append_sel, const idx_t append_count,
-                                              const TupleDataLayout &layout, Vector &row_locations,
+                                              const TupleDataLayout &layout, const Vector &row_locations,
                                               Vector &heap_locations, const idx_t col_idx,
                                               const UnifiedVectorFormat &list_format,
                                               const vector<TupleDataScatterFunction> &child_functions);
@@ -84,7 +84,11 @@ public:
                    TupleDataPinProperties = TupleDataPinProperties::UNPIN_AFTER_DONE);
    //! Initializes the Chunk state of an Append state
    //! - Useful for optimizing many appends made to the same tuple data collection
-   void
+   void InitializeChunkState(TupleDataChunkState &chunk_state, vector<column_t> column_ids = {});
+   //! Initializes the Chunk state of an Append state
+   //! - Useful for optimizing many appends made to the same tuple data collection
+   static void InitializeChunkState(TupleDataChunkState &chunk_state, const vector<LogicalType> &types,
+                                    vector<column_t> column_ids = {});
    //! Append a DataChunk directly to this TupleDataCollection - calls InitializeAppend and Append internally
    void Append(DataChunk &new_chunk, const SelectionVector &append_sel = *FlatVector::IncrementalSelectionVector(),
                idx_t append_count = DConstants::INVALID_INDEX);
package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp

@@ -42,8 +42,8 @@ struct TupleDataVectorFormat {
    const SelectionVector *original_sel;
    SelectionVector original_owned_sel;
 
-   UnifiedVectorFormat
-   vector<TupleDataVectorFormat>
+   UnifiedVectorFormat unified;
+   vector<TupleDataVectorFormat> children;
    unique_ptr<CombinedListData> combined_list_data;
 };
 
package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp

@@ -148,6 +148,9 @@ public:
        if (!validity_mask) {
            return ValidityBuffer::MAX_ENTRY;
        }
+       return GetValidityEntryUnsafe(entry_idx);
+   }
+   inline V &GetValidityEntryUnsafe(idx_t entry_idx) const {
        return validity_mask[entry_idx];
    }
    static inline bool AllValid(V entry) {
@@ -156,7 +159,7 @@ public:
    static inline bool NoneValid(V entry) {
        return entry == 0;
    }
-   static inline bool RowIsValid(V entry, idx_t idx_in_entry) {
+   static inline bool RowIsValid(const V &entry, const idx_t &idx_in_entry) {
        return entry & (V(1) << V(idx_in_entry));
    }
    static inline void GetEntryIndex(idx_t row_idx, idx_t &entry_idx, idx_t &idx_in_entry) {
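The validity-mask tweaks above split out an unchecked entry accessor and make RowIsValid take const references. The representation is one bit per row within each entry; a toy bit test in the same shape (V chosen here as uint64_t, the mask's usual entry type):

#include <cstdint>
#include <cstdio>

using V = uint64_t; // one validity entry covers 64 rows

static inline bool RowIsValid(const V &entry, const uint64_t &idx_in_entry) {
    // A set bit means the row at that position within the entry is valid.
    return entry & (V(1) << V(idx_in_entry));
}

int main() {
    V entry = 0b1011; // rows 0, 1 and 3 valid; row 2 is NULL
    for (uint64_t i = 0; i < 4; i++) {
        std::printf("row %llu valid: %d\n", static_cast<unsigned long long>(i),
                    static_cast<int>(RowIsValid(entry, i)));
    }
    return 0;
}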
package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp

@@ -285,7 +285,7 @@ struct RepeatFun {
    static constexpr const char *Description = "Repeats the string count number of times";
    static constexpr const char *Example = "repeat('A', 5)";
 
-   static ScalarFunction GetFunction();
+   static ScalarFunctionSet GetFunctions();
 };
 
 struct ReplaceFun {
package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp

@@ -8,6 +8,7 @@
 
 #pragma once
 
+#include "duckdb/common/row_operations/row_matcher.hpp"
 #include "duckdb/common/types/row/partitioned_tuple_data.hpp"
 #include "duckdb/execution/base_aggregate_hashtable.hpp"
 #include "duckdb/storage/arena_allocator.hpp"
@@ -143,6 +144,9 @@ public:
    void UnpinData();
 
 private:
+   //! Efficiently matches groups
+   RowMatcher row_matcher;
+
    //! Append state
    struct AggregateHTAppendState {
        AggregateHTAppendState();