duckdb 0.8.2-dev4711.0 → 0.8.2-dev4871.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +0 -1
- package/binding.gyp.in +0 -1
- package/package.json +1 -1
- package/src/connection.cpp +10 -23
- package/src/data_chunk.cpp +1 -3
- package/src/database.cpp +4 -9
- package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
- package/src/duckdb/extension/json/json_functions.cpp +4 -6
- package/src/duckdb/src/common/enum_util.cpp +10 -5
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +408 -0
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +28 -17
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
- package/src/duckdb/src/core_functions/function_list.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
- package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
- package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
- package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
- package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
- package/src/duckdb/src/function/function_binder.cpp +10 -9
- package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
- package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
- package/src/duckdb/src/main/config.cpp +1 -1
- package/src/duckdb/src/main/relation.cpp +10 -0
- package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
- package/src/duckdb/src/storage/data_table.cpp +10 -0
- package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
- package/src/statement.cpp +2 -4
- package/test/database_fail.test.ts +6 -0
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
@@ -65,7 +65,7 @@ public:
|
|
65
65
|
//! returned by the JoinHashTable::Scan function and can be used to resume a
|
66
66
|
//! probe.
|
67
67
|
struct ScanStructure {
|
68
|
-
|
68
|
+
TupleDataChunkState &key_state;
|
69
69
|
Vector pointers;
|
70
70
|
idx_t count;
|
71
71
|
SelectionVector sel_vector;
|
@@ -74,7 +74,7 @@ public:
|
|
74
74
|
JoinHashTable &ht;
|
75
75
|
bool finished;
|
76
76
|
|
77
|
-
explicit ScanStructure(JoinHashTable &ht);
|
77
|
+
explicit ScanStructure(JoinHashTable &ht, TupleDataChunkState &key_state);
|
78
78
|
//! Get the next batch of data from the scan structure
|
79
79
|
void Next(DataChunk &keys, DataChunk &left, DataChunk &result);
|
80
80
|
|
@@ -130,7 +130,8 @@ public:
|
|
130
130
|
//! ever called.
|
131
131
|
void Finalize(idx_t chunk_idx_from, idx_t chunk_idx_to, bool parallel);
|
132
132
|
//! Probe the HT with the given input chunk, resulting in the given result
|
133
|
-
unique_ptr<ScanStructure> Probe(DataChunk &keys,
|
133
|
+
unique_ptr<ScanStructure> Probe(DataChunk &keys, TupleDataChunkState &key_state,
|
134
|
+
Vector *precomputed_hashes = nullptr);
|
134
135
|
//! Scan the HT to construct the full outer join result
|
135
136
|
void ScanFullOuter(JoinHTScanState &state, Vector &addresses, DataChunk &result);
|
136
137
|
|
@@ -166,6 +167,9 @@ public:
|
|
166
167
|
vector<ExpressionType> predicates;
|
167
168
|
//! Data column layout
|
168
169
|
TupleDataLayout layout;
|
170
|
+
//! Efficiently matches rows
|
171
|
+
RowMatcher row_matcher;
|
172
|
+
RowMatcher row_matcher_no_match_sel;
|
169
173
|
//! The size of an entry as stored in the HashTable
|
170
174
|
idx_t entry_size;
|
171
175
|
//! The total tuple size
|
@@ -201,7 +205,8 @@ public:
|
|
201
205
|
} correlated_mark_join_info;
|
202
206
|
|
203
207
|
private:
|
204
|
-
unique_ptr<ScanStructure> InitializeScanStructure(DataChunk &keys,
|
208
|
+
unique_ptr<ScanStructure> InitializeScanStructure(DataChunk &keys, TupleDataChunkState &key_state,
|
209
|
+
const SelectionVector *&current_sel);
|
205
210
|
void Hash(DataChunk &keys, const SelectionVector &sel, idx_t count, Vector &hashes);
|
206
211
|
|
207
212
|
//! Apply a bitmask to the hashes
|
@@ -212,8 +217,8 @@ private:
|
|
212
217
|
//! Insert the given set of locations into the HT with the given set of hashes
|
213
218
|
void InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[], bool parallel);
|
214
219
|
|
215
|
-
idx_t PrepareKeys(DataChunk &keys,
|
216
|
-
|
220
|
+
idx_t PrepareKeys(DataChunk &keys, vector<TupleDataVectorFormat> &vector_data, const SelectionVector *&current_sel,
|
221
|
+
SelectionVector &sel, bool build_side);
|
217
222
|
|
218
223
|
//! Lock for combining data_collection when merging HTs
|
219
224
|
mutex data_lock;
|
@@ -316,8 +321,9 @@ public:
|
|
316
321
|
//! Build HT for the next partitioned probe round
|
317
322
|
bool PrepareExternalFinalize();
|
318
323
|
//! Probe whatever we can, sink the rest into a thread-local HT
|
319
|
-
unique_ptr<ScanStructure> ProbeAndSpill(DataChunk &keys,
|
320
|
-
|
324
|
+
unique_ptr<ScanStructure> ProbeAndSpill(DataChunk &keys, TupleDataChunkState &key_state, DataChunk &payload,
|
325
|
+
ProbeSpill &probe_spill, ProbeSpillLocalAppendState &spill_state,
|
326
|
+
DataChunk &spill_chunk);
|
321
327
|
|
322
328
|
private:
|
323
329
|
//! First and last partition of the current probe round
|
@@ -103,6 +103,8 @@ public:
|
|
103
103
|
// JOIN operation
|
104
104
|
DUCKDB_API shared_ptr<Relation> Join(const shared_ptr<Relation> &other, const string &condition,
|
105
105
|
JoinType type = JoinType::INNER, JoinRefType ref_type = JoinRefType::REGULAR);
|
106
|
+
shared_ptr<Relation> Join(const shared_ptr<Relation> &other, vector<unique_ptr<ParsedExpression>> condition,
|
107
|
+
JoinType type = JoinType::INNER, JoinRefType ref_type = JoinRefType::REGULAR);
|
106
108
|
|
107
109
|
// CROSS PRODUCT operation
|
108
110
|
DUCKDB_API shared_ptr<Relation> CrossProduct(const shared_ptr<Relation> &other,
|
@@ -121,6 +123,8 @@ public:
|
|
121
123
|
DUCKDB_API shared_ptr<Relation> Aggregate(const vector<string> &aggregates);
|
122
124
|
DUCKDB_API shared_ptr<Relation> Aggregate(const string &aggregate_list, const string &group_list);
|
123
125
|
DUCKDB_API shared_ptr<Relation> Aggregate(const vector<string> &aggregates, const vector<string> &groups);
|
126
|
+
DUCKDB_API shared_ptr<Relation> Aggregate(vector<unique_ptr<ParsedExpression>> expressions,
|
127
|
+
const string &group_list);
|
124
128
|
|
125
129
|
// ALIAS
|
126
130
|
DUCKDB_API shared_ptr<Relation> Alias(const string &alias);
|
@@ -177,7 +177,7 @@ void DBConfig::SetOptionByName(const string &name, const Value &value) {
|
|
177
177
|
void DBConfig::SetOption(DatabaseInstance *db, const ConfigurationOption &option, const Value &value) {
|
178
178
|
lock_guard<mutex> l(config_lock);
|
179
179
|
if (!option.set_global) {
|
180
|
-
throw
|
180
|
+
throw InvalidInputException("Could not set option \"%s\" as a global option", option.name);
|
181
181
|
}
|
182
182
|
D_ASSERT(option.reset_global);
|
183
183
|
Value input = value.DefaultCastAs(option.parameter_type);
|
@@ -130,7 +130,12 @@ shared_ptr<Relation> Relation::Join(const shared_ptr<Relation> &other, const str
|
|
130
130
|
JoinRefType ref_type) {
|
131
131
|
auto expression_list = Parser::ParseExpressionList(condition, context.GetContext()->GetParserOptions());
|
132
132
|
D_ASSERT(!expression_list.empty());
|
133
|
+
return Join(other, std::move(expression_list), type, ref_type);
|
134
|
+
}
|
133
135
|
|
136
|
+
shared_ptr<Relation> Relation::Join(const shared_ptr<Relation> &other,
|
137
|
+
vector<unique_ptr<ParsedExpression>> expression_list, JoinType type,
|
138
|
+
JoinRefType ref_type) {
|
134
139
|
if (expression_list.size() > 1 || expression_list[0]->type == ExpressionType::COLUMN_REF) {
|
135
140
|
// multiple columns or single column ref: the condition is a USING list
|
136
141
|
vector<string> using_columns;
|
@@ -197,6 +202,11 @@ shared_ptr<Relation> Relation::Aggregate(const vector<string> &aggregates, const
|
|
197
202
|
return this->Aggregate(aggregate_list, group_list);
|
198
203
|
}
|
199
204
|
|
205
|
+
shared_ptr<Relation> Relation::Aggregate(vector<unique_ptr<ParsedExpression>> expressions, const string &group_list) {
|
206
|
+
auto groups = Parser::ParseGroupByList(group_list, context.GetContext()->GetParserOptions());
|
207
|
+
return make_shared<AggregateRelation>(shared_from_this(), std::move(expressions), std::move(groups));
|
208
|
+
}
|
209
|
+
|
200
210
|
string Relation::GetAlias() {
|
201
211
|
return "relation";
|
202
212
|
}
|
@@ -70,9 +70,6 @@ unique_ptr<Expression> DatePartSimplificationRule::Apply(LogicalOperator &op, ve
|
|
70
70
|
case DatePartSpecifier::DOY:
|
71
71
|
new_function_name = "dayofyear";
|
72
72
|
break;
|
73
|
-
case DatePartSpecifier::EPOCH:
|
74
|
-
new_function_name = "epoch";
|
75
|
-
break;
|
76
73
|
case DatePartSpecifier::MICROSECONDS:
|
77
74
|
new_function_name = "microsecond";
|
78
75
|
break;
|
@@ -51,6 +51,7 @@ void LogicalComparisonJoin::ExtractJoinConditions(
|
|
51
51
|
unique_ptr<LogicalOperator> &right_child, const unordered_set<idx_t> &left_bindings,
|
52
52
|
const unordered_set<idx_t> &right_bindings, vector<unique_ptr<Expression>> &expressions,
|
53
53
|
vector<JoinCondition> &conditions, vector<unique_ptr<Expression>> &arbitrary_expressions) {
|
54
|
+
|
54
55
|
for (auto &expr : expressions) {
|
55
56
|
auto total_side = JoinSide::GetJoinSide(*expr, left_bindings, right_bindings);
|
56
57
|
if (total_side != JoinSide::BOTH) {
|
@@ -77,10 +78,17 @@ void LogicalComparisonJoin::ExtractJoinConditions(
|
|
77
78
|
continue;
|
78
79
|
}
|
79
80
|
}
|
80
|
-
} else if (
|
81
|
-
|
82
|
-
expr->type == ExpressionType::
|
83
|
-
expr->type == ExpressionType::
|
81
|
+
} else if (expr->type == ExpressionType::COMPARE_EQUAL || expr->type == ExpressionType::COMPARE_NOTEQUAL ||
|
82
|
+
expr->type == ExpressionType::COMPARE_BOUNDARY_START ||
|
83
|
+
expr->type == ExpressionType::COMPARE_LESSTHAN ||
|
84
|
+
expr->type == ExpressionType::COMPARE_GREATERTHAN ||
|
85
|
+
expr->type == ExpressionType::COMPARE_LESSTHANOREQUALTO ||
|
86
|
+
expr->type == ExpressionType::COMPARE_GREATERTHANOREQUALTO ||
|
87
|
+
expr->type == ExpressionType::COMPARE_BOUNDARY_START ||
|
88
|
+
expr->type == ExpressionType::COMPARE_NOT_DISTINCT_FROM ||
|
89
|
+
expr->type == ExpressionType::COMPARE_DISTINCT_FROM)
|
90
|
+
|
91
|
+
{
|
84
92
|
// comparison, check if we can create a comparison JoinCondition
|
85
93
|
if (CreateJoinCondition(*expr, left_bindings, right_bindings, conditions)) {
|
86
94
|
// successfully created the join condition
|
@@ -449,11 +449,10 @@ void ValidityRevertAppend(ColumnSegment &segment, idx_t start_row) {
|
|
449
449
|
if (start_bit % 8 != 0) {
|
450
450
|
// handle sub-bit stuff (yay)
|
451
451
|
idx_t byte_pos = start_bit / 8;
|
452
|
-
idx_t bit_start = byte_pos * 8;
|
453
452
|
idx_t bit_end = (byte_pos + 1) * 8;
|
454
|
-
ValidityMask mask(reinterpret_cast<validity_t *>(handle.Ptr()
|
453
|
+
ValidityMask mask(reinterpret_cast<validity_t *>(handle.Ptr()));
|
455
454
|
for (idx_t i = start_bit; i < bit_end; i++) {
|
456
|
-
mask.SetValid(i
|
455
|
+
mask.SetValid(i);
|
457
456
|
}
|
458
457
|
revert_start = bit_end / 8;
|
459
458
|
} else {
|
@@ -832,6 +832,7 @@ void DataTable::RevertAppendInternal(idx_t start_row, idx_t count) {
|
|
832
832
|
void DataTable::RevertAppend(idx_t start_row, idx_t count) {
|
833
833
|
lock_guard<mutex> lock(append_lock);
|
834
834
|
|
835
|
+
// revert any appends to indexes
|
835
836
|
if (!info->indexes.Empty()) {
|
836
837
|
idx_t current_row_base = start_row;
|
837
838
|
row_t row_data[STANDARD_VECTOR_SIZE];
|
@@ -847,6 +848,15 @@ void DataTable::RevertAppend(idx_t start_row, idx_t count) {
|
|
847
848
|
current_row_base += chunk.size();
|
848
849
|
});
|
849
850
|
}
|
851
|
+
|
852
|
+
// we need to vacuum the indexes to remove any buffers that are now empty
|
853
|
+
// due to reverting the appends
|
854
|
+
info->indexes.Scan([&](Index &index) {
|
855
|
+
index.Vacuum();
|
856
|
+
return false;
|
857
|
+
});
|
858
|
+
|
859
|
+
// revert the data table append
|
850
860
|
RevertAppendInternal(start_row, count);
|
851
861
|
}
|
852
862
|
|
package/src/statement.cpp
CHANGED
@@ -93,11 +93,9 @@ Statement::Statement(const Napi::CallbackInfo &info) : Napi::ObjectWrap<Statemen
|
|
93
93
|
int length = info.Length();
|
94
94
|
|
95
95
|
if (length <= 0 || !Connection::HasInstance(info[0])) {
|
96
|
-
Napi::TypeError::New(env, "Connection object expected")
|
97
|
-
return;
|
96
|
+
throw Napi::TypeError::New(env, "Connection object expected");
|
98
97
|
} else if (length <= 1 || !info[1].IsString()) {
|
99
|
-
Napi::TypeError::New(env, "SQL query expected")
|
100
|
-
return;
|
98
|
+
throw Napi::TypeError::New(env, "SQL query expected");
|
101
99
|
}
|
102
100
|
|
103
101
|
connection_ref = Napi::ObjectWrap<Connection>::Unwrap(info[0].As<Napi::Object>());
|
@@ -2,6 +2,7 @@ import * as sqlite3 from '..';
|
|
2
2
|
import * as assert from 'assert';
|
3
3
|
import {DuckDbError, RowData} from "..";
|
4
4
|
import {Worker} from 'worker_threads';
|
5
|
+
import {expect} from 'chai';
|
5
6
|
|
6
7
|
describe('error handling', function() {
|
7
8
|
var db: sqlite3.Database;
|
@@ -163,4 +164,9 @@ describe('error handling', function() {
|
|
163
164
|
await run_worker(); // first should always succeed
|
164
165
|
await run_worker(); // second fails without thread safety
|
165
166
|
})
|
167
|
+
|
168
|
+
it("shouldn't crash on an exception", () => {
|
169
|
+
expect(() => new sqlite3.Database(':memory:', {file_search_path: '/'})).to.throw('Could not set option "file_search_path" as a global option');
|
170
|
+
});
|
166
171
|
});
|
172
|
+
|
@@ -1,359 +0,0 @@
|
|
1
|
-
//===--------------------------------------------------------------------===//
|
2
|
-
// row_match.cpp
|
3
|
-
// Description: This file contains the implementation of the match operators
|
4
|
-
//===--------------------------------------------------------------------===//
|
5
|
-
|
6
|
-
#include "duckdb/common/exception.hpp"
|
7
|
-
#include "duckdb/common/operator/comparison_operators.hpp"
|
8
|
-
#include "duckdb/common/operator/constant_operators.hpp"
|
9
|
-
#include "duckdb/common/row_operations/row_operations.hpp"
|
10
|
-
#include "duckdb/common/types/row/tuple_data_collection.hpp"
|
11
|
-
|
12
|
-
namespace duckdb {
|
13
|
-
|
14
|
-
using ValidityBytes = RowLayout::ValidityBytes;
|
15
|
-
using Predicates = RowOperations::Predicates;
|
16
|
-
|
17
|
-
template <typename OP>
|
18
|
-
static idx_t SelectComparison(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
19
|
-
SelectionVector *true_sel, SelectionVector *false_sel) {
|
20
|
-
throw NotImplementedException("Unsupported nested comparison operand for RowOperations::Match");
|
21
|
-
}
|
22
|
-
|
23
|
-
template <>
|
24
|
-
idx_t SelectComparison<Equals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
25
|
-
SelectionVector *true_sel, SelectionVector *false_sel) {
|
26
|
-
return VectorOperations::NestedEquals(left, right, sel, count, true_sel, false_sel);
|
27
|
-
}
|
28
|
-
|
29
|
-
template <>
|
30
|
-
idx_t SelectComparison<NotEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
31
|
-
SelectionVector *true_sel, SelectionVector *false_sel) {
|
32
|
-
return VectorOperations::NestedNotEquals(left, right, sel, count, true_sel, false_sel);
|
33
|
-
}
|
34
|
-
|
35
|
-
template <>
|
36
|
-
idx_t SelectComparison<GreaterThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
37
|
-
SelectionVector *true_sel, SelectionVector *false_sel) {
|
38
|
-
return VectorOperations::DistinctGreaterThan(left, right, &sel, count, true_sel, false_sel);
|
39
|
-
}
|
40
|
-
|
41
|
-
template <>
|
42
|
-
idx_t SelectComparison<GreaterThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
43
|
-
SelectionVector *true_sel, SelectionVector *false_sel) {
|
44
|
-
return VectorOperations::DistinctGreaterThanEquals(left, right, &sel, count, true_sel, false_sel);
|
45
|
-
}
|
46
|
-
|
47
|
-
template <>
|
48
|
-
idx_t SelectComparison<LessThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
49
|
-
SelectionVector *true_sel, SelectionVector *false_sel) {
|
50
|
-
return VectorOperations::DistinctLessThan(left, right, &sel, count, true_sel, false_sel);
|
51
|
-
}
|
52
|
-
|
53
|
-
template <>
|
54
|
-
idx_t SelectComparison<LessThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
|
55
|
-
SelectionVector *true_sel, SelectionVector *false_sel) {
|
56
|
-
return VectorOperations::DistinctLessThanEquals(left, right, &sel, count, true_sel, false_sel);
|
57
|
-
}
|
58
|
-
|
59
|
-
template <class T, class OP, bool NO_MATCH_SEL>
|
60
|
-
static void TemplatedMatchType(UnifiedVectorFormat &col, Vector &rows, SelectionVector &sel, idx_t &count,
|
61
|
-
idx_t col_offset, idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
|
62
|
-
// Precompute row_mask indexes
|
63
|
-
idx_t entry_idx;
|
64
|
-
idx_t idx_in_entry;
|
65
|
-
ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry);
|
66
|
-
|
67
|
-
auto data = UnifiedVectorFormat::GetData<T>(col);
|
68
|
-
auto ptrs = FlatVector::GetData<data_ptr_t>(rows);
|
69
|
-
idx_t match_count = 0;
|
70
|
-
if (!col.validity.AllValid()) {
|
71
|
-
for (idx_t i = 0; i < count; i++) {
|
72
|
-
auto idx = sel.get_index(i);
|
73
|
-
|
74
|
-
auto row = ptrs[idx];
|
75
|
-
ValidityBytes row_mask(row);
|
76
|
-
auto isnull = !row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry);
|
77
|
-
|
78
|
-
auto col_idx = col.sel->get_index(idx);
|
79
|
-
if (!col.validity.RowIsValid(col_idx)) {
|
80
|
-
if (isnull) {
|
81
|
-
// match: move to next value to compare
|
82
|
-
sel.set_index(match_count++, idx);
|
83
|
-
} else {
|
84
|
-
if (NO_MATCH_SEL) {
|
85
|
-
no_match->set_index(no_match_count++, idx);
|
86
|
-
}
|
87
|
-
}
|
88
|
-
} else {
|
89
|
-
auto value = Load<T>(row + col_offset);
|
90
|
-
if (!isnull && OP::template Operation<T>(data[col_idx], value)) {
|
91
|
-
sel.set_index(match_count++, idx);
|
92
|
-
} else {
|
93
|
-
if (NO_MATCH_SEL) {
|
94
|
-
no_match->set_index(no_match_count++, idx);
|
95
|
-
}
|
96
|
-
}
|
97
|
-
}
|
98
|
-
}
|
99
|
-
} else {
|
100
|
-
for (idx_t i = 0; i < count; i++) {
|
101
|
-
auto idx = sel.get_index(i);
|
102
|
-
|
103
|
-
auto row = ptrs[idx];
|
104
|
-
ValidityBytes row_mask(row);
|
105
|
-
auto isnull = !row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry);
|
106
|
-
|
107
|
-
auto col_idx = col.sel->get_index(idx);
|
108
|
-
auto value = Load<T>(row + col_offset);
|
109
|
-
if (!isnull && OP::template Operation<T>(data[col_idx], value)) {
|
110
|
-
sel.set_index(match_count++, idx);
|
111
|
-
} else {
|
112
|
-
if (NO_MATCH_SEL) {
|
113
|
-
no_match->set_index(no_match_count++, idx);
|
114
|
-
}
|
115
|
-
}
|
116
|
-
}
|
117
|
-
}
|
118
|
-
count = match_count;
|
119
|
-
}
|
120
|
-
|
121
|
-
//! Forward declaration for recursion
|
122
|
-
template <class OP, bool NO_MATCH_SEL>
|
123
|
-
static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows,
|
124
|
-
SelectionVector &sel, idx_t &count, idx_t col_no, SelectionVector *no_match,
|
125
|
-
idx_t &no_match_count, const idx_t original_count);
|
126
|
-
|
127
|
-
template <class OP, bool NO_MATCH_SEL>
|
128
|
-
static void TemplatedMatchStruct(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows,
|
129
|
-
SelectionVector &sel, idx_t &count, const idx_t col_no, SelectionVector *no_match,
|
130
|
-
idx_t &no_match_count, const idx_t original_count) {
|
131
|
-
// Precompute row_mask indexes
|
132
|
-
idx_t entry_idx;
|
133
|
-
idx_t idx_in_entry;
|
134
|
-
ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry);
|
135
|
-
|
136
|
-
// Work our way through the validity of the whole struct
|
137
|
-
auto ptrs = FlatVector::GetData<data_ptr_t>(rows);
|
138
|
-
idx_t match_count = 0;
|
139
|
-
if (!col.validity.AllValid()) {
|
140
|
-
for (idx_t i = 0; i < count; i++) {
|
141
|
-
auto idx = sel.get_index(i);
|
142
|
-
|
143
|
-
auto row = ptrs[idx];
|
144
|
-
ValidityBytes row_mask(row);
|
145
|
-
auto isnull = !row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry);
|
146
|
-
|
147
|
-
auto col_idx = col.sel->get_index(idx);
|
148
|
-
if (!col.validity.RowIsValid(col_idx)) {
|
149
|
-
if (isnull) {
|
150
|
-
// match: move to next value to compare
|
151
|
-
sel.set_index(match_count++, idx);
|
152
|
-
} else {
|
153
|
-
if (NO_MATCH_SEL) {
|
154
|
-
no_match->set_index(no_match_count++, idx);
|
155
|
-
}
|
156
|
-
}
|
157
|
-
} else {
|
158
|
-
if (!isnull) {
|
159
|
-
sel.set_index(match_count++, idx);
|
160
|
-
} else {
|
161
|
-
if (NO_MATCH_SEL) {
|
162
|
-
no_match->set_index(no_match_count++, idx);
|
163
|
-
}
|
164
|
-
}
|
165
|
-
}
|
166
|
-
}
|
167
|
-
} else {
|
168
|
-
for (idx_t i = 0; i < count; i++) {
|
169
|
-
auto idx = sel.get_index(i);
|
170
|
-
|
171
|
-
auto row = ptrs[idx];
|
172
|
-
ValidityBytes row_mask(row);
|
173
|
-
auto isnull = !row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry);
|
174
|
-
|
175
|
-
if (!isnull) {
|
176
|
-
sel.set_index(match_count++, idx);
|
177
|
-
} else {
|
178
|
-
if (NO_MATCH_SEL) {
|
179
|
-
no_match->set_index(no_match_count++, idx);
|
180
|
-
}
|
181
|
-
}
|
182
|
-
}
|
183
|
-
}
|
184
|
-
count = match_count;
|
185
|
-
|
186
|
-
// Now we construct row pointers to the structs
|
187
|
-
Vector struct_rows(LogicalTypeId::POINTER);
|
188
|
-
auto struct_ptrs = FlatVector::GetData<data_ptr_t>(struct_rows);
|
189
|
-
|
190
|
-
const auto col_offset = layout.GetOffsets()[col_no];
|
191
|
-
for (idx_t i = 0; i < count; i++) {
|
192
|
-
auto idx = sel.get_index(i);
|
193
|
-
auto row = ptrs[idx];
|
194
|
-
struct_ptrs[idx] = row + col_offset;
|
195
|
-
}
|
196
|
-
|
197
|
-
// Get the struct layout, child columns, then recurse
|
198
|
-
const auto &struct_layout = layout.GetStructLayout(col_no);
|
199
|
-
auto &struct_entries = StructVector::GetEntries(vec);
|
200
|
-
D_ASSERT(struct_layout.ColumnCount() == struct_entries.size());
|
201
|
-
for (idx_t struct_col_no = 0; struct_col_no < struct_layout.ColumnCount(); struct_col_no++) {
|
202
|
-
auto &struct_vec = *struct_entries[struct_col_no];
|
203
|
-
UnifiedVectorFormat struct_col;
|
204
|
-
struct_vec.ToUnifiedFormat(original_count, struct_col);
|
205
|
-
TemplatedMatchOp<OP, NO_MATCH_SEL>(struct_vec, struct_col, struct_layout, struct_rows, sel, count,
|
206
|
-
struct_col_no, no_match, no_match_count, original_count);
|
207
|
-
}
|
208
|
-
}
|
209
|
-
|
210
|
-
template <class OP, bool NO_MATCH_SEL>
|
211
|
-
static void TemplatedMatchList(Vector &col, Vector &rows, SelectionVector &sel, idx_t &count,
|
212
|
-
const TupleDataLayout &layout, const idx_t col_no, SelectionVector *no_match,
|
213
|
-
idx_t &no_match_count) {
|
214
|
-
// Gather a dense Vector containing the column values being matched
|
215
|
-
Vector key(col.GetType());
|
216
|
-
const auto gather_function = TupleDataCollection::GetGatherFunction(col.GetType());
|
217
|
-
gather_function.function(layout, rows, col_no, sel, count, key, *FlatVector::IncrementalSelectionVector(), key,
|
218
|
-
gather_function.child_functions);
|
219
|
-
|
220
|
-
// Densify the input column
|
221
|
-
Vector sliced(col, sel, count);
|
222
|
-
|
223
|
-
if (NO_MATCH_SEL) {
|
224
|
-
SelectionVector no_match_sel_offset(no_match->data() + no_match_count);
|
225
|
-
auto match_count = SelectComparison<OP>(sliced, key, sel, count, &sel, &no_match_sel_offset);
|
226
|
-
no_match_count += count - match_count;
|
227
|
-
count = match_count;
|
228
|
-
} else {
|
229
|
-
count = SelectComparison<OP>(sliced, key, sel, count, &sel, nullptr);
|
230
|
-
}
|
231
|
-
}
|
232
|
-
|
233
|
-
template <class OP, bool NO_MATCH_SEL>
|
234
|
-
static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows,
|
235
|
-
SelectionVector &sel, idx_t &count, idx_t col_no, SelectionVector *no_match,
|
236
|
-
idx_t &no_match_count, const idx_t original_count) {
|
237
|
-
if (count == 0) {
|
238
|
-
return;
|
239
|
-
}
|
240
|
-
auto col_offset = layout.GetOffsets()[col_no];
|
241
|
-
switch (layout.GetTypes()[col_no].InternalType()) {
|
242
|
-
case PhysicalType::BOOL:
|
243
|
-
case PhysicalType::INT8:
|
244
|
-
TemplatedMatchType<int8_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
245
|
-
no_match_count);
|
246
|
-
break;
|
247
|
-
case PhysicalType::INT16:
|
248
|
-
TemplatedMatchType<int16_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
249
|
-
no_match_count);
|
250
|
-
break;
|
251
|
-
case PhysicalType::INT32:
|
252
|
-
TemplatedMatchType<int32_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
253
|
-
no_match_count);
|
254
|
-
break;
|
255
|
-
case PhysicalType::INT64:
|
256
|
-
TemplatedMatchType<int64_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
257
|
-
no_match_count);
|
258
|
-
break;
|
259
|
-
case PhysicalType::UINT8:
|
260
|
-
TemplatedMatchType<uint8_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
261
|
-
no_match_count);
|
262
|
-
break;
|
263
|
-
case PhysicalType::UINT16:
|
264
|
-
TemplatedMatchType<uint16_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
265
|
-
no_match_count);
|
266
|
-
break;
|
267
|
-
case PhysicalType::UINT32:
|
268
|
-
TemplatedMatchType<uint32_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
269
|
-
no_match_count);
|
270
|
-
break;
|
271
|
-
case PhysicalType::UINT64:
|
272
|
-
TemplatedMatchType<uint64_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
273
|
-
no_match_count);
|
274
|
-
break;
|
275
|
-
case PhysicalType::INT128:
|
276
|
-
TemplatedMatchType<hugeint_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
277
|
-
no_match_count);
|
278
|
-
break;
|
279
|
-
case PhysicalType::FLOAT:
|
280
|
-
TemplatedMatchType<float, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
281
|
-
no_match_count);
|
282
|
-
break;
|
283
|
-
case PhysicalType::DOUBLE:
|
284
|
-
TemplatedMatchType<double, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
285
|
-
no_match_count);
|
286
|
-
break;
|
287
|
-
case PhysicalType::INTERVAL:
|
288
|
-
TemplatedMatchType<interval_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
289
|
-
no_match_count);
|
290
|
-
break;
|
291
|
-
case PhysicalType::VARCHAR:
|
292
|
-
TemplatedMatchType<string_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
|
293
|
-
no_match_count);
|
294
|
-
break;
|
295
|
-
case PhysicalType::STRUCT:
|
296
|
-
TemplatedMatchStruct<OP, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, no_match_count,
|
297
|
-
original_count);
|
298
|
-
break;
|
299
|
-
case PhysicalType::LIST:
|
300
|
-
TemplatedMatchList<OP, NO_MATCH_SEL>(vec, rows, sel, count, layout, col_no, no_match, no_match_count);
|
301
|
-
break;
|
302
|
-
default:
|
303
|
-
throw InternalException("Unsupported column type for RowOperations::Match");
|
304
|
-
}
|
305
|
-
}
|
306
|
-
|
307
|
-
template <bool NO_MATCH_SEL>
|
308
|
-
static void TemplatedMatch(DataChunk &columns, UnifiedVectorFormat col_data[], const TupleDataLayout &layout,
|
309
|
-
Vector &rows, const Predicates &predicates, SelectionVector &sel, idx_t &count,
|
310
|
-
SelectionVector *no_match, idx_t &no_match_count) {
|
311
|
-
for (idx_t col_no = 0; col_no < predicates.size(); ++col_no) {
|
312
|
-
auto &vec = columns.data[col_no];
|
313
|
-
auto &col = col_data[col_no];
|
314
|
-
switch (predicates[col_no]) {
|
315
|
-
case ExpressionType::COMPARE_EQUAL:
|
316
|
-
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
317
|
-
case ExpressionType::COMPARE_DISTINCT_FROM:
|
318
|
-
TemplatedMatchOp<Equals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, no_match_count,
|
319
|
-
count);
|
320
|
-
break;
|
321
|
-
case ExpressionType::COMPARE_NOTEQUAL:
|
322
|
-
TemplatedMatchOp<NotEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
|
323
|
-
no_match_count, count);
|
324
|
-
break;
|
325
|
-
case ExpressionType::COMPARE_GREATERTHAN:
|
326
|
-
TemplatedMatchOp<GreaterThan, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
|
327
|
-
no_match_count, count);
|
328
|
-
break;
|
329
|
-
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
330
|
-
TemplatedMatchOp<GreaterThanEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
|
331
|
-
no_match_count, count);
|
332
|
-
break;
|
333
|
-
case ExpressionType::COMPARE_LESSTHAN:
|
334
|
-
TemplatedMatchOp<LessThan, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
|
335
|
-
no_match_count, count);
|
336
|
-
break;
|
337
|
-
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
338
|
-
TemplatedMatchOp<LessThanEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
|
339
|
-
no_match_count, count);
|
340
|
-
break;
|
341
|
-
default:
|
342
|
-
throw InternalException("Unsupported comparison type for RowOperations::Match");
|
343
|
-
}
|
344
|
-
}
|
345
|
-
}
|
346
|
-
|
347
|
-
idx_t RowOperations::Match(DataChunk &columns, UnifiedVectorFormat col_data[], const TupleDataLayout &layout,
|
348
|
-
Vector &rows, const Predicates &predicates, SelectionVector &sel, idx_t count,
|
349
|
-
SelectionVector *no_match, idx_t &no_match_count) {
|
350
|
-
if (no_match) {
|
351
|
-
TemplatedMatch<true>(columns, col_data, layout, rows, predicates, sel, count, no_match, no_match_count);
|
352
|
-
} else {
|
353
|
-
TemplatedMatch<false>(columns, col_data, layout, rows, predicates, sel, count, no_match, no_match_count);
|
354
|
-
}
|
355
|
-
|
356
|
-
return count;
|
357
|
-
}
|
358
|
-
|
359
|
-
} // namespace duckdb
|