duckdb 0.8.2-dev4711.0 → 0.8.2-dev4871.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/binding.gyp +0 -1
  2. package/binding.gyp.in +0 -1
  3. package/package.json +1 -1
  4. package/src/connection.cpp +10 -23
  5. package/src/data_chunk.cpp +1 -3
  6. package/src/database.cpp +4 -9
  7. package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
  8. package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
  9. package/src/duckdb/extension/json/json_functions.cpp +4 -6
  10. package/src/duckdb/src/common/enum_util.cpp +10 -5
  11. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  12. package/src/duckdb/src/common/row_operations/row_matcher.cpp +408 -0
  13. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
  14. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +28 -17
  15. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
  16. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
  17. package/src/duckdb/src/core_functions/function_list.cpp +1 -1
  18. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
  19. package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
  20. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
  21. package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
  22. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
  23. package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
  24. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
  25. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
  26. package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
  27. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
  28. package/src/duckdb/src/function/function_binder.cpp +10 -9
  29. package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
  30. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  31. package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
  32. package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
  33. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -2
  34. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
  35. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
  36. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +1 -1
  37. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
  38. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
  39. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
  40. package/src/duckdb/src/main/config.cpp +1 -1
  41. package/src/duckdb/src/main/relation.cpp +10 -0
  42. package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
  43. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
  44. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
  45. package/src/duckdb/src/storage/data_table.cpp +10 -0
  46. package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
  47. package/src/statement.cpp +2 -4
  48. package/test/database_fail.test.ts +6 -0
  49. package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
@@ -65,7 +65,7 @@ public:
65
65
  //! returned by the JoinHashTable::Scan function and can be used to resume a
66
66
  //! probe.
67
67
  struct ScanStructure {
68
- unsafe_unique_array<UnifiedVectorFormat> key_data;
68
+ TupleDataChunkState &key_state;
69
69
  Vector pointers;
70
70
  idx_t count;
71
71
  SelectionVector sel_vector;
@@ -74,7 +74,7 @@ public:
74
74
  JoinHashTable &ht;
75
75
  bool finished;
76
76
 
77
- explicit ScanStructure(JoinHashTable &ht);
77
+ explicit ScanStructure(JoinHashTable &ht, TupleDataChunkState &key_state);
78
78
  //! Get the next batch of data from the scan structure
79
79
  void Next(DataChunk &keys, DataChunk &left, DataChunk &result);
80
80
 
@@ -130,7 +130,8 @@ public:
130
130
  //! ever called.
131
131
  void Finalize(idx_t chunk_idx_from, idx_t chunk_idx_to, bool parallel);
132
132
  //! Probe the HT with the given input chunk, resulting in the given result
133
- unique_ptr<ScanStructure> Probe(DataChunk &keys, Vector *precomputed_hashes = nullptr);
133
+ unique_ptr<ScanStructure> Probe(DataChunk &keys, TupleDataChunkState &key_state,
134
+ Vector *precomputed_hashes = nullptr);
134
135
  //! Scan the HT to construct the full outer join result
135
136
  void ScanFullOuter(JoinHTScanState &state, Vector &addresses, DataChunk &result);
136
137
 
@@ -166,6 +167,9 @@ public:
166
167
  vector<ExpressionType> predicates;
167
168
  //! Data column layout
168
169
  TupleDataLayout layout;
170
+ //! Efficiently matches rows
171
+ RowMatcher row_matcher;
172
+ RowMatcher row_matcher_no_match_sel;
169
173
  //! The size of an entry as stored in the HashTable
170
174
  idx_t entry_size;
171
175
  //! The total tuple size
@@ -201,7 +205,8 @@ public:
201
205
  } correlated_mark_join_info;
202
206
 
203
207
  private:
204
- unique_ptr<ScanStructure> InitializeScanStructure(DataChunk &keys, const SelectionVector *&current_sel);
208
+ unique_ptr<ScanStructure> InitializeScanStructure(DataChunk &keys, TupleDataChunkState &key_state,
209
+ const SelectionVector *&current_sel);
205
210
  void Hash(DataChunk &keys, const SelectionVector &sel, idx_t count, Vector &hashes);
206
211
 
207
212
  //! Apply a bitmask to the hashes
@@ -212,8 +217,8 @@ private:
212
217
  //! Insert the given set of locations into the HT with the given set of hashes
213
218
  void InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[], bool parallel);
214
219
 
215
- idx_t PrepareKeys(DataChunk &keys, unsafe_unique_array<UnifiedVectorFormat> &key_data,
216
- const SelectionVector *&current_sel, SelectionVector &sel, bool build_side);
220
+ idx_t PrepareKeys(DataChunk &keys, vector<TupleDataVectorFormat> &vector_data, const SelectionVector *&current_sel,
221
+ SelectionVector &sel, bool build_side);
217
222
 
218
223
  //! Lock for combining data_collection when merging HTs
219
224
  mutex data_lock;
@@ -316,8 +321,9 @@ public:
316
321
  //! Build HT for the next partitioned probe round
317
322
  bool PrepareExternalFinalize();
318
323
  //! Probe whatever we can, sink the rest into a thread-local HT
319
- unique_ptr<ScanStructure> ProbeAndSpill(DataChunk &keys, DataChunk &payload, ProbeSpill &probe_spill,
320
- ProbeSpillLocalAppendState &spill_state, DataChunk &spill_chunk);
324
+ unique_ptr<ScanStructure> ProbeAndSpill(DataChunk &keys, TupleDataChunkState &key_state, DataChunk &payload,
325
+ ProbeSpill &probe_spill, ProbeSpillLocalAppendState &spill_state,
326
+ DataChunk &spill_chunk);
321
327
 
322
328
  private:
323
329
  //! First and last partition of the current probe round
@@ -103,6 +103,8 @@ public:
103
103
  // JOIN operation
104
104
  DUCKDB_API shared_ptr<Relation> Join(const shared_ptr<Relation> &other, const string &condition,
105
105
  JoinType type = JoinType::INNER, JoinRefType ref_type = JoinRefType::REGULAR);
106
+ shared_ptr<Relation> Join(const shared_ptr<Relation> &other, vector<unique_ptr<ParsedExpression>> condition,
107
+ JoinType type = JoinType::INNER, JoinRefType ref_type = JoinRefType::REGULAR);
106
108
 
107
109
  // CROSS PRODUCT operation
108
110
  DUCKDB_API shared_ptr<Relation> CrossProduct(const shared_ptr<Relation> &other,
@@ -121,6 +123,8 @@ public:
121
123
  DUCKDB_API shared_ptr<Relation> Aggregate(const vector<string> &aggregates);
122
124
  DUCKDB_API shared_ptr<Relation> Aggregate(const string &aggregate_list, const string &group_list);
123
125
  DUCKDB_API shared_ptr<Relation> Aggregate(const vector<string> &aggregates, const vector<string> &groups);
126
+ DUCKDB_API shared_ptr<Relation> Aggregate(vector<unique_ptr<ParsedExpression>> expressions,
127
+ const string &group_list);
124
128
 
125
129
  // ALIAS
126
130
  DUCKDB_API shared_ptr<Relation> Alias(const string &alias);
@@ -177,7 +177,7 @@ void DBConfig::SetOptionByName(const string &name, const Value &value) {
177
177
  void DBConfig::SetOption(DatabaseInstance *db, const ConfigurationOption &option, const Value &value) {
178
178
  lock_guard<mutex> l(config_lock);
179
179
  if (!option.set_global) {
180
- throw InternalException("Could not set option \"%s\" as a global option", option.name);
180
+ throw InvalidInputException("Could not set option \"%s\" as a global option", option.name);
181
181
  }
182
182
  D_ASSERT(option.reset_global);
183
183
  Value input = value.DefaultCastAs(option.parameter_type);
@@ -130,7 +130,12 @@ shared_ptr<Relation> Relation::Join(const shared_ptr<Relation> &other, const str
130
130
  JoinRefType ref_type) {
131
131
  auto expression_list = Parser::ParseExpressionList(condition, context.GetContext()->GetParserOptions());
132
132
  D_ASSERT(!expression_list.empty());
133
+ return Join(other, std::move(expression_list), type, ref_type);
134
+ }
133
135
 
136
+ shared_ptr<Relation> Relation::Join(const shared_ptr<Relation> &other,
137
+ vector<unique_ptr<ParsedExpression>> expression_list, JoinType type,
138
+ JoinRefType ref_type) {
134
139
  if (expression_list.size() > 1 || expression_list[0]->type == ExpressionType::COLUMN_REF) {
135
140
  // multiple columns or single column ref: the condition is a USING list
136
141
  vector<string> using_columns;
@@ -197,6 +202,11 @@ shared_ptr<Relation> Relation::Aggregate(const vector<string> &aggregates, const
197
202
  return this->Aggregate(aggregate_list, group_list);
198
203
  }
199
204
 
205
+ shared_ptr<Relation> Relation::Aggregate(vector<unique_ptr<ParsedExpression>> expressions, const string &group_list) {
206
+ auto groups = Parser::ParseGroupByList(group_list, context.GetContext()->GetParserOptions());
207
+ return make_shared<AggregateRelation>(shared_from_this(), std::move(expressions), std::move(groups));
208
+ }
209
+
200
210
  string Relation::GetAlias() {
201
211
  return "relation";
202
212
  }
@@ -70,9 +70,6 @@ unique_ptr<Expression> DatePartSimplificationRule::Apply(LogicalOperator &op, ve
70
70
  case DatePartSpecifier::DOY:
71
71
  new_function_name = "dayofyear";
72
72
  break;
73
- case DatePartSpecifier::EPOCH:
74
- new_function_name = "epoch";
75
- break;
76
73
  case DatePartSpecifier::MICROSECONDS:
77
74
  new_function_name = "microsecond";
78
75
  break;
@@ -51,6 +51,7 @@ void LogicalComparisonJoin::ExtractJoinConditions(
51
51
  unique_ptr<LogicalOperator> &right_child, const unordered_set<idx_t> &left_bindings,
52
52
  const unordered_set<idx_t> &right_bindings, vector<unique_ptr<Expression>> &expressions,
53
53
  vector<JoinCondition> &conditions, vector<unique_ptr<Expression>> &arbitrary_expressions) {
54
+
54
55
  for (auto &expr : expressions) {
55
56
  auto total_side = JoinSide::GetJoinSide(*expr, left_bindings, right_bindings);
56
57
  if (total_side != JoinSide::BOTH) {
@@ -77,10 +78,17 @@ void LogicalComparisonJoin::ExtractJoinConditions(
77
78
  continue;
78
79
  }
79
80
  }
80
- } else if ((expr->type >= ExpressionType::COMPARE_EQUAL &&
81
- expr->type <= ExpressionType::COMPARE_GREATERTHANOREQUALTO) ||
82
- expr->type == ExpressionType::COMPARE_DISTINCT_FROM ||
83
- expr->type == ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
81
+ } else if (expr->type == ExpressionType::COMPARE_EQUAL || expr->type == ExpressionType::COMPARE_NOTEQUAL ||
82
+ expr->type == ExpressionType::COMPARE_BOUNDARY_START ||
83
+ expr->type == ExpressionType::COMPARE_LESSTHAN ||
84
+ expr->type == ExpressionType::COMPARE_GREATERTHAN ||
85
+ expr->type == ExpressionType::COMPARE_LESSTHANOREQUALTO ||
86
+ expr->type == ExpressionType::COMPARE_GREATERTHANOREQUALTO ||
87
+ expr->type == ExpressionType::COMPARE_BOUNDARY_START ||
88
+ expr->type == ExpressionType::COMPARE_NOT_DISTINCT_FROM ||
89
+ expr->type == ExpressionType::COMPARE_DISTINCT_FROM)
90
+
91
+ {
84
92
  // comparison, check if we can create a comparison JoinCondition
85
93
  if (CreateJoinCondition(*expr, left_bindings, right_bindings, conditions)) {
86
94
  // successfully created the join condition
@@ -449,11 +449,10 @@ void ValidityRevertAppend(ColumnSegment &segment, idx_t start_row) {
449
449
  if (start_bit % 8 != 0) {
450
450
  // handle sub-bit stuff (yay)
451
451
  idx_t byte_pos = start_bit / 8;
452
- idx_t bit_start = byte_pos * 8;
453
452
  idx_t bit_end = (byte_pos + 1) * 8;
454
- ValidityMask mask(reinterpret_cast<validity_t *>(handle.Ptr() + byte_pos));
453
+ ValidityMask mask(reinterpret_cast<validity_t *>(handle.Ptr()));
455
454
  for (idx_t i = start_bit; i < bit_end; i++) {
456
- mask.SetValid(i - bit_start);
455
+ mask.SetValid(i);
457
456
  }
458
457
  revert_start = bit_end / 8;
459
458
  } else {
@@ -832,6 +832,7 @@ void DataTable::RevertAppendInternal(idx_t start_row, idx_t count) {
832
832
  void DataTable::RevertAppend(idx_t start_row, idx_t count) {
833
833
  lock_guard<mutex> lock(append_lock);
834
834
 
835
+ // revert any appends to indexes
835
836
  if (!info->indexes.Empty()) {
836
837
  idx_t current_row_base = start_row;
837
838
  row_t row_data[STANDARD_VECTOR_SIZE];
@@ -847,6 +848,15 @@ void DataTable::RevertAppend(idx_t start_row, idx_t count) {
847
848
  current_row_base += chunk.size();
848
849
  });
849
850
  }
851
+
852
+ // we need to vacuum the indexes to remove any buffers that are now empty
853
+ // due to reverting the appends
854
+ info->indexes.Scan([&](Index &index) {
855
+ index.Vacuum();
856
+ return false;
857
+ });
858
+
859
+ // revert the data table append
850
860
  RevertAppendInternal(start_row, count);
851
861
  }
852
862
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  #include "src/common/row_operations/row_gather.cpp"
6
6
 
7
- #include "src/common/row_operations/row_match.cpp"
7
+ #include "src/common/row_operations/row_matcher.cpp"
8
8
 
9
9
  #include "src/common/row_operations/row_external.cpp"
10
10
 
package/src/statement.cpp CHANGED
@@ -93,11 +93,9 @@ Statement::Statement(const Napi::CallbackInfo &info) : Napi::ObjectWrap<Statemen
93
93
  int length = info.Length();
94
94
 
95
95
  if (length <= 0 || !Connection::HasInstance(info[0])) {
96
- Napi::TypeError::New(env, "Connection object expected").ThrowAsJavaScriptException();
97
- return;
96
+ throw Napi::TypeError::New(env, "Connection object expected");
98
97
  } else if (length <= 1 || !info[1].IsString()) {
99
- Napi::TypeError::New(env, "SQL query expected").ThrowAsJavaScriptException();
100
- return;
98
+ throw Napi::TypeError::New(env, "SQL query expected");
101
99
  }
102
100
 
103
101
  connection_ref = Napi::ObjectWrap<Connection>::Unwrap(info[0].As<Napi::Object>());
@@ -2,6 +2,7 @@ import * as sqlite3 from '..';
2
2
  import * as assert from 'assert';
3
3
  import {DuckDbError, RowData} from "..";
4
4
  import {Worker} from 'worker_threads';
5
+ import {expect} from 'chai';
5
6
 
6
7
  describe('error handling', function() {
7
8
  var db: sqlite3.Database;
@@ -163,4 +164,9 @@ describe('error handling', function() {
163
164
  await run_worker(); // first should always succeed
164
165
  await run_worker(); // second fails without thread safety
165
166
  })
167
+
168
+ it("shouldn't crash on an exception", () => {
169
+ expect(() => new sqlite3.Database(':memory:', {file_search_path: '/'})).to.throw('Could not set option "file_search_path" as a global option');
170
+ });
166
171
  });
172
+
@@ -1,359 +0,0 @@
1
- //===--------------------------------------------------------------------===//
2
- // row_match.cpp
3
- // Description: This file contains the implementation of the match operators
4
- //===--------------------------------------------------------------------===//
5
-
6
- #include "duckdb/common/exception.hpp"
7
- #include "duckdb/common/operator/comparison_operators.hpp"
8
- #include "duckdb/common/operator/constant_operators.hpp"
9
- #include "duckdb/common/row_operations/row_operations.hpp"
10
- #include "duckdb/common/types/row/tuple_data_collection.hpp"
11
-
12
- namespace duckdb {
13
-
14
- using ValidityBytes = RowLayout::ValidityBytes;
15
- using Predicates = RowOperations::Predicates;
16
-
17
- template <typename OP>
18
- static idx_t SelectComparison(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
19
- SelectionVector *true_sel, SelectionVector *false_sel) {
20
- throw NotImplementedException("Unsupported nested comparison operand for RowOperations::Match");
21
- }
22
-
23
- template <>
24
- idx_t SelectComparison<Equals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
25
- SelectionVector *true_sel, SelectionVector *false_sel) {
26
- return VectorOperations::NestedEquals(left, right, sel, count, true_sel, false_sel);
27
- }
28
-
29
- template <>
30
- idx_t SelectComparison<NotEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
31
- SelectionVector *true_sel, SelectionVector *false_sel) {
32
- return VectorOperations::NestedNotEquals(left, right, sel, count, true_sel, false_sel);
33
- }
34
-
35
- template <>
36
- idx_t SelectComparison<GreaterThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
37
- SelectionVector *true_sel, SelectionVector *false_sel) {
38
- return VectorOperations::DistinctGreaterThan(left, right, &sel, count, true_sel, false_sel);
39
- }
40
-
41
- template <>
42
- idx_t SelectComparison<GreaterThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
43
- SelectionVector *true_sel, SelectionVector *false_sel) {
44
- return VectorOperations::DistinctGreaterThanEquals(left, right, &sel, count, true_sel, false_sel);
45
- }
46
-
47
- template <>
48
- idx_t SelectComparison<LessThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
49
- SelectionVector *true_sel, SelectionVector *false_sel) {
50
- return VectorOperations::DistinctLessThan(left, right, &sel, count, true_sel, false_sel);
51
- }
52
-
53
- template <>
54
- idx_t SelectComparison<LessThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
55
- SelectionVector *true_sel, SelectionVector *false_sel) {
56
- return VectorOperations::DistinctLessThanEquals(left, right, &sel, count, true_sel, false_sel);
57
- }
58
-
59
- template <class T, class OP, bool NO_MATCH_SEL>
60
- static void TemplatedMatchType(UnifiedVectorFormat &col, Vector &rows, SelectionVector &sel, idx_t &count,
61
- idx_t col_offset, idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
62
- // Precompute row_mask indexes
63
- idx_t entry_idx;
64
- idx_t idx_in_entry;
65
- ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry);
66
-
67
- auto data = UnifiedVectorFormat::GetData<T>(col);
68
- auto ptrs = FlatVector::GetData<data_ptr_t>(rows);
69
- idx_t match_count = 0;
70
- if (!col.validity.AllValid()) {
71
- for (idx_t i = 0; i < count; i++) {
72
- auto idx = sel.get_index(i);
73
-
74
- auto row = ptrs[idx];
75
- ValidityBytes row_mask(row);
76
- auto isnull = !row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry);
77
-
78
- auto col_idx = col.sel->get_index(idx);
79
- if (!col.validity.RowIsValid(col_idx)) {
80
- if (isnull) {
81
- // match: move to next value to compare
82
- sel.set_index(match_count++, idx);
83
- } else {
84
- if (NO_MATCH_SEL) {
85
- no_match->set_index(no_match_count++, idx);
86
- }
87
- }
88
- } else {
89
- auto value = Load<T>(row + col_offset);
90
- if (!isnull && OP::template Operation<T>(data[col_idx], value)) {
91
- sel.set_index(match_count++, idx);
92
- } else {
93
- if (NO_MATCH_SEL) {
94
- no_match->set_index(no_match_count++, idx);
95
- }
96
- }
97
- }
98
- }
99
- } else {
100
- for (idx_t i = 0; i < count; i++) {
101
- auto idx = sel.get_index(i);
102
-
103
- auto row = ptrs[idx];
104
- ValidityBytes row_mask(row);
105
- auto isnull = !row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry);
106
-
107
- auto col_idx = col.sel->get_index(idx);
108
- auto value = Load<T>(row + col_offset);
109
- if (!isnull && OP::template Operation<T>(data[col_idx], value)) {
110
- sel.set_index(match_count++, idx);
111
- } else {
112
- if (NO_MATCH_SEL) {
113
- no_match->set_index(no_match_count++, idx);
114
- }
115
- }
116
- }
117
- }
118
- count = match_count;
119
- }
120
-
121
- //! Forward declaration for recursion
122
- template <class OP, bool NO_MATCH_SEL>
123
- static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows,
124
- SelectionVector &sel, idx_t &count, idx_t col_no, SelectionVector *no_match,
125
- idx_t &no_match_count, const idx_t original_count);
126
-
127
- template <class OP, bool NO_MATCH_SEL>
128
- static void TemplatedMatchStruct(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows,
129
- SelectionVector &sel, idx_t &count, const idx_t col_no, SelectionVector *no_match,
130
- idx_t &no_match_count, const idx_t original_count) {
131
- // Precompute row_mask indexes
132
- idx_t entry_idx;
133
- idx_t idx_in_entry;
134
- ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry);
135
-
136
- // Work our way through the validity of the whole struct
137
- auto ptrs = FlatVector::GetData<data_ptr_t>(rows);
138
- idx_t match_count = 0;
139
- if (!col.validity.AllValid()) {
140
- for (idx_t i = 0; i < count; i++) {
141
- auto idx = sel.get_index(i);
142
-
143
- auto row = ptrs[idx];
144
- ValidityBytes row_mask(row);
145
- auto isnull = !row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry);
146
-
147
- auto col_idx = col.sel->get_index(idx);
148
- if (!col.validity.RowIsValid(col_idx)) {
149
- if (isnull) {
150
- // match: move to next value to compare
151
- sel.set_index(match_count++, idx);
152
- } else {
153
- if (NO_MATCH_SEL) {
154
- no_match->set_index(no_match_count++, idx);
155
- }
156
- }
157
- } else {
158
- if (!isnull) {
159
- sel.set_index(match_count++, idx);
160
- } else {
161
- if (NO_MATCH_SEL) {
162
- no_match->set_index(no_match_count++, idx);
163
- }
164
- }
165
- }
166
- }
167
- } else {
168
- for (idx_t i = 0; i < count; i++) {
169
- auto idx = sel.get_index(i);
170
-
171
- auto row = ptrs[idx];
172
- ValidityBytes row_mask(row);
173
- auto isnull = !row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry);
174
-
175
- if (!isnull) {
176
- sel.set_index(match_count++, idx);
177
- } else {
178
- if (NO_MATCH_SEL) {
179
- no_match->set_index(no_match_count++, idx);
180
- }
181
- }
182
- }
183
- }
184
- count = match_count;
185
-
186
- // Now we construct row pointers to the structs
187
- Vector struct_rows(LogicalTypeId::POINTER);
188
- auto struct_ptrs = FlatVector::GetData<data_ptr_t>(struct_rows);
189
-
190
- const auto col_offset = layout.GetOffsets()[col_no];
191
- for (idx_t i = 0; i < count; i++) {
192
- auto idx = sel.get_index(i);
193
- auto row = ptrs[idx];
194
- struct_ptrs[idx] = row + col_offset;
195
- }
196
-
197
- // Get the struct layout, child columns, then recurse
198
- const auto &struct_layout = layout.GetStructLayout(col_no);
199
- auto &struct_entries = StructVector::GetEntries(vec);
200
- D_ASSERT(struct_layout.ColumnCount() == struct_entries.size());
201
- for (idx_t struct_col_no = 0; struct_col_no < struct_layout.ColumnCount(); struct_col_no++) {
202
- auto &struct_vec = *struct_entries[struct_col_no];
203
- UnifiedVectorFormat struct_col;
204
- struct_vec.ToUnifiedFormat(original_count, struct_col);
205
- TemplatedMatchOp<OP, NO_MATCH_SEL>(struct_vec, struct_col, struct_layout, struct_rows, sel, count,
206
- struct_col_no, no_match, no_match_count, original_count);
207
- }
208
- }
209
-
210
- template <class OP, bool NO_MATCH_SEL>
211
- static void TemplatedMatchList(Vector &col, Vector &rows, SelectionVector &sel, idx_t &count,
212
- const TupleDataLayout &layout, const idx_t col_no, SelectionVector *no_match,
213
- idx_t &no_match_count) {
214
- // Gather a dense Vector containing the column values being matched
215
- Vector key(col.GetType());
216
- const auto gather_function = TupleDataCollection::GetGatherFunction(col.GetType());
217
- gather_function.function(layout, rows, col_no, sel, count, key, *FlatVector::IncrementalSelectionVector(), key,
218
- gather_function.child_functions);
219
-
220
- // Densify the input column
221
- Vector sliced(col, sel, count);
222
-
223
- if (NO_MATCH_SEL) {
224
- SelectionVector no_match_sel_offset(no_match->data() + no_match_count);
225
- auto match_count = SelectComparison<OP>(sliced, key, sel, count, &sel, &no_match_sel_offset);
226
- no_match_count += count - match_count;
227
- count = match_count;
228
- } else {
229
- count = SelectComparison<OP>(sliced, key, sel, count, &sel, nullptr);
230
- }
231
- }
232
-
233
- template <class OP, bool NO_MATCH_SEL>
234
- static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows,
235
- SelectionVector &sel, idx_t &count, idx_t col_no, SelectionVector *no_match,
236
- idx_t &no_match_count, const idx_t original_count) {
237
- if (count == 0) {
238
- return;
239
- }
240
- auto col_offset = layout.GetOffsets()[col_no];
241
- switch (layout.GetTypes()[col_no].InternalType()) {
242
- case PhysicalType::BOOL:
243
- case PhysicalType::INT8:
244
- TemplatedMatchType<int8_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
245
- no_match_count);
246
- break;
247
- case PhysicalType::INT16:
248
- TemplatedMatchType<int16_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
249
- no_match_count);
250
- break;
251
- case PhysicalType::INT32:
252
- TemplatedMatchType<int32_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
253
- no_match_count);
254
- break;
255
- case PhysicalType::INT64:
256
- TemplatedMatchType<int64_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
257
- no_match_count);
258
- break;
259
- case PhysicalType::UINT8:
260
- TemplatedMatchType<uint8_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
261
- no_match_count);
262
- break;
263
- case PhysicalType::UINT16:
264
- TemplatedMatchType<uint16_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
265
- no_match_count);
266
- break;
267
- case PhysicalType::UINT32:
268
- TemplatedMatchType<uint32_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
269
- no_match_count);
270
- break;
271
- case PhysicalType::UINT64:
272
- TemplatedMatchType<uint64_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
273
- no_match_count);
274
- break;
275
- case PhysicalType::INT128:
276
- TemplatedMatchType<hugeint_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
277
- no_match_count);
278
- break;
279
- case PhysicalType::FLOAT:
280
- TemplatedMatchType<float, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
281
- no_match_count);
282
- break;
283
- case PhysicalType::DOUBLE:
284
- TemplatedMatchType<double, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
285
- no_match_count);
286
- break;
287
- case PhysicalType::INTERVAL:
288
- TemplatedMatchType<interval_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
289
- no_match_count);
290
- break;
291
- case PhysicalType::VARCHAR:
292
- TemplatedMatchType<string_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
293
- no_match_count);
294
- break;
295
- case PhysicalType::STRUCT:
296
- TemplatedMatchStruct<OP, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, no_match_count,
297
- original_count);
298
- break;
299
- case PhysicalType::LIST:
300
- TemplatedMatchList<OP, NO_MATCH_SEL>(vec, rows, sel, count, layout, col_no, no_match, no_match_count);
301
- break;
302
- default:
303
- throw InternalException("Unsupported column type for RowOperations::Match");
304
- }
305
- }
306
-
307
- template <bool NO_MATCH_SEL>
308
- static void TemplatedMatch(DataChunk &columns, UnifiedVectorFormat col_data[], const TupleDataLayout &layout,
309
- Vector &rows, const Predicates &predicates, SelectionVector &sel, idx_t &count,
310
- SelectionVector *no_match, idx_t &no_match_count) {
311
- for (idx_t col_no = 0; col_no < predicates.size(); ++col_no) {
312
- auto &vec = columns.data[col_no];
313
- auto &col = col_data[col_no];
314
- switch (predicates[col_no]) {
315
- case ExpressionType::COMPARE_EQUAL:
316
- case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
317
- case ExpressionType::COMPARE_DISTINCT_FROM:
318
- TemplatedMatchOp<Equals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, no_match_count,
319
- count);
320
- break;
321
- case ExpressionType::COMPARE_NOTEQUAL:
322
- TemplatedMatchOp<NotEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
323
- no_match_count, count);
324
- break;
325
- case ExpressionType::COMPARE_GREATERTHAN:
326
- TemplatedMatchOp<GreaterThan, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
327
- no_match_count, count);
328
- break;
329
- case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
330
- TemplatedMatchOp<GreaterThanEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
331
- no_match_count, count);
332
- break;
333
- case ExpressionType::COMPARE_LESSTHAN:
334
- TemplatedMatchOp<LessThan, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
335
- no_match_count, count);
336
- break;
337
- case ExpressionType::COMPARE_LESSTHANOREQUALTO:
338
- TemplatedMatchOp<LessThanEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
339
- no_match_count, count);
340
- break;
341
- default:
342
- throw InternalException("Unsupported comparison type for RowOperations::Match");
343
- }
344
- }
345
- }
346
-
347
- idx_t RowOperations::Match(DataChunk &columns, UnifiedVectorFormat col_data[], const TupleDataLayout &layout,
348
- Vector &rows, const Predicates &predicates, SelectionVector &sel, idx_t count,
349
- SelectionVector *no_match, idx_t &no_match_count) {
350
- if (no_match) {
351
- TemplatedMatch<true>(columns, col_data, layout, rows, predicates, sel, count, no_match, no_match_count);
352
- } else {
353
- TemplatedMatch<false>(columns, col_data, layout, rows, predicates, sel, count, no_match, no_match_count);
354
- }
355
-
356
- return count;
357
- }
358
-
359
- } // namespace duckdb