duckdb 0.8.2-dev4711.0 → 0.8.2-dev4871.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/binding.gyp +0 -1
  2. package/binding.gyp.in +0 -1
  3. package/package.json +1 -1
  4. package/src/connection.cpp +10 -23
  5. package/src/data_chunk.cpp +1 -3
  6. package/src/database.cpp +4 -9
  7. package/src/duckdb/extension/icu/icu-datepart.cpp +12 -8
  8. package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -6
  9. package/src/duckdb/extension/json/json_functions.cpp +4 -6
  10. package/src/duckdb/src/common/enum_util.cpp +10 -5
  11. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  12. package/src/duckdb/src/common/row_operations/row_matcher.cpp +408 -0
  13. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -3
  14. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +28 -17
  15. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -43
  16. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +1 -0
  17. package/src/duckdb/src/core_functions/function_list.cpp +1 -1
  18. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +86 -50
  19. package/src/duckdb/src/core_functions/scalar/generic/hash.cpp +3 -0
  20. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +8 -5
  21. package/src/duckdb/src/execution/aggregate_hashtable.cpp +5 -4
  22. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +13 -0
  23. package/src/duckdb/src/execution/join_hashtable.cpp +71 -59
  24. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -4
  25. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -2
  26. package/src/duckdb/src/execution/reservoir_sample.cpp +3 -9
  27. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -2
  28. package/src/duckdb/src/function/function_binder.cpp +10 -9
  29. package/src/duckdb/src/function/scalar/string/like.cpp +0 -3
  30. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  31. package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +11 -3
  32. package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +63 -0
  33. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -2
  34. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +2 -2
  35. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
  36. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +1 -1
  37. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +4 -0
  38. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +14 -8
  39. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -0
  40. package/src/duckdb/src/main/config.cpp +1 -1
  41. package/src/duckdb/src/main/relation.cpp +10 -0
  42. package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +0 -3
  43. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +12 -4
  44. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -3
  45. package/src/duckdb/src/storage/data_table.cpp +10 -0
  46. package/src/duckdb/ub_src_common_row_operations.cpp +1 -1
  47. package/src/statement.cpp +2 -4
  48. package/test/database_fail.test.ts +6 -0
  49. package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
@@ -45,6 +45,7 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
45
45
  // Append hash column to the end and initialise the row layout
46
46
  group_types_p.emplace_back(LogicalType::HASH);
47
47
  layout.Initialize(std::move(group_types_p), std::move(aggregate_objects_p));
48
+
48
49
  hash_offset = layout.GetOffsets()[layout.ColumnCount() - 1];
49
50
 
50
51
  // Partitioned data and pointer table
@@ -52,7 +53,8 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
52
53
  Resize(initial_capacity);
53
54
 
54
55
  // Predicates
55
- predicates.resize(layout.ColumnCount() - 1, ExpressionType::COMPARE_EQUAL);
56
+ predicates.resize(layout.ColumnCount() - 1, ExpressionType::COMPARE_NOT_DISTINCT_FROM);
57
+ row_matcher.Initialize(true, layout, predicates);
56
58
  }
57
59
 
58
60
  void GroupedAggregateHashTable::InitializePartitionedData() {
@@ -414,9 +416,8 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
414
416
  }
415
417
 
416
418
  // Perform group comparisons
417
- RowOperations::Match(state.group_chunk, state.group_data.get(), layout, addresses_v, predicates,
418
- state.group_compare_vector, need_compare_count, &state.no_match_vector,
419
- no_match_count);
419
+ row_matcher.Match(state.group_chunk, chunk_state.vector_data, state.group_compare_vector,
420
+ need_compare_count, layout, addresses_v, &state.no_match_vector, no_match_count);
420
421
  }
421
422
 
422
423
  // Linear probing: each of the entries that do not match move to the next entry in the HT
@@ -173,6 +173,19 @@ bool FixedSizeAllocator::InitializeVacuum() {
173
173
  return false;
174
174
  }
175
175
 
176
+ // remove all empty buffers
177
+ auto buffer_it = buffers.begin();
178
+ while (buffer_it != buffers.end()) {
179
+ if (!buffer_it->second.segment_count) {
180
+ buffers_with_free_space.erase(buffer_it->first);
181
+ buffer_it->second.Destroy();
182
+ buffer_it = buffers.erase(buffer_it);
183
+ } else {
184
+ buffer_it++;
185
+ }
186
+ }
187
+
188
+ // determine if a vacuum is necessary
176
189
  multimap<idx_t, idx_t> temporary_vacuum_buffers;
177
190
  D_ASSERT(vacuum_buffers.empty());
178
191
  idx_t available_segments_in_memory = 0;
@@ -19,15 +19,15 @@ JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector<JoinC
19
19
  : buffer_manager(buffer_manager_p), conditions(conditions_p), build_types(std::move(btypes)), entry_size(0),
20
20
  tuple_size(0), vfound(Value::BOOLEAN(false)), join_type(type_p), finalized(false), has_null(false),
21
21
  external(false), radix_bits(4), partition_start(0), partition_end(0) {
22
+
22
23
  for (auto &condition : conditions) {
23
24
  D_ASSERT(condition.left->return_type == condition.right->return_type);
24
25
  auto type = condition.left->return_type;
25
26
  if (condition.comparison == ExpressionType::COMPARE_EQUAL ||
26
- condition.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM ||
27
- condition.comparison == ExpressionType::COMPARE_DISTINCT_FROM) {
28
- // all equality conditions should be at the front
29
- // all other conditions at the back
30
- // this assert checks that
27
+ condition.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
28
+
29
+ // ensure that all equality conditions are at the front,
30
+ // and that all other conditions are at the back
31
31
  D_ASSERT(equality_types.size() == condition_types.size());
32
32
  equality_types.push_back(type);
33
33
  }
@@ -51,6 +51,8 @@ JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector<JoinC
51
51
  }
52
52
  layout_types.emplace_back(LogicalType::HASH);
53
53
  layout.Initialize(layout_types, false);
54
+ row_matcher.Initialize(false, layout, predicates);
55
+ row_matcher_no_match_sel.Initialize(true, layout, predicates);
54
56
 
55
57
  const auto &offsets = layout.GetOffsets();
56
58
  tuple_size = offsets[condition_types.size() + build_types.size()];
@@ -142,30 +144,6 @@ static idx_t FilterNullValues(UnifiedVectorFormat &vdata, const SelectionVector
142
144
  return result_count;
143
145
  }
144
146
 
145
- idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unsafe_unique_array<UnifiedVectorFormat> &key_data,
146
- const SelectionVector *&current_sel, SelectionVector &sel, bool build_side) {
147
- key_data = keys.ToUnifiedFormat();
148
-
149
- // figure out which keys are NULL, and create a selection vector out of them
150
- current_sel = FlatVector::IncrementalSelectionVector();
151
- idx_t added_count = keys.size();
152
- if (build_side && IsRightOuterJoin(join_type)) {
153
- // in case of a right or full outer join, we cannot remove NULL keys from the build side
154
- return added_count;
155
- }
156
- for (idx_t i = 0; i < keys.ColumnCount(); i++) {
157
- if (!null_values_are_equal[i]) {
158
- if (key_data[i].validity.AllValid()) {
159
- continue;
160
- }
161
- added_count = FilterNullValues(key_data[i], *current_sel, added_count, sel);
162
- // null values are NOT equal for this column, filter them out
163
- current_sel = &sel;
164
- }
165
- }
166
- return added_count;
167
- }
168
-
169
147
  void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChunk &keys, DataChunk &payload) {
170
148
  D_ASSERT(!finalized);
171
149
  D_ASSERT(keys.size() == payload.size());
@@ -194,23 +172,6 @@ void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChu
194
172
  info.correlated_counts->AddChunk(info.group_chunk, info.correlated_payload, AggregateType::NON_DISTINCT);
195
173
  }
196
174
 
197
- // prepare the keys for processing
198
- unsafe_unique_array<UnifiedVectorFormat> key_data;
199
- const SelectionVector *current_sel;
200
- SelectionVector sel(STANDARD_VECTOR_SIZE);
201
- idx_t added_count = PrepareKeys(keys, key_data, current_sel, sel, true);
202
- if (added_count < keys.size()) {
203
- has_null = true;
204
- }
205
- if (added_count == 0) {
206
- return;
207
- }
208
-
209
- // hash the keys and obtain an entry in the list
210
- // note that we only hash the keys used in the equality comparison
211
- Vector hash_values(LogicalType::HASH);
212
- Hash(keys, *current_sel, added_count, hash_values);
213
-
214
175
  // build a chunk to append to the data collection [keys, payload, (optional "found" boolean), hash]
215
176
  DataChunk source_chunk;
216
177
  source_chunk.InitializeEmpty(layout.GetTypes());
@@ -228,13 +189,58 @@ void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChu
228
189
  source_chunk.data[col_offset].Reference(vfound);
229
190
  col_offset++;
230
191
  }
192
+ Vector hash_values(LogicalType::HASH);
231
193
  source_chunk.data[col_offset].Reference(hash_values);
232
194
  source_chunk.SetCardinality(keys);
233
195
 
196
+ // ToUnifiedFormat the source chunk
197
+ TupleDataCollection::ToUnifiedFormat(append_state.chunk_state, source_chunk);
198
+
199
+ // prepare the keys for processing
200
+ const SelectionVector *current_sel;
201
+ SelectionVector sel(STANDARD_VECTOR_SIZE);
202
+ idx_t added_count = PrepareKeys(keys, append_state.chunk_state.vector_data, current_sel, sel, true);
234
203
  if (added_count < keys.size()) {
235
- source_chunk.Slice(*current_sel, added_count);
204
+ has_null = true;
205
+ }
206
+ if (added_count == 0) {
207
+ return;
236
208
  }
237
- sink_collection->Append(append_state, source_chunk);
209
+
210
+ // hash the keys and obtain an entry in the list
211
+ // note that we only hash the keys used in the equality comparison
212
+ Hash(keys, *current_sel, added_count, hash_values);
213
+
214
+ // Re-reference and ToUnifiedFormat the hash column after computing it
215
+ source_chunk.data[col_offset].Reference(hash_values);
216
+ hash_values.ToUnifiedFormat(source_chunk.size(), append_state.chunk_state.vector_data.back().unified);
217
+
218
+ // We already called TupleDataCollection::ToUnifiedFormat, so we can AppendUnified here
219
+ sink_collection->AppendUnified(append_state, source_chunk, *current_sel, added_count);
220
+ }
221
+
222
+ idx_t JoinHashTable::PrepareKeys(DataChunk &keys, vector<TupleDataVectorFormat> &vector_data,
223
+ const SelectionVector *&current_sel, SelectionVector &sel, bool build_side) {
224
+ // figure out which keys are NULL, and create a selection vector out of them
225
+ current_sel = FlatVector::IncrementalSelectionVector();
226
+ idx_t added_count = keys.size();
227
+ if (build_side && IsRightOuterJoin(join_type)) {
228
+ // in case of a right or full outer join, we cannot remove NULL keys from the build side
229
+ return added_count;
230
+ }
231
+
232
+ for (idx_t col_idx = 0; col_idx < keys.ColumnCount(); col_idx++) {
233
+ if (!null_values_are_equal[col_idx]) {
234
+ auto &col_key_data = vector_data[col_idx].unified;
235
+ if (col_key_data.validity.AllValid()) {
236
+ continue;
237
+ }
238
+ added_count = FilterNullValues(col_key_data, *current_sel, added_count, sel);
239
+ // null values are NOT equal for this column, filter them out
240
+ current_sel = &sel;
241
+ }
242
+ }
243
+ return added_count;
238
244
  }
239
245
 
240
246
  template <bool PARALLEL>
@@ -322,12 +328,13 @@ void JoinHashTable::Finalize(idx_t chunk_idx_from, idx_t chunk_idx_to, bool para
322
328
  } while (iterator.Next());
323
329
  }
324
330
 
325
- unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys, const SelectionVector *&current_sel) {
331
+ unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys, TupleDataChunkState &key_state,
332
+ const SelectionVector *&current_sel) {
326
333
  D_ASSERT(Count() > 0); // should be handled before
327
334
  D_ASSERT(finalized);
328
335
 
329
336
  // set up the scan structure
330
- auto ss = make_uniq<ScanStructure>(*this);
337
+ auto ss = make_uniq<ScanStructure>(*this, key_state);
331
338
 
332
339
  if (join_type != JoinType::INNER) {
333
340
  ss->found_match = make_unsafe_uniq_array<bool>(STANDARD_VECTOR_SIZE);
@@ -335,13 +342,15 @@ unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys
335
342
  }
336
343
 
337
344
  // first prepare the keys for probing
338
- ss->count = PrepareKeys(keys, ss->key_data, current_sel, ss->sel_vector, false);
345
+ TupleDataCollection::ToUnifiedFormat(key_state, keys);
346
+ ss->count = PrepareKeys(keys, key_state.vector_data, current_sel, ss->sel_vector, false);
339
347
  return ss;
340
348
  }
341
349
 
342
- unique_ptr<ScanStructure> JoinHashTable::Probe(DataChunk &keys, Vector *precomputed_hashes) {
350
+ unique_ptr<ScanStructure> JoinHashTable::Probe(DataChunk &keys, TupleDataChunkState &key_state,
351
+ Vector *precomputed_hashes) {
343
352
  const SelectionVector *current_sel;
344
- auto ss = InitializeScanStructure(keys, current_sel);
353
+ auto ss = InitializeScanStructure(keys, key_state, current_sel);
345
354
  if (ss->count == 0) {
346
355
  return ss;
347
356
  }
@@ -363,8 +372,9 @@ unique_ptr<ScanStructure> JoinHashTable::Probe(DataChunk &keys, Vector *precompu
363
372
  return ss;
364
373
  }
365
374
 
366
- ScanStructure::ScanStructure(JoinHashTable &ht)
367
- : pointers(LogicalType::POINTER), sel_vector(STANDARD_VECTOR_SIZE), ht(ht), finished(false) {
375
+ ScanStructure::ScanStructure(JoinHashTable &ht_p, TupleDataChunkState &key_state_p)
376
+ : key_state(key_state_p), pointers(LogicalType::POINTER), sel_vector(STANDARD_VECTOR_SIZE), ht(ht_p),
377
+ finished(false) {
368
378
  }
369
379
 
370
380
  void ScanStructure::Next(DataChunk &keys, DataChunk &left, DataChunk &result) {
@@ -404,8 +414,9 @@ idx_t ScanStructure::ResolvePredicates(DataChunk &keys, SelectionVector &match_s
404
414
  }
405
415
  idx_t no_match_count = 0;
406
416
 
407
- return RowOperations::Match(keys, key_data.get(), ht.layout, pointers, ht.predicates, match_sel, this->count,
408
- no_match_sel, no_match_count);
417
+ auto &matcher = no_match_sel ? ht.row_matcher_no_match_sel : ht.row_matcher;
418
+ return matcher.Match(keys, key_state.vector_data, match_sel, this->count, ht.layout, pointers, no_match_sel,
419
+ no_match_count);
409
420
  }
410
421
 
411
422
  idx_t ScanStructure::ScanInnerJoin(DataChunk &keys, SelectionVector &result_vector) {
@@ -990,7 +1001,8 @@ static void CreateSpillChunk(DataChunk &spill_chunk, DataChunk &keys, DataChunk
990
1001
  spill_chunk.data[spill_col_idx].Reference(hashes);
991
1002
  }
992
1003
 
993
- unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys, DataChunk &payload, ProbeSpill &probe_spill,
1004
+ unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys, TupleDataChunkState &key_state,
1005
+ DataChunk &payload, ProbeSpill &probe_spill,
994
1006
  ProbeSpillLocalAppendState &spill_state,
995
1007
  DataChunk &spill_chunk) {
996
1008
  // hash all the keys
@@ -1019,7 +1031,7 @@ unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys, DataChun
1019
1031
  payload.Slice(true_sel, true_count);
1020
1032
 
1021
1033
  const SelectionVector *current_sel;
1022
- auto ss = InitializeScanStructure(keys, current_sel);
1034
+ auto ss = InitializeScanStructure(keys, key_state, current_sel);
1023
1035
  if (ss->count == 0) {
1024
1036
  return ss;
1025
1037
  }
@@ -420,6 +420,8 @@ public:
420
420
  }
421
421
 
422
422
  DataChunk join_keys;
423
+ TupleDataChunkState join_key_state;
424
+
423
425
  ExpressionExecutor probe_executor;
424
426
  unique_ptr<JoinHashTable::ScanStructure> scan_structure;
425
427
  unique_ptr<OperatorState> perfect_hash_join_state;
@@ -446,6 +448,7 @@ unique_ptr<OperatorState> PhysicalHashJoin::GetOperatorState(ExecutionContext &c
446
448
  for (auto &cond : conditions) {
447
449
  state->probe_executor.AddExpression(*cond.left);
448
450
  }
451
+ TupleDataCollection::InitializeChunkState(state->join_key_state, condition_types);
449
452
  }
450
453
  if (sink.external) {
451
454
  state->spill_chunk.Initialize(allocator, sink.probe_types);
@@ -502,10 +505,10 @@ OperatorResultType PhysicalHashJoin::ExecuteInternal(ExecutionContext &context,
502
505
 
503
506
  // perform the actual probe
504
507
  if (sink.external) {
505
- state.scan_structure = sink.hash_table->ProbeAndSpill(state.join_keys, input, *sink.probe_spill,
506
- state.spill_state, state.spill_chunk);
508
+ state.scan_structure = sink.hash_table->ProbeAndSpill(state.join_keys, state.join_key_state, input,
509
+ *sink.probe_spill, state.spill_state, state.spill_chunk);
507
510
  } else {
508
- state.scan_structure = sink.hash_table->Probe(state.join_keys);
511
+ state.scan_structure = sink.hash_table->Probe(state.join_keys, state.join_key_state);
509
512
  }
510
513
  state.scan_structure->Next(state.join_keys, input, chunk);
511
514
  return OperatorResultType::HAVE_MORE_OUTPUT;
@@ -605,6 +608,7 @@ public:
605
608
  DataChunk probe_chunk;
606
609
  DataChunk join_keys;
607
610
  DataChunk payload;
611
+ TupleDataChunkState join_key_state;
608
612
  //! Column indices to easily reference the join keys/payload columns in probe_chunk
609
613
  vector<idx_t> join_key_indices;
610
614
  vector<idx_t> payload_indices;
@@ -782,6 +786,7 @@ HashJoinLocalSourceState::HashJoinLocalSourceState(const PhysicalHashJoin &op, A
782
786
  probe_chunk.Initialize(allocator, sink.probe_types);
783
787
  join_keys.Initialize(allocator, op.condition_types);
784
788
  payload.Initialize(allocator, op.children[0]->types);
789
+ TupleDataCollection::InitializeChunkState(join_key_state, op.condition_types);
785
790
 
786
791
  // Store the indices of the columns to reference them easily
787
792
  idx_t col_idx = 0;
@@ -871,7 +876,7 @@ void HashJoinLocalSourceState::ExternalProbe(HashJoinGlobalSinkState &sink, Hash
871
876
  }
872
877
 
873
878
  // Perform the probe
874
- scan_structure = sink.hash_table->Probe(join_keys, precomputed_hashes);
879
+ scan_structure = sink.hash_table->Probe(join_keys, join_key_state, precomputed_hashes);
875
880
  scan_structure->Next(join_keys, payload, chunk);
876
881
  }
877
882
 
@@ -254,7 +254,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::PlanComparisonJoin(LogicalCo
254
254
  }
255
255
 
256
256
  bool has_equality = false;
257
- // bool has_inequality = false;
258
257
  size_t has_range = 0;
259
258
  for (size_t c = 0; c < op.conditions.size(); ++c) {
260
259
  auto &cond = op.conditions[c];
@@ -271,7 +270,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::PlanComparisonJoin(LogicalCo
271
270
  break;
272
271
  case ExpressionType::COMPARE_NOTEQUAL:
273
272
  case ExpressionType::COMPARE_DISTINCT_FROM:
274
- // has_inequality = true;
275
273
  break;
276
274
  default:
277
275
  throw NotImplementedException("Unimplemented comparison join");
@@ -107,25 +107,19 @@ void ReservoirSamplePercentage::AddToReservoir(DataChunk &input) {
107
107
  if (append_to_next_sample > 0) {
108
108
  // we need to also add to the next sample
109
109
  DataChunk new_chunk;
110
- new_chunk.Initialize(allocator, input.GetTypes());
111
- SelectionVector sel(append_to_current_sample_count);
112
- for (idx_t r = 0; r < append_to_current_sample_count; r++) {
113
- sel.set_index(r, r);
114
- }
115
- new_chunk.Slice(sel, append_to_current_sample_count);
110
+ new_chunk.InitializeEmpty(input.GetTypes());
111
+ new_chunk.Slice(input, *FlatVector::IncrementalSelectionVector(), append_to_current_sample_count);
116
112
  new_chunk.Flatten();
117
-
118
113
  current_sample->AddToReservoir(new_chunk);
119
114
  } else {
120
115
  input.Flatten();
121
-
122
116
  input.SetCardinality(append_to_current_sample_count);
123
117
  current_sample->AddToReservoir(input);
124
118
  }
125
119
  }
126
120
  if (append_to_next_sample > 0) {
127
121
  // slice the input for the remainder
128
- SelectionVector sel(STANDARD_VECTOR_SIZE);
122
+ SelectionVector sel(append_to_next_sample);
129
123
  for (idx_t i = 0; i < append_to_next_sample; i++) {
130
124
  sel.set_index(i, append_to_current_sample_count + i);
131
125
  }
@@ -20,10 +20,16 @@ inline static void SkipWhitespace(const char *buf, idx_t &pos, idx_t len) {
20
20
  static bool SkipToCloseQuotes(idx_t &pos, const char *buf, idx_t &len) {
21
21
  char quote = buf[pos];
22
22
  pos++;
23
+ bool escaped = false;
23
24
 
24
25
  while (pos < len) {
25
- if (buf[pos] == quote) {
26
- return true;
26
+ if (buf[pos] == '\\') {
27
+ escaped = !escaped;
28
+ } else {
29
+ if (buf[pos] == quote && !escaped) {
30
+ return true;
31
+ }
32
+ escaped = false;
27
33
  }
28
34
  pos++;
29
35
  }
@@ -1,16 +1,16 @@
1
1
  #include "duckdb/function/function_binder.hpp"
2
- #include "duckdb/common/limits.hpp"
3
2
 
4
- #include "duckdb/planner/expression/bound_cast_expression.hpp"
5
- #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
6
- #include "duckdb/planner/expression/bound_function_expression.hpp"
7
- #include "duckdb/planner/expression/bound_constant_expression.hpp"
3
+ #include "duckdb/catalog/catalog.hpp"
8
4
  #include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp"
9
-
10
- #include "duckdb/planner/expression_binder.hpp"
5
+ #include "duckdb/common/limits.hpp"
6
+ #include "duckdb/execution/expression_executor.hpp"
11
7
  #include "duckdb/function/aggregate_function.hpp"
12
8
  #include "duckdb/function/cast_rules.hpp"
13
- #include "duckdb/catalog/catalog.hpp"
9
+ #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
10
+ #include "duckdb/planner/expression/bound_cast_expression.hpp"
11
+ #include "duckdb/planner/expression/bound_constant_expression.hpp"
12
+ #include "duckdb/planner/expression/bound_function_expression.hpp"
13
+ #include "duckdb/planner/expression_binder.hpp"
14
14
 
15
15
  namespace duckdb {
16
16
 
@@ -268,7 +268,8 @@ unique_ptr<Expression> FunctionBinder::BindScalarFunction(ScalarFunctionCatalogE
268
268
 
269
269
  if (bound_function.null_handling == FunctionNullHandling::DEFAULT_NULL_HANDLING) {
270
270
  for (auto &child : children) {
271
- if (child->return_type == LogicalTypeId::SQLNULL) {
271
+ if (child->return_type == LogicalTypeId::SQLNULL ||
272
+ (child->IsFoldable() && ExpressionExecutor::EvaluateScalar(context, *child).IsNull())) {
272
273
  return make_uniq<BoundConstantExpression>(Value(LogicalType::SQLNULL));
273
274
  }
274
275
  }
@@ -196,9 +196,6 @@ static unique_ptr<FunctionData> LikeBindFunction(ClientContext &context, ScalarF
196
196
  D_ASSERT(arguments.size() == 2 || arguments.size() == 3);
197
197
  if (arguments[1]->IsFoldable()) {
198
198
  Value pattern_str = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
199
- if (pattern_str.IsNull()) {
200
- return nullptr;
201
- }
202
199
  return LikeMatcher::CreateLikeMatcher(pattern_str.ToString());
203
200
  }
204
201
  return nullptr;
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev4711"
2
+ #define DUCKDB_VERSION "0.8.2-dev4871"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "474a0bd683"
5
+ #define DUCKDB_SOURCE_ID "5a29c99891"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -25,7 +25,6 @@ enum class DatePartSpecifier : uint8_t {
25
25
  SECOND,
26
26
  MINUTE,
27
27
  HOUR,
28
- EPOCH,
29
28
  DOW,
30
29
  ISODOW,
31
30
  WEEK,
@@ -39,11 +38,20 @@ enum class DatePartSpecifier : uint8_t {
39
38
  TIMEZONE_MINUTE,
40
39
 
41
40
  // DOUBLE values
42
- JULIAN_DAY
41
+ EPOCH,
42
+ JULIAN_DAY,
43
+
44
+ // Invalid
45
+ INVALID,
46
+
47
+ // Type ranges
48
+ BEGIN_BIGINT = YEAR,
49
+ BEGIN_DOUBLE = EPOCH,
50
+ BEGIN_INVALID = INVALID,
43
51
  };
44
52
 
45
53
  inline bool IsBigintDatepart(DatePartSpecifier part_code) {
46
- return size_t(part_code) < size_t(DatePartSpecifier::JULIAN_DAY);
54
+ return size_t(part_code) < size_t(DatePartSpecifier::BEGIN_DOUBLE);
47
55
  }
48
56
 
49
57
  DUCKDB_API bool TryGetDatePartSpecifier(const string &specifier, DatePartSpecifier &result);
@@ -0,0 +1,63 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/row_operations/row_matcher.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/enums/expression_type.hpp"
12
+ #include "duckdb/common/types.hpp"
13
+
14
+ namespace duckdb {
15
+
16
+ class Vector;
17
+ class DataChunk;
18
+ class TupleDataLayout;
19
+ struct TupleDataVectorFormat;
20
+ struct SelectionVector;
21
+ struct MatchFunction;
22
+
23
+ typedef idx_t (*match_function_t)(Vector &lhs_vector, const TupleDataVectorFormat &lhs_format, SelectionVector &sel,
24
+ const idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
25
+ const idx_t col_idx, const vector<MatchFunction> &child_functions,
26
+ SelectionVector *no_match_sel, idx_t &no_match_count);
27
+
28
+ struct MatchFunction {
29
+ match_function_t function;
30
+ vector<MatchFunction> child_functions;
31
+ };
32
+
33
+ struct RowMatcher {
34
+ public:
35
+ using Predicates = vector<ExpressionType>;
36
+
37
+ //! Initializes the RowMatcher, filling match_functions using layout and predicates
38
+ void Initialize(const bool no_match_sel, const TupleDataLayout &layout, const Predicates &predicates);
39
+ //! Given a DataChunk on the LHS, on which we've called TupleDataCollection::ToUnifiedFormat,
40
+ //! we match it with rows on the RHS, according to the given layout and locations.
41
+ //! Initially, 'sel' has 'count' entries which point to what needs to be compared.
42
+ //! After matching is done, this returns how many matching entries there are, which 'sel' is modified to point to
43
+ idx_t Match(DataChunk &lhs, const vector<TupleDataVectorFormat> &lhs_formats, SelectionVector &sel, idx_t count,
44
+ const TupleDataLayout &rhs_layout, Vector &rhs_row_locations, SelectionVector *no_match_sel,
45
+ idx_t &no_match_count);
46
+
47
+ private:
48
+ //! Gets the templated match function for a given column
49
+ MatchFunction GetMatchFunction(const bool no_match_sel, const LogicalType &type, const ExpressionType predicate);
50
+ template <bool NO_MATCH_SEL>
51
+ MatchFunction GetMatchFunction(const LogicalType &type, const ExpressionType predicate);
52
+ template <bool NO_MATCH_SEL, class T>
53
+ MatchFunction GetMatchFunction(const ExpressionType predicate);
54
+ template <bool NO_MATCH_SEL>
55
+ MatchFunction GetStructMatchFunction(const LogicalType &type, const ExpressionType predicate);
56
+ template <bool NO_MATCH_SEL>
57
+ MatchFunction GetListMatchFunction(const ExpressionType predicate);
58
+
59
+ private:
60
+ vector<MatchFunction> match_functions;
61
+ };
62
+
63
+ } // namespace duckdb
@@ -21,7 +21,7 @@ struct RowOperationsState;
21
21
 
22
22
  typedef void (*tuple_data_scatter_function_t)(const Vector &source, const TupleDataVectorFormat &source_format,
23
23
  const SelectionVector &append_sel, const idx_t append_count,
24
- const TupleDataLayout &layout, Vector &row_locations,
24
+ const TupleDataLayout &layout, const Vector &row_locations,
25
25
  Vector &heap_locations, const idx_t col_idx,
26
26
  const UnifiedVectorFormat &list_format,
27
27
  const vector<TupleDataScatterFunction> &child_functions);
@@ -84,7 +84,11 @@ public:
84
84
  TupleDataPinProperties = TupleDataPinProperties::UNPIN_AFTER_DONE);
85
85
  //! Initializes the Chunk state of an Append state
86
86
  //! - Useful for optimizing many appends made to the same tuple data collection
87
- void InitializeAppend(TupleDataChunkState &chunk_state, vector<column_t> column_ids = {});
87
+ void InitializeChunkState(TupleDataChunkState &chunk_state, vector<column_t> column_ids = {});
88
+ //! Initializes the Chunk state of an Append state
89
+ //! - Useful for optimizing many appends made to the same tuple data collection
90
+ static void InitializeChunkState(TupleDataChunkState &chunk_state, const vector<LogicalType> &types,
91
+ vector<column_t> column_ids = {});
88
92
  //! Append a DataChunk directly to this TupleDataCollection - calls InitializeAppend and Append internally
89
93
  void Append(DataChunk &new_chunk, const SelectionVector &append_sel = *FlatVector::IncrementalSelectionVector(),
90
94
  idx_t append_count = DConstants::INVALID_INDEX);
@@ -42,8 +42,8 @@ struct TupleDataVectorFormat {
42
42
  const SelectionVector *original_sel;
43
43
  SelectionVector original_owned_sel;
44
44
 
45
- UnifiedVectorFormat data;
46
- vector<TupleDataVectorFormat> child_formats;
45
+ UnifiedVectorFormat unified;
46
+ vector<TupleDataVectorFormat> children;
47
47
  unique_ptr<CombinedListData> combined_list_data;
48
48
  };
49
49
 
@@ -148,6 +148,9 @@ public:
148
148
  if (!validity_mask) {
149
149
  return ValidityBuffer::MAX_ENTRY;
150
150
  }
151
+ return GetValidityEntryUnsafe(entry_idx);
152
+ }
153
+ inline V &GetValidityEntryUnsafe(idx_t entry_idx) const {
151
154
  return validity_mask[entry_idx];
152
155
  }
153
156
  static inline bool AllValid(V entry) {
@@ -156,7 +159,7 @@ public:
156
159
  static inline bool NoneValid(V entry) {
157
160
  return entry == 0;
158
161
  }
159
- static inline bool RowIsValid(V entry, idx_t idx_in_entry) {
162
+ static inline bool RowIsValid(const V &entry, const idx_t &idx_in_entry) {
160
163
  return entry & (V(1) << V(idx_in_entry));
161
164
  }
162
165
  static inline void GetEntryIndex(idx_t row_idx, idx_t &entry_idx, idx_t &idx_in_entry) {
@@ -285,7 +285,7 @@ struct RepeatFun {
285
285
  static constexpr const char *Description = "Repeats the string count number of times";
286
286
  static constexpr const char *Example = "repeat('A', 5)";
287
287
 
288
- static ScalarFunction GetFunction();
288
+ static ScalarFunctionSet GetFunctions();
289
289
  };
290
290
 
291
291
  struct ReplaceFun {
@@ -8,6 +8,7 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include "duckdb/common/row_operations/row_matcher.hpp"
11
12
  #include "duckdb/common/types/row/partitioned_tuple_data.hpp"
12
13
  #include "duckdb/execution/base_aggregate_hashtable.hpp"
13
14
  #include "duckdb/storage/arena_allocator.hpp"
@@ -143,6 +144,9 @@ public:
143
144
  void UnpinData();
144
145
 
145
146
  private:
147
+ //! Efficiently matches groups
148
+ RowMatcher row_matcher;
149
+
146
150
  //! Append state
147
151
  struct AggregateHTAppendState {
148
152
  AggregateHTAppendState();