duckdb 0.3.4-dev91.0 → 0.3.5-dev116.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -29679,6 +29679,8 @@ public:
29679
29679
  //! Completes the cascaded merge sort round.
29680
29680
  //! Pass true if you wish to use the radix data for further comparisons.
29681
29681
  void CompleteMergeRound(bool keep_radix_data = false);
29682
+ //! Print the sorted data to the console.
29683
+ void Print();
29682
29684
 
29683
29685
  public:
29684
29686
  //! The lock for updating the order global state
@@ -31591,6 +31593,19 @@ void GlobalSortState::CompleteMergeRound(bool keep_radix_data) {
31591
31593
  sorted_blocks[0]->blob_sorting_data = nullptr;
31592
31594
  }
31593
31595
  }
31596
+ void GlobalSortState::Print() {
31597
+ PayloadScanner scanner(*this, false);
31598
+ DataChunk chunk;
31599
+ chunk.Initialize(scanner.GetPayloadTypes());
31600
+ for (;;) {
31601
+ scanner.Scan(chunk);
31602
+ const auto count = chunk.size();
31603
+ if (!count) {
31604
+ break;
31605
+ }
31606
+ chunk.Print();
31607
+ }
31608
+ }
31594
31609
 
31595
31610
  } // namespace duckdb
31596
31611
 
@@ -41290,7 +41305,7 @@ void Vector::Normalify(idx_t count) {
41290
41305
  break;
41291
41306
  case VectorType::DICTIONARY_VECTOR: {
41292
41307
  // create a new flat vector of this type
41293
- Vector other(GetType());
41308
+ Vector other(GetType(), count);
41294
41309
  // now copy the data of this vector to the other vector, removing the selection vector in the process
41295
41310
  VectorOperations::Copy(*this, other, count, 0, 0);
41296
41311
  // create a reference to the data in the other vector
@@ -46757,8 +46772,8 @@ static bool ListCastSwitch(Vector &source, Vector &result, idx_t count, string *
46757
46772
  }
46758
46773
 
46759
46774
  template <class SRC_TYPE, class RES_TYPE>
46760
- void FillEnum(Vector &source, Vector &result, idx_t count) {
46761
-
46775
+ bool FillEnum(Vector &source, Vector &result, idx_t count, string *error_message) {
46776
+ bool all_converted = true;
46762
46777
  result.SetVectorType(VectorType::FLAT_VECTOR);
46763
46778
 
46764
46779
  auto &str_vec = EnumType::GetValuesInsertOrder(source.GetType());
@@ -46786,25 +46801,29 @@ void FillEnum(Vector &source, Vector &result, idx_t count) {
46786
46801
  auto key = EnumType::GetPos(res_enum_type, str);
46787
46802
  if (key == -1) {
46788
46803
  // key doesn't exist on result enum
46789
- result_mask.SetInvalid(i);
46804
+ if (!error_message) {
46805
+ result_data[i] = HandleVectorCastError::Operation<RES_TYPE>(
46806
+ CastExceptionText<SRC_TYPE, RES_TYPE>(source_data[src_idx]), result_mask, i, error_message,
46807
+ all_converted);
46808
+ } else {
46809
+ result_mask.SetInvalid(i);
46810
+ }
46790
46811
  continue;
46791
46812
  }
46792
46813
  result_data[i] = key;
46793
46814
  }
46815
+ return all_converted;
46794
46816
  }
46795
46817
 
46796
46818
  template <class SRC_TYPE>
46797
- void FillEnumResultTemplate(Vector &source, Vector &result, idx_t count) {
46819
+ bool FillEnumResultTemplate(Vector &source, Vector &result, idx_t count, string *error_message) {
46798
46820
  switch (source.GetType().InternalType()) {
46799
46821
  case PhysicalType::UINT8:
46800
- FillEnum<SRC_TYPE, uint8_t>(source, result, count);
46801
- break;
46822
+ return FillEnum<SRC_TYPE, uint8_t>(source, result, count, error_message);
46802
46823
  case PhysicalType::UINT16:
46803
- FillEnum<SRC_TYPE, uint16_t>(source, result, count);
46804
- break;
46824
+ return FillEnum<SRC_TYPE, uint16_t>(source, result, count, error_message);
46805
46825
  case PhysicalType::UINT32:
46806
- FillEnum<SRC_TYPE, uint32_t>(source, result, count);
46807
- break;
46826
+ return FillEnum<SRC_TYPE, uint32_t>(source, result, count, error_message);
46808
46827
  default:
46809
46828
  throw InternalException("ENUM can only have unsigned integers (except UINT64) as physical types");
46810
46829
  }
@@ -46852,18 +46871,14 @@ static bool EnumCastSwitch(Vector &source, Vector &result, idx_t count, string *
46852
46871
  // This means they are both ENUMs, but of different types.
46853
46872
  switch (enum_physical_type) {
46854
46873
  case PhysicalType::UINT8:
46855
- FillEnumResultTemplate<uint8_t>(source, result, count);
46856
- break;
46874
+ return FillEnumResultTemplate<uint8_t>(source, result, count, error_message);
46857
46875
  case PhysicalType::UINT16:
46858
- FillEnumResultTemplate<uint16_t>(source, result, count);
46859
- break;
46876
+ return FillEnumResultTemplate<uint16_t>(source, result, count, error_message);
46860
46877
  case PhysicalType::UINT32:
46861
- FillEnumResultTemplate<uint32_t>(source, result, count);
46862
- break;
46878
+ return FillEnumResultTemplate<uint32_t>(source, result, count, error_message);
46863
46879
  default:
46864
46880
  throw InternalException("ENUM can only have unsigned integers (except UINT64) as physical types");
46865
46881
  }
46866
- break;
46867
46882
  }
46868
46883
  case LogicalTypeId::JSON:
46869
46884
  case LogicalTypeId::VARCHAR: {
@@ -47105,7 +47120,20 @@ void VectorOperations::Copy(const Vector &source, Vector &target, const Selectio
47105
47120
  if (smask.IsMaskSet()) {
47106
47121
  for (idx_t i = 0; i < copy_count; i++) {
47107
47122
  auto idx = sel->get_index(source_offset + i);
47108
- tmask.Set(target_offset + i, smask.RowIsValid(idx));
47123
+
47124
+ if (smask.RowIsValid(idx)) {
47125
+ // set valid
47126
+ if (!tmask.AllValid()) {
47127
+ tmask.SetValidUnsafe(target_offset + i);
47128
+ }
47129
+ } else {
47130
+ // set invalid
47131
+ if (tmask.AllValid()) {
47132
+ auto init_size = MaxValue<idx_t>(STANDARD_VECTOR_SIZE, target_offset + copy_count);
47133
+ tmask.Initialize(init_size);
47134
+ }
47135
+ tmask.SetInvalidUnsafe(target_offset + i);
47136
+ }
47109
47137
  }
47110
47138
  }
47111
47139
  }
@@ -59205,16 +59233,119 @@ void PhysicalHashJoin::GetData(ExecutionContext &context, DataChunk &chunk, Glob
59205
59233
 
59206
59234
 
59207
59235
 
59236
+ //===----------------------------------------------------------------------===//
59237
+ // DuckDB
59238
+ //
59239
+ // duckdb/execution/operator/join/physical_piecewise_merge_join.hpp
59240
+ //
59241
+ //
59242
+ //===----------------------------------------------------------------------===//
59243
+
59244
+
59245
+
59246
+
59208
59247
 
59209
59248
 
59210
59249
 
59211
59250
  namespace duckdb {
59212
59251
 
59213
- class IEJoinSortedTable;
59252
+ struct GlobalSortState;
59253
+
59254
+ //! PhysicalRangeJoin represents one or more inequality range join predicates between
59255
+ //! two tables
59256
+ class PhysicalRangeJoin : public PhysicalComparisonJoin {
59257
+ public:
59258
+ class LocalSortedTable {
59259
+ public:
59260
+ LocalSortedTable(const PhysicalRangeJoin &op, const idx_t child);
59261
+
59262
+ void Sink(DataChunk &input, GlobalSortState &global_sort_state);
59263
+
59264
+ inline void Sort(GlobalSortState &global_sort_state) {
59265
+ local_sort_state.Sort(global_sort_state, true);
59266
+ }
59267
+
59268
+ //! The hosting operator
59269
+ const PhysicalRangeJoin &op;
59270
+ //! The local sort state
59271
+ LocalSortState local_sort_state;
59272
+ //! Local copy of the sorting expression executor
59273
+ ExpressionExecutor executor;
59274
+ //! Holds a vector of incoming sorting columns
59275
+ DataChunk keys;
59276
+ //! The number of NULL values
59277
+ idx_t has_null;
59278
+ //! The total number of rows
59279
+ idx_t count;
59280
+
59281
+ private:
59282
+ // Merge the NULLs of all non-DISTINCT predicates into the primary so they sort to the end.
59283
+ idx_t MergeNulls(const vector<JoinCondition> &conditions);
59284
+ };
59285
+
59286
+ class GlobalSortedTable {
59287
+ public:
59288
+ GlobalSortedTable(ClientContext &context, const vector<BoundOrderByNode> &orders, RowLayout &payload_layout);
59289
+
59290
+ inline idx_t Count() const {
59291
+ return count;
59292
+ }
59293
+
59294
+ inline idx_t BlockCount() const {
59295
+ if (global_sort_state.sorted_blocks.empty()) {
59296
+ return 0;
59297
+ }
59298
+ D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
59299
+ return global_sort_state.sorted_blocks[0]->radix_sorting_data.size();
59300
+ }
59301
+
59302
+ inline idx_t BlockSize(idx_t i) const {
59303
+ return global_sort_state.sorted_blocks[0]->radix_sorting_data[i].count;
59304
+ }
59305
+
59306
+ void Combine(LocalSortedTable &ltable);
59307
+ void IntializeMatches();
59308
+ void Print();
59309
+
59310
+ //! Starts the sorting process.
59311
+ void Finalize(Pipeline &pipeline, Event &event);
59312
+ //! Schedules tasks to merge sort the current child's data during a Finalize phase
59313
+ void ScheduleMergeTasks(Pipeline &pipeline, Event &event);
59314
+
59315
+ GlobalSortState global_sort_state;
59316
+ //! Whether or not the RHS has NULL values
59317
+ atomic<idx_t> has_null;
59318
+ //! The total number of rows in the RHS
59319
+ atomic<idx_t> count;
59320
+ //! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
59321
+ unique_ptr<bool[]> found_match;
59322
+ //! Memory usage per thread
59323
+ idx_t memory_per_thread;
59324
+ };
59325
+
59326
+ public:
59327
+ PhysicalRangeJoin(LogicalOperator &op, PhysicalOperatorType type, unique_ptr<PhysicalOperator> left,
59328
+ unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond, JoinType join_type,
59329
+ idx_t estimated_cardinality);
59330
+
59331
+ public:
59332
+ // Gather the result values and slice the payload columns to those values.
59333
+ static void SliceSortedPayload(DataChunk &payload, GlobalSortState &state, const idx_t block_idx,
59334
+ const SelectionVector &result, const idx_t result_count, const idx_t left_cols = 0);
59335
+ // Apply a tail condition to the current selection
59336
+ static idx_t SelectJoinTail(const ExpressionType &condition, Vector &left, Vector &right,
59337
+ const SelectionVector *sel, idx_t count, SelectionVector *true_sel);
59338
+ };
59339
+
59340
+ } // namespace duckdb
59341
+
59342
+
59343
+
59344
+ namespace duckdb {
59214
59345
 
59215
59346
  //! PhysicalIEJoin represents a two inequality range join between
59216
59347
  //! two tables
59217
- class PhysicalIEJoin : public PhysicalComparisonJoin {
59348
+ class PhysicalIEJoin : public PhysicalRangeJoin {
59218
59349
  public:
59219
59350
  PhysicalIEJoin(LogicalOperator &op, unique_ptr<PhysicalOperator> left, unique_ptr<PhysicalOperator> right,
59220
59351
  vector<JoinCondition> cond, JoinType join_type, idx_t estimated_cardinality);
@@ -59253,9 +59384,6 @@ public:
59253
59384
  SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
59254
59385
  GlobalSinkState &gstate) const override;
59255
59386
 
59256
- //! Schedules tasks to merge sort the current child's data during a Finalize phase
59257
- static void ScheduleMergeTasks(Pipeline &pipeline, Event &event, IEJoinSortedTable &table);
59258
-
59259
59387
  bool IsSink() const override {
59260
59388
  return true;
59261
59389
  }
@@ -59289,35 +59417,8 @@ namespace duckdb {
59289
59417
  PhysicalIEJoin::PhysicalIEJoin(LogicalOperator &op, unique_ptr<PhysicalOperator> left,
59290
59418
  unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond, JoinType join_type,
59291
59419
  idx_t estimated_cardinality)
59292
- : PhysicalComparisonJoin(op, PhysicalOperatorType::IE_JOIN, move(cond), join_type, estimated_cardinality) {
59293
- // Reorder the conditions so that ranges are at the front.
59294
- // TODO: use stats to improve the choice?
59295
- // TODO: Prefer fixed length types?
59296
- auto conditions_p = std::move(conditions);
59297
- conditions.resize(conditions_p.size());
59298
- idx_t range_position = 0;
59299
- idx_t other_position = conditions_p.size();
59300
- for (idx_t i = 0; i < conditions_p.size(); ++i) {
59301
- switch (conditions_p[i].comparison) {
59302
- case ExpressionType::COMPARE_LESSTHAN:
59303
- case ExpressionType::COMPARE_LESSTHANOREQUALTO:
59304
- case ExpressionType::COMPARE_GREATERTHAN:
59305
- case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
59306
- conditions[range_position++] = std::move(conditions_p[i]);
59307
- break;
59308
- case ExpressionType::COMPARE_NOTEQUAL:
59309
- case ExpressionType::COMPARE_DISTINCT_FROM:
59310
- // Allowed in multi-predicate joins, but can't be first/sort.
59311
- conditions[--other_position] = std::move(conditions_p[i]);
59312
- break;
59313
- default:
59314
- // COMPARE EQUAL not supported with iejoin join
59315
- throw NotImplementedException("Unimplemented join type for IEJoin");
59316
- }
59317
- }
59318
-
59319
- // IEJoin requires at least two comparisons.
59320
- D_ASSERT(range_position > 1);
59420
+ : PhysicalRangeJoin(op, PhysicalOperatorType::IE_JOIN, move(left), move(right), move(cond), join_type,
59421
+ estimated_cardinality) {
59321
59422
 
59322
59423
  // 1. let L1 (resp. L2) be the array of column X (resp. Y)
59323
59424
  D_ASSERT(conditions.size() >= 2);
@@ -59342,9 +59443,12 @@ PhysicalIEJoin::PhysicalIEJoin(LogicalOperator &op, unique_ptr<PhysicalOperator>
59342
59443
  case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
59343
59444
  sense = i ? OrderType::ASCENDING : OrderType::DESCENDING;
59344
59445
  break;
59345
- default:
59446
+ case ExpressionType::COMPARE_LESSTHAN:
59447
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
59346
59448
  sense = i ? OrderType::DESCENDING : OrderType::ASCENDING;
59347
59449
  break;
59450
+ default:
59451
+ throw NotImplementedException("Unimplemented join type for IEJoin");
59348
59452
  }
59349
59453
  lhs_orders[i].emplace_back(BoundOrderByNode(sense, OrderByNullType::NULLS_LAST, move(left)));
59350
59454
  rhs_orders[i].emplace_back(BoundOrderByNode(sense, OrderByNullType::NULLS_LAST, move(right)));
@@ -59355,9 +59459,6 @@ PhysicalIEJoin::PhysicalIEJoin(LogicalOperator &op, unique_ptr<PhysicalOperator>
59355
59459
  D_ASSERT(cond.left->return_type == cond.right->return_type);
59356
59460
  join_key_types.push_back(cond.left->return_type);
59357
59461
  }
59358
-
59359
- children.push_back(move(left));
59360
- children.push_back(move(right));
59361
59462
  }
59362
59463
 
59363
59464
  //===--------------------------------------------------------------------===//
@@ -59365,193 +59466,19 @@ PhysicalIEJoin::PhysicalIEJoin(LogicalOperator &op, unique_ptr<PhysicalOperator>
59365
59466
  //===--------------------------------------------------------------------===//
59366
59467
  class IEJoinLocalState : public LocalSinkState {
59367
59468
  public:
59368
- explicit IEJoinLocalState(const vector<JoinCondition> &conditions, const idx_t child) : has_null(0), count(0) {
59369
- // Initialize order clause expression executor and key DataChunk
59370
- vector<LogicalType> types;
59371
- for (const auto &cond : conditions) {
59372
- comparisons.emplace_back(cond.comparison);
59469
+ using LocalSortedTable = PhysicalRangeJoin::LocalSortedTable;
59373
59470
 
59374
- const auto &expr = child ? cond.right : cond.left;
59375
- executor.AddExpression(*expr);
59376
-
59377
- types.push_back(expr->return_type);
59378
- }
59379
- keys.Initialize(types);
59471
+ IEJoinLocalState(const PhysicalRangeJoin &op, const idx_t child) : table(op, child) {
59380
59472
  }
59381
59473
 
59382
59474
  //! The local sort state
59383
- LocalSortState local_sort_state;
59384
- //! Local copy of the sorting expression executor
59385
- ExpressionExecutor executor;
59386
- //! Holds a vector of incoming sorting columns
59387
- DataChunk keys;
59388
- //! The comparison list (for null merging)
59389
- vector<ExpressionType> comparisons;
59390
- //! The number of NULL values
59391
- idx_t has_null;
59392
- //! The total number of rows
59393
- idx_t count;
59394
-
59395
- idx_t MergeKeyNulls();
59396
-
59397
- void Sink(DataChunk &input, GlobalSortState &global_sort_state) {
59398
- // Initialize local state (if necessary)
59399
- if (!local_sort_state.initialized) {
59400
- local_sort_state.Initialize(global_sort_state, global_sort_state.buffer_manager);
59401
- }
59402
-
59403
- // Obtain sorting columns
59404
- keys.Reset();
59405
- executor.Execute(input, keys);
59406
-
59407
- // Count the NULLs so we can exclude them later
59408
- has_null += MergeKeyNulls();
59409
- count += keys.size();
59410
-
59411
- // Sink the data into the local sort state
59412
- D_ASSERT(keys.ColumnCount() > 1);
59413
- // Only sort the primary key
59414
- DataChunk join_head;
59415
- join_head.data.emplace_back(Vector(keys.data[0]));
59416
- join_head.SetCardinality(keys.size());
59417
-
59418
- local_sort_state.SinkChunk(join_head, input);
59419
- }
59420
-
59421
- void Sort(GlobalSortState &gss) {
59422
- local_sort_state.Sort(gss, true);
59423
- }
59424
- void Reset() {
59425
- has_null = 0;
59426
- count = 0;
59427
- }
59475
+ LocalSortedTable table;
59428
59476
  };
59429
59477
 
59430
- idx_t IEJoinLocalState::MergeKeyNulls() {
59431
- // Merge the validity masks of the comparison keys into the primary
59432
- // Return the number of NULLs in the resulting chunk
59433
- D_ASSERT(keys.ColumnCount() > 0);
59434
- const auto count = keys.size();
59435
-
59436
- size_t all_constant = 0;
59437
- for (auto &v : keys.data) {
59438
- all_constant += int(v.GetVectorType() == VectorType::CONSTANT_VECTOR);
59439
- }
59440
-
59441
- auto &primary = keys.data[0];
59442
- if (all_constant == keys.data.size()) {
59443
- // Either all NULL or no NULLs
59444
- for (auto &v : keys.data) {
59445
- if (ConstantVector::IsNull(v)) {
59446
- ConstantVector::SetNull(primary, true);
59447
- return count;
59448
- }
59449
- }
59450
- return 0;
59451
- } else if (keys.ColumnCount() > 1) {
59452
- // Normalify the primary, as it will need to merge arbitrary validity masks
59453
- primary.Normalify(count);
59454
- auto &pvalidity = FlatVector::Validity(primary);
59455
- D_ASSERT(keys.ColumnCount() == comparisons.size());
59456
- for (size_t c = 1; c < keys.data.size(); ++c) {
59457
- // Skip comparisons that accept NULLs
59458
- if (comparisons[c] == ExpressionType::COMPARE_DISTINCT_FROM) {
59459
- continue;
59460
- }
59461
- // Orrify the rest, as the sort code will do this anyway.
59462
- auto &v = keys.data[c];
59463
- VectorData vdata;
59464
- v.Orrify(count, vdata);
59465
- auto &vvalidity = vdata.validity;
59466
- if (vvalidity.AllValid()) {
59467
- continue;
59468
- }
59469
- pvalidity.EnsureWritable();
59470
- auto pmask = pvalidity.GetData();
59471
- if (v.GetVectorType() == VectorType::FLAT_VECTOR) {
59472
- // Merge entire entries
59473
- const auto entry_count = pvalidity.EntryCount(count);
59474
- for (idx_t entry_idx = 0; entry_idx < entry_count; ++entry_idx) {
59475
- pmask[entry_idx] &= vvalidity.GetValidityEntry(entry_idx);
59476
- }
59477
- }
59478
- }
59479
- return count - pvalidity.CountValid(count);
59480
- } else {
59481
- return count - VectorOperations::CountNotNull(primary, count);
59482
- }
59483
- }
59484
-
59485
- class IEJoinSortedTable {
59478
+ class IEJoinGlobalState : public GlobalSinkState {
59486
59479
  public:
59487
- IEJoinSortedTable(ClientContext &context, const vector<BoundOrderByNode> &orders, RowLayout &payload_layout)
59488
- : global_sort_state(BufferManager::GetBufferManager(context), orders, payload_layout), has_null(0), count(0),
59489
- memory_per_thread(0) {
59490
- D_ASSERT(orders.size() == 1);
59491
-
59492
- // Set external (can be force with the PRAGMA)
59493
- auto &config = ClientConfig::GetConfig(context);
59494
- global_sort_state.external = config.force_external;
59495
- // Memory usage per thread should scale with max mem / num threads
59496
- // We take 1/4th of this, to be conservative
59497
- idx_t max_memory = global_sort_state.buffer_manager.GetMaxMemory();
59498
- idx_t num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
59499
- memory_per_thread = (max_memory / num_threads) / 4;
59500
- }
59501
-
59502
- inline idx_t Count() const {
59503
- return count;
59504
- }
59505
-
59506
- inline idx_t BlockCount() const {
59507
- if (global_sort_state.sorted_blocks.empty()) {
59508
- return 0;
59509
- }
59510
- D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
59511
- return global_sort_state.sorted_blocks[0]->radix_sorting_data.size();
59512
- }
59480
+ using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;
59513
59481
 
59514
- inline idx_t BlockSize(idx_t i) const {
59515
- return global_sort_state.sorted_blocks[0]->radix_sorting_data[i].count;
59516
- }
59517
-
59518
- inline void Combine(IEJoinLocalState &lstate) {
59519
- global_sort_state.AddLocalState(lstate.local_sort_state);
59520
- has_null += lstate.has_null;
59521
- count += lstate.count;
59522
- }
59523
-
59524
- inline void IntializeMatches() {
59525
- found_match = unique_ptr<bool[]>(new bool[Count()]);
59526
- memset(found_match.get(), 0, sizeof(bool) * Count());
59527
- }
59528
-
59529
- void Print() {
59530
- PayloadScanner scanner(global_sort_state, false);
59531
- DataChunk chunk;
59532
- chunk.Initialize(scanner.GetPayloadTypes());
59533
- for (;;) {
59534
- scanner.Scan(chunk);
59535
- const auto count = chunk.size();
59536
- if (!count) {
59537
- break;
59538
- }
59539
- chunk.Print();
59540
- }
59541
- }
59542
-
59543
- GlobalSortState global_sort_state;
59544
- //! Whether or not the RHS has NULL values
59545
- atomic<idx_t> has_null;
59546
- //! The total number of rows in the RHS
59547
- atomic<idx_t> count;
59548
- //! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
59549
- unique_ptr<bool[]> found_match;
59550
- //! Memory usage per thread
59551
- idx_t memory_per_thread;
59552
- };
59553
-
59554
- class IEJoinGlobalState : public GlobalSinkState {
59555
59482
  public:
59556
59483
  IEJoinGlobalState(ClientContext &context, const PhysicalIEJoin &op) : child(0) {
59557
59484
  tables.resize(2);
@@ -59559,13 +59486,13 @@ public:
59559
59486
  lhs_layout.Initialize(op.children[0]->types);
59560
59487
  vector<BoundOrderByNode> lhs_order;
59561
59488
  lhs_order.emplace_back(op.lhs_orders[0][0].Copy());
59562
- tables[0] = make_unique<IEJoinSortedTable>(context, lhs_order, lhs_layout);
59489
+ tables[0] = make_unique<GlobalSortedTable>(context, lhs_order, lhs_layout);
59563
59490
 
59564
59491
  RowLayout rhs_layout;
59565
59492
  rhs_layout.Initialize(op.children[1]->types);
59566
59493
  vector<BoundOrderByNode> rhs_order;
59567
59494
  rhs_order.emplace_back(op.rhs_orders[0][0].Copy());
59568
- tables[1] = make_unique<IEJoinSortedTable>(context, rhs_order, rhs_layout);
59495
+ tables[1] = make_unique<GlobalSortedTable>(context, rhs_order, rhs_layout);
59569
59496
  }
59570
59497
 
59571
59498
  IEJoinGlobalState(IEJoinGlobalState &prev)
@@ -59575,10 +59502,10 @@ public:
59575
59502
  void Sink(DataChunk &input, IEJoinLocalState &lstate) {
59576
59503
  auto &table = *tables[child];
59577
59504
  auto &global_sort_state = table.global_sort_state;
59578
- auto &local_sort_state = lstate.local_sort_state;
59505
+ auto &local_sort_state = lstate.table.local_sort_state;
59579
59506
 
59580
59507
  // Sink the data into the local sort state
59581
- lstate.Sink(input, global_sort_state);
59508
+ lstate.table.Sink(input, global_sort_state);
59582
59509
 
59583
59510
  // When sorting data reaches a certain size, we sort it
59584
59511
  if (local_sort_state.SizeInBytes() >= table.memory_per_thread) {
@@ -59586,7 +59513,7 @@ public:
59586
59513
  }
59587
59514
  }
59588
59515
 
59589
- vector<unique_ptr<IEJoinSortedTable>> tables;
59516
+ vector<unique_ptr<GlobalSortedTable>> tables;
59590
59517
  size_t child;
59591
59518
  };
59592
59519
 
@@ -59601,7 +59528,7 @@ unique_ptr<LocalSinkState> PhysicalIEJoin::GetLocalSinkState(ExecutionContext &c
59601
59528
  const auto &ie_sink = (IEJoinGlobalState &)*sink_state;
59602
59529
  sink_child = ie_sink.child;
59603
59530
  }
59604
- return make_unique<IEJoinLocalState>(conditions, sink_child);
59531
+ return make_unique<IEJoinLocalState>(*this, sink_child);
59605
59532
  }
59606
59533
 
59607
59534
  SinkResultType PhysicalIEJoin::Sink(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p,
@@ -59617,80 +59544,16 @@ SinkResultType PhysicalIEJoin::Sink(ExecutionContext &context, GlobalSinkState &
59617
59544
  void PhysicalIEJoin::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
59618
59545
  auto &gstate = (IEJoinGlobalState &)gstate_p;
59619
59546
  auto &lstate = (IEJoinLocalState &)lstate_p;
59620
- gstate.tables[gstate.child]->Combine(lstate);
59547
+ gstate.tables[gstate.child]->Combine(lstate.table);
59621
59548
  auto &client_profiler = QueryProfiler::Get(context.client);
59622
59549
 
59623
- context.thread.profiler.Flush(this, &lstate.executor, gstate.child ? "rhs_executor" : "lhs_executor", 1);
59550
+ context.thread.profiler.Flush(this, &lstate.table.executor, gstate.child ? "rhs_executor" : "lhs_executor", 1);
59624
59551
  client_profiler.Flush(context.thread.profiler);
59625
59552
  }
59626
59553
 
59627
59554
  //===--------------------------------------------------------------------===//
59628
59555
  // Finalize
59629
59556
  //===--------------------------------------------------------------------===//
59630
- class IEJoinFinalizeTask : public ExecutorTask {
59631
- public:
59632
- IEJoinFinalizeTask(shared_ptr<Event> event_p, ClientContext &context, IEJoinSortedTable &table)
59633
- : ExecutorTask(context), event(move(event_p)), context(context), table(table) {
59634
- }
59635
-
59636
- TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
59637
- // Initialize iejoin sorted and iterate until done
59638
- auto &global_sort_state = table.global_sort_state;
59639
- MergeSorter merge_sorter(global_sort_state, BufferManager::GetBufferManager(context));
59640
- merge_sorter.PerformInMergeRound();
59641
- event->FinishTask();
59642
-
59643
- return TaskExecutionResult::TASK_FINISHED;
59644
- }
59645
-
59646
- private:
59647
- shared_ptr<Event> event;
59648
- ClientContext &context;
59649
- IEJoinSortedTable &table;
59650
- };
59651
-
59652
- class IEJoinFinalizeEvent : public Event {
59653
- public:
59654
- IEJoinFinalizeEvent(IEJoinSortedTable &table_p, Pipeline &pipeline_p)
59655
- : Event(pipeline_p.executor), table(table_p), pipeline(pipeline_p) {
59656
- }
59657
-
59658
- IEJoinSortedTable &table;
59659
- Pipeline &pipeline;
59660
-
59661
- public:
59662
- void Schedule() override {
59663
- auto &context = pipeline.GetClientContext();
59664
-
59665
- // Schedule tasks equal to the number of threads, which will each iejoin multiple partitions
59666
- auto &ts = TaskScheduler::GetScheduler(context);
59667
- idx_t num_threads = ts.NumberOfThreads();
59668
-
59669
- vector<unique_ptr<Task>> iejoin_tasks;
59670
- for (idx_t tnum = 0; tnum < num_threads; tnum++) {
59671
- iejoin_tasks.push_back(make_unique<IEJoinFinalizeTask>(shared_from_this(), context, table));
59672
- }
59673
- SetTasks(move(iejoin_tasks));
59674
- }
59675
-
59676
- void FinishEvent() override {
59677
- auto &global_sort_state = table.global_sort_state;
59678
-
59679
- global_sort_state.CompleteMergeRound(true);
59680
- if (global_sort_state.sorted_blocks.size() > 1) {
59681
- // Multiple blocks remaining: Schedule the next round
59682
- PhysicalIEJoin::ScheduleMergeTasks(pipeline, *this, table);
59683
- }
59684
- }
59685
- };
59686
-
59687
- void PhysicalIEJoin::ScheduleMergeTasks(Pipeline &pipeline, Event &event, IEJoinSortedTable &table) {
59688
- // Initialize global sort state for a round of merging
59689
- table.global_sort_state.InitializeMergeRound();
59690
- auto new_event = make_shared<IEJoinFinalizeEvent>(table, pipeline);
59691
- event.InsertEvent(move(new_event));
59692
- }
59693
-
59694
59557
  SinkFinalizeType PhysicalIEJoin::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
59695
59558
  GlobalSinkState &gstate_p) const {
59696
59559
  auto &gstate = (IEJoinGlobalState &)gstate_p;
@@ -59706,14 +59569,10 @@ SinkFinalizeType PhysicalIEJoin::Finalize(Pipeline &pipeline, Event &event, Clie
59706
59569
  return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
59707
59570
  }
59708
59571
 
59709
- // Prepare for child sort phase
59710
- global_sort_state.PrepareMergePhase();
59711
-
59712
- // Start the iejoin phase or finish if a iejoin is not necessary
59713
- if (global_sort_state.sorted_blocks.size() > 1) {
59714
- PhysicalIEJoin::ScheduleMergeTasks(pipeline, event, table);
59715
- }
59572
+ // Sort the current input child
59573
+ table.Finalize(pipeline, event);
59716
59574
 
59575
+ // Move to the next input child
59717
59576
  ++gstate.child;
59718
59577
 
59719
59578
  return SinkFinalizeType::READY;
@@ -59722,6 +59581,14 @@ SinkFinalizeType PhysicalIEJoin::Finalize(Pipeline &pipeline, Event &event, Clie
59722
59581
  //===--------------------------------------------------------------------===//
59723
59582
  // Operator
59724
59583
  //===--------------------------------------------------------------------===//
59584
+ OperatorResultType PhysicalIEJoin::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
59585
+ GlobalOperatorState &gstate, OperatorState &state) const {
59586
+ return OperatorResultType::FINISHED;
59587
+ }
59588
+
59589
+ //===--------------------------------------------------------------------===//
59590
+ // Source
59591
+ //===--------------------------------------------------------------------===//
59725
59592
  struct SBIterator {
59726
59593
  static int ComparisonValue(ExpressionType comparison) {
59727
59594
  switch (comparison) {
@@ -59821,7 +59688,7 @@ struct SBIterator {
59821
59688
  };
59822
59689
 
59823
59690
  struct IEJoinUnion {
59824
- using SortedTable = IEJoinSortedTable;
59691
+ using SortedTable = PhysicalRangeJoin::GlobalSortedTable;
59825
59692
 
59826
59693
  static idx_t AppendKey(SortedTable &table, ExpressionExecutor &executor, SortedTable &marked, int64_t increment,
59827
59694
  int64_t base, const idx_t block_idx);
@@ -60262,61 +60129,11 @@ idx_t IEJoinUnion::JoinComplexBlocks(SelectionVector &lsel, SelectionVector &rse
60262
60129
 
60263
60130
  class IEJoinState : public OperatorState {
60264
60131
  public:
60265
- explicit IEJoinState(const PhysicalIEJoin &op) : local_left(op.conditions, 0) {};
60132
+ explicit IEJoinState(const PhysicalIEJoin &op) : local_left(op, 0) {};
60266
60133
 
60267
60134
  IEJoinLocalState local_left;
60268
60135
  };
60269
60136
 
60270
- static void SliceSortedPayload(DataChunk &payload, GlobalSortState &state, const idx_t block_idx,
60271
- const SelectionVector &result, const idx_t result_count, const idx_t left_cols = 0) {
60272
- // There should only be one sorted block if they have been sorted
60273
- D_ASSERT(state.sorted_blocks.size() == 1);
60274
- SBScanState read_state(state.buffer_manager, state);
60275
- read_state.sb = state.sorted_blocks[0].get();
60276
- auto &sorted_data = *read_state.sb->payload_data;
60277
-
60278
- read_state.SetIndices(block_idx, 0);
60279
- read_state.PinData(sorted_data);
60280
- const auto data_ptr = read_state.DataPtr(sorted_data);
60281
-
60282
- // Set up a batch of pointers to scan data from
60283
- Vector addresses(LogicalType::POINTER, result_count);
60284
- auto data_pointers = FlatVector::GetData<data_ptr_t>(addresses);
60285
-
60286
- // Set up the data pointers for the values that are actually referenced
60287
- const idx_t &row_width = sorted_data.layout.GetRowWidth();
60288
-
60289
- auto prev_idx = result.get_index(0);
60290
- SelectionVector gsel(result_count);
60291
- idx_t addr_count = 0;
60292
- gsel.set_index(0, addr_count);
60293
- data_pointers[addr_count] = data_ptr + prev_idx * row_width;
60294
- for (idx_t i = 1; i < result_count; ++i) {
60295
- const auto row_idx = result.get_index(i);
60296
- if (row_idx != prev_idx) {
60297
- data_pointers[++addr_count] = data_ptr + row_idx * row_width;
60298
- prev_idx = row_idx;
60299
- }
60300
- gsel.set_index(i, addr_count);
60301
- }
60302
- ++addr_count;
60303
-
60304
- // Unswizzle the offsets back to pointers (if needed)
60305
- if (!sorted_data.layout.AllConstant() && state.external) {
60306
- RowOperations::UnswizzlePointers(sorted_data.layout, data_ptr, read_state.payload_heap_handle->Ptr(),
60307
- addr_count);
60308
- }
60309
-
60310
- // Deserialize the payload data
60311
- auto sel = FlatVector::IncrementalSelectionVector();
60312
- for (idx_t col_idx = 0; col_idx < sorted_data.layout.ColumnCount(); col_idx++) {
60313
- const auto col_offset = sorted_data.layout.GetOffsets()[col_idx];
60314
- auto &col = payload.data[left_cols + col_idx];
60315
- RowOperations::Gather(addresses, *sel, col, *sel, addr_count, col_offset, col_idx);
60316
- col.Slice(gsel, result_count);
60317
- }
60318
- }
60319
-
60320
60137
  class IEJoinLocalSourceState : public LocalSourceState {
60321
60138
  public:
60322
60139
  explicit IEJoinLocalSourceState(const PhysicalIEJoin &op)
@@ -60342,9 +60159,6 @@ public:
60342
60159
  right_keys.Initialize(right_types);
60343
60160
  }
60344
60161
 
60345
- idx_t SelectJoinTail(const ExpressionType &condition, Vector &left, Vector &right, const SelectionVector *sel,
60346
- idx_t count);
60347
-
60348
60162
  idx_t SelectOuterRows(bool *matches) {
60349
60163
  idx_t count = 0;
60350
60164
  for (; outer_idx < outer_count; ++outer_idx) {
@@ -60386,30 +60200,6 @@ public:
60386
60200
  bool *right_matches;
60387
60201
  };
60388
60202
 
60389
- idx_t IEJoinLocalSourceState::SelectJoinTail(const ExpressionType &condition, Vector &left, Vector &right,
60390
- const SelectionVector *sel, idx_t count) {
60391
- switch (condition) {
60392
- case ExpressionType::COMPARE_NOTEQUAL:
60393
- return VectorOperations::NotEquals(left, right, sel, count, &true_sel, nullptr);
60394
- case ExpressionType::COMPARE_LESSTHAN:
60395
- return VectorOperations::LessThan(left, right, sel, count, &true_sel, nullptr);
60396
- case ExpressionType::COMPARE_GREATERTHAN:
60397
- return VectorOperations::GreaterThan(left, right, sel, count, &true_sel, nullptr);
60398
- case ExpressionType::COMPARE_LESSTHANOREQUALTO:
60399
- return VectorOperations::LessThanEquals(left, right, sel, count, &true_sel, nullptr);
60400
- case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
60401
- return VectorOperations::GreaterThanEquals(left, right, sel, count, &true_sel, nullptr);
60402
- case ExpressionType::COMPARE_DISTINCT_FROM:
60403
- return VectorOperations::DistinctFrom(left, right, sel, count, &true_sel, nullptr);
60404
- case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
60405
- case ExpressionType::COMPARE_EQUAL:
60406
- default:
60407
- throw InternalException("Unsupported comparison type for PhysicalIEJoin");
60408
- }
60409
-
60410
- return count;
60411
- }
60412
-
60413
60203
  void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk, LocalSourceState &state_p) const {
60414
60204
  auto &state = (IEJoinLocalSourceState &)state_p;
60415
60205
  auto &ie_sink = (IEJoinGlobalState &)*sink_state;
@@ -60446,6 +60236,7 @@ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &ch
60446
60236
  state.right_executor.SetChunk(right_chunk);
60447
60237
 
60448
60238
  auto tail_count = result_count;
60239
+ auto true_sel = &state.true_sel;
60449
60240
  for (size_t cmp_idx = 0; cmp_idx < tail_cols; ++cmp_idx) {
60450
60241
  auto &left = state.left_keys.data[cmp_idx];
60451
60242
  state.left_executor.ExecuteExpression(cmp_idx, left);
@@ -60457,8 +60248,8 @@ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &ch
60457
60248
  left.Slice(*sel, tail_count);
60458
60249
  right.Slice(*sel, tail_count);
60459
60250
  }
60460
- tail_count = state.SelectJoinTail(conditions[cmp_idx + 2].comparison, left, right, sel, tail_count);
60461
- sel = &state.true_sel;
60251
+ tail_count = SelectJoinTail(conditions[cmp_idx + 2].comparison, left, right, sel, tail_count, true_sel);
60252
+ sel = true_sel;
60462
60253
  }
60463
60254
  chunk.Fuse(right_chunk);
60464
60255
 
@@ -60483,14 +60274,6 @@ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &ch
60483
60274
  } while (chunk.size() == 0);
60484
60275
  }
60485
60276
 
60486
- OperatorResultType PhysicalIEJoin::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
60487
- GlobalOperatorState &gstate, OperatorState &state) const {
60488
- return OperatorResultType::FINISHED;
60489
- }
60490
-
60491
- //===--------------------------------------------------------------------===//
60492
- // Source
60493
- //===--------------------------------------------------------------------===//
60494
60277
  class IEJoinGlobalSourceState : public GlobalSourceState {
60495
60278
  public:
60496
60279
  explicit IEJoinGlobalSourceState(const PhysicalIEJoin &op)
@@ -61631,7 +61414,7 @@ class MergeJoinGlobalState;
61631
61414
 
61632
61415
  //! PhysicalPiecewiseMergeJoin represents a piecewise merge loop join between
61633
61416
  //! two tables
61634
- class PhysicalPiecewiseMergeJoin : public PhysicalComparisonJoin {
61417
+ class PhysicalPiecewiseMergeJoin : public PhysicalRangeJoin {
61635
61418
  public:
61636
61419
  PhysicalPiecewiseMergeJoin(LogicalOperator &op, unique_ptr<PhysicalOperator> left,
61637
61420
  unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond, JoinType join_type,
@@ -61678,9 +61461,6 @@ public:
61678
61461
  SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
61679
61462
  GlobalSinkState &gstate) const override;
61680
61463
 
61681
- //! Schedules tasks to merge sort the RHS data during the Finalize phase
61682
- static void ScheduleMergeTasks(Pipeline &pipeline, Event &event, MergeJoinGlobalState &state);
61683
-
61684
61464
  bool IsSink() const override {
61685
61465
  return true;
61686
61466
  }
@@ -61715,29 +61495,8 @@ namespace duckdb {
61715
61495
  PhysicalPiecewiseMergeJoin::PhysicalPiecewiseMergeJoin(LogicalOperator &op, unique_ptr<PhysicalOperator> left,
61716
61496
  unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond,
61717
61497
  JoinType join_type, idx_t estimated_cardinality)
61718
- : PhysicalComparisonJoin(op, PhysicalOperatorType::PIECEWISE_MERGE_JOIN, move(cond), join_type,
61719
- estimated_cardinality) {
61720
- // Reorder the conditions so that ranges are at the front.
61721
- // TODO: use stats to improve the choice?
61722
- if (conditions.size() > 1) {
61723
- auto conditions_p = std::move(conditions);
61724
- conditions.resize(conditions_p.size());
61725
- idx_t range_position = 0;
61726
- idx_t other_position = conditions_p.size();
61727
- for (idx_t i = 0; i < conditions_p.size(); ++i) {
61728
- switch (conditions_p[i].comparison) {
61729
- case ExpressionType::COMPARE_LESSTHAN:
61730
- case ExpressionType::COMPARE_LESSTHANOREQUALTO:
61731
- case ExpressionType::COMPARE_GREATERTHAN:
61732
- case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
61733
- conditions[range_position++] = std::move(conditions_p[i]);
61734
- break;
61735
- default:
61736
- conditions[--other_position] = std::move(conditions_p[i]);
61737
- break;
61738
- }
61739
- }
61740
- }
61498
+ : PhysicalRangeJoin(op, PhysicalOperatorType::PIECEWISE_MERGE_JOIN, move(left), move(right), move(cond), join_type,
61499
+ estimated_cardinality) {
61741
61500
 
61742
61501
  for (auto &cond : conditions) {
61743
61502
  D_ASSERT(cond.left->return_type == cond.right->return_type);
@@ -61770,172 +61529,60 @@ PhysicalPiecewiseMergeJoin::PhysicalPiecewiseMergeJoin(LogicalOperator &op, uniq
61770
61529
  throw NotImplementedException("Unimplemented join type for merge join");
61771
61530
  }
61772
61531
  }
61773
- children.push_back(move(left));
61774
- children.push_back(move(right));
61775
61532
  }
61776
61533
 
61777
61534
  //===--------------------------------------------------------------------===//
61778
61535
  // Sink
61779
61536
  //===--------------------------------------------------------------------===//
61780
- class MergeJoinGlobalState : public GlobalSinkState {
61537
+ class MergeJoinLocalState : public LocalSinkState {
61781
61538
  public:
61782
- MergeJoinGlobalState(BufferManager &buffer_manager, const vector<BoundOrderByNode> &orders, RowLayout &rhs_layout)
61783
- : rhs_global_sort_state(buffer_manager, orders, rhs_layout), rhs_has_null(0), rhs_count(0),
61784
- memory_per_thread(0) {
61785
- D_ASSERT(orders.size() == 1);
61786
- }
61787
-
61788
- inline idx_t Count() const {
61789
- return rhs_count;
61539
+ explicit MergeJoinLocalState(const PhysicalRangeJoin &op, const idx_t child) : table(op, child) {
61790
61540
  }
61791
61541
 
61792
- //! The lock for updating the global state
61793
- mutex lock;
61794
- //! Global sort state
61795
- GlobalSortState rhs_global_sort_state;
61796
- //! Whether or not the RHS has NULL values
61797
- idx_t rhs_has_null;
61798
- //! The total number of rows in the RHS
61799
- idx_t rhs_count;
61800
- //! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
61801
- unique_ptr<bool[]> rhs_found_match;
61802
- //! Memory usage per thread
61803
- idx_t memory_per_thread;
61542
+ //! The local sort state
61543
+ PhysicalRangeJoin::LocalSortedTable table;
61804
61544
  };
61805
61545
 
61806
- unique_ptr<GlobalSinkState> PhysicalPiecewiseMergeJoin::GetGlobalSinkState(ClientContext &context) const {
61807
- // Get the payload layout from the rhs types and tail predicates
61808
- RowLayout rhs_layout;
61809
- rhs_layout.Initialize(children[1]->types);
61810
- vector<BoundOrderByNode> rhs_order;
61811
- rhs_order.emplace_back(rhs_orders[0].Copy());
61812
- auto state = make_unique<MergeJoinGlobalState>(BufferManager::GetBufferManager(context), rhs_order, rhs_layout);
61813
- // Set external (can be force with the PRAGMA)
61814
- auto &config = ClientConfig::GetConfig(context);
61815
- state->rhs_global_sort_state.external = config.force_external;
61816
- // Memory usage per thread should scale with max mem / num threads
61817
- // We take 1/4th of this, to be conservative
61818
- idx_t max_memory = BufferManager::GetBufferManager(context).GetMaxMemory();
61819
- idx_t num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
61820
- state->memory_per_thread = (max_memory / num_threads) / 4;
61821
- return move(state);
61822
- }
61546
+ class MergeJoinGlobalState : public GlobalSinkState {
61547
+ public:
61548
+ using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;
61823
61549
 
61824
- class MergeJoinLocalState : public LocalSinkState {
61825
61550
  public:
61826
- explicit MergeJoinLocalState() : rhs_has_null(0), rhs_count(0) {
61551
+ MergeJoinGlobalState(ClientContext &context, const PhysicalPiecewiseMergeJoin &op) {
61552
+ RowLayout rhs_layout;
61553
+ rhs_layout.Initialize(op.children[1]->types);
61554
+ vector<BoundOrderByNode> rhs_order;
61555
+ rhs_order.emplace_back(op.rhs_orders[0].Copy());
61556
+ table = make_unique<GlobalSortedTable>(context, rhs_order, rhs_layout);
61827
61557
  }
61828
61558
 
61829
- //! The local sort state
61830
- LocalSortState rhs_local_sort_state;
61831
- //! Local copy of the sorting expression executor
61832
- ExpressionExecutor rhs_executor;
61833
- //! Holds a vector of incoming sorting columns
61834
- DataChunk rhs_keys;
61835
- //! Whether or not the RHS has NULL values
61836
- idx_t rhs_has_null;
61837
- //! The total number of rows in the RHS
61838
- idx_t rhs_count;
61839
- };
61840
-
61841
- unique_ptr<LocalSinkState> PhysicalPiecewiseMergeJoin::GetLocalSinkState(ExecutionContext &context) const {
61842
- auto result = make_unique<MergeJoinLocalState>();
61843
- // Initialize order clause expression executor and DataChunk
61844
- vector<LogicalType> types;
61845
- for (auto &order : rhs_orders) {
61846
- types.push_back(order.expression->return_type);
61847
- result->rhs_executor.AddExpression(*order.expression);
61559
+ inline idx_t Count() const {
61560
+ return table->count;
61848
61561
  }
61849
- result->rhs_keys.Initialize(types);
61850
- return move(result);
61851
- }
61852
61562
 
61853
- static idx_t PiecewiseMergeNulls(DataChunk &keys, const vector<JoinCondition> &conditions) {
61854
- // Merge the validity masks of the comparison keys into the primary
61855
- // Return the number of NULLs in the resulting chunk
61856
- D_ASSERT(keys.ColumnCount() > 0);
61857
- const auto count = keys.size();
61563
+ void Sink(DataChunk &input, MergeJoinLocalState &lstate) {
61564
+ auto &global_sort_state = table->global_sort_state;
61565
+ auto &local_sort_state = lstate.table.local_sort_state;
61858
61566
 
61859
- size_t all_constant = 0;
61860
- for (auto &v : keys.data) {
61861
- if (v.GetVectorType() == VectorType::CONSTANT_VECTOR) {
61862
- ++all_constant;
61863
- }
61864
- }
61567
+ // Sink the data into the local sort state
61568
+ lstate.table.Sink(input, global_sort_state);
61865
61569
 
61866
- auto &primary = keys.data[0];
61867
- if (all_constant == keys.data.size()) {
61868
- // Either all NULL or no NULLs
61869
- for (auto &v : keys.data) {
61870
- if (ConstantVector::IsNull(v)) {
61871
- ConstantVector::SetNull(primary, true);
61872
- return count;
61873
- }
61874
- }
61875
- return 0;
61876
- } else if (keys.ColumnCount() > 1) {
61877
- // Normalify the primary, as it will need to merge arbitrary validity masks
61878
- primary.Normalify(count);
61879
- auto &pvalidity = FlatVector::Validity(primary);
61880
- for (size_t c = 1; c < keys.data.size(); ++c) {
61881
- // Skip comparisons that accept NULLs
61882
- if (conditions[c].comparison == ExpressionType::COMPARE_DISTINCT_FROM) {
61883
- continue;
61884
- }
61885
- // Orrify the rest, as the sort code will do this anyway.
61886
- auto &v = keys.data[c];
61887
- VectorData vdata;
61888
- v.Orrify(count, vdata);
61889
- auto &vvalidity = vdata.validity;
61890
- if (vvalidity.AllValid()) {
61891
- continue;
61892
- }
61893
- pvalidity.EnsureWritable();
61894
- switch (v.GetVectorType()) {
61895
- case VectorType::FLAT_VECTOR: {
61896
- // Merge entire entries
61897
- auto pmask = pvalidity.GetData();
61898
- const auto entry_count = pvalidity.EntryCount(count);
61899
- for (idx_t entry_idx = 0; entry_idx < entry_count; ++entry_idx) {
61900
- pmask[entry_idx] &= vvalidity.GetValidityEntry(entry_idx);
61901
- }
61902
- break;
61903
- }
61904
- case VectorType::CONSTANT_VECTOR:
61905
- // All or nothing
61906
- if (ConstantVector::IsNull(v)) {
61907
- pvalidity.SetAllInvalid(count);
61908
- return count;
61909
- }
61910
- break;
61911
- default:
61912
- // One by one
61913
- for (idx_t i = 0; i < count; ++i) {
61914
- const auto idx = vdata.sel->get_index(i);
61915
- if (!vvalidity.RowIsValidUnsafe(idx)) {
61916
- pvalidity.SetInvalidUnsafe(i);
61917
- }
61918
- }
61919
- break;
61920
- }
61570
+ // When sorting data reaches a certain size, we sort it
61571
+ if (local_sort_state.SizeInBytes() >= table->memory_per_thread) {
61572
+ local_sort_state.Sort(global_sort_state, true);
61921
61573
  }
61922
- return count - pvalidity.CountValid(count);
61923
- } else {
61924
- return count - VectorOperations::CountNotNull(primary, count);
61925
61574
  }
61926
- }
61927
61575
 
61928
- static inline void SinkPiecewiseMergeChunk(LocalSortState &sort_state, DataChunk &join_keys, DataChunk &input) {
61929
- if (join_keys.ColumnCount() > 1) {
61930
- // Only sort the first key
61931
- DataChunk join_head;
61932
- join_head.data.emplace_back(Vector(join_keys.data[0]));
61933
- join_head.SetCardinality(join_keys.size());
61576
+ unique_ptr<GlobalSortedTable> table;
61577
+ };
61934
61578
 
61935
- sort_state.SinkChunk(join_head, input);
61936
- } else {
61937
- sort_state.SinkChunk(join_keys, input);
61938
- }
61579
+ unique_ptr<GlobalSinkState> PhysicalPiecewiseMergeJoin::GetGlobalSinkState(ClientContext &context) const {
61580
+ return make_unique<MergeJoinGlobalState>(context, *this);
61581
+ }
61582
+
61583
+ unique_ptr<LocalSinkState> PhysicalPiecewiseMergeJoin::GetLocalSinkState(ExecutionContext &context) const {
61584
+ // We only sink the RHS
61585
+ return make_unique<MergeJoinLocalState>(*this, 1);
61939
61586
  }
61940
61587
 
61941
61588
  SinkResultType PhysicalPiecewiseMergeJoin::Sink(ExecutionContext &context, GlobalSinkState &gstate_p,
@@ -61943,30 +61590,8 @@ SinkResultType PhysicalPiecewiseMergeJoin::Sink(ExecutionContext &context, Globa
61943
61590
  auto &gstate = (MergeJoinGlobalState &)gstate_p;
61944
61591
  auto &lstate = (MergeJoinLocalState &)lstate_p;
61945
61592
 
61946
- auto &global_sort_state = gstate.rhs_global_sort_state;
61947
- auto &local_sort_state = lstate.rhs_local_sort_state;
61948
-
61949
- // Initialize local state (if necessary)
61950
- if (!local_sort_state.initialized) {
61951
- local_sort_state.Initialize(global_sort_state, BufferManager::GetBufferManager(context.client));
61952
- }
61953
-
61954
- // Obtain sorting columns
61955
- auto &join_keys = lstate.rhs_keys;
61956
- join_keys.Reset();
61957
- lstate.rhs_executor.Execute(input, join_keys);
61958
-
61959
- // Count the NULLs so we can exclude them later
61960
- lstate.rhs_has_null += PiecewiseMergeNulls(join_keys, conditions);
61961
- lstate.rhs_count += join_keys.size();
61962
-
61963
- // Sink the data into the local sort state
61964
- SinkPiecewiseMergeChunk(local_sort_state, join_keys, input);
61593
+ gstate.Sink(input, lstate);
61965
61594
 
61966
- // When sorting data reaches a certain size, we sort it
61967
- if (local_sort_state.SizeInBytes() >= gstate.memory_per_thread) {
61968
- local_sort_state.Sort(global_sort_state, true);
61969
- }
61970
61595
  return SinkResultType::NEED_MORE_INPUT;
61971
61596
  }
61972
61597
 
@@ -61974,105 +61599,33 @@ void PhysicalPiecewiseMergeJoin::Combine(ExecutionContext &context, GlobalSinkSt
61974
61599
  LocalSinkState &lstate_p) const {
61975
61600
  auto &gstate = (MergeJoinGlobalState &)gstate_p;
61976
61601
  auto &lstate = (MergeJoinLocalState &)lstate_p;
61977
- gstate.rhs_global_sort_state.AddLocalState(lstate.rhs_local_sort_state);
61978
- lock_guard<mutex> locked(gstate.lock);
61979
- gstate.rhs_has_null += lstate.rhs_has_null;
61980
- gstate.rhs_count += lstate.rhs_count;
61602
+ gstate.table->Combine(lstate.table);
61981
61603
  auto &client_profiler = QueryProfiler::Get(context.client);
61982
61604
 
61983
- context.thread.profiler.Flush(this, &lstate.rhs_executor, "rhs_executor", 1);
61605
+ context.thread.profiler.Flush(this, &lstate.table.executor, "rhs_executor", 1);
61984
61606
  client_profiler.Flush(context.thread.profiler);
61985
61607
  }
61986
61608
 
61987
61609
  //===--------------------------------------------------------------------===//
61988
61610
  // Finalize
61989
61611
  //===--------------------------------------------------------------------===//
61990
- class MergeJoinFinalizeTask : public ExecutorTask {
61991
- public:
61992
- MergeJoinFinalizeTask(shared_ptr<Event> event_p, ClientContext &context, MergeJoinGlobalState &state)
61993
- : ExecutorTask(context), event(move(event_p)), context(context), state(state) {
61994
- }
61995
-
61996
- TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
61997
- // Initialize merge sorted and iterate until done
61998
- auto &global_sort_state = state.rhs_global_sort_state;
61999
- MergeSorter merge_sorter(global_sort_state, BufferManager::GetBufferManager(context));
62000
- merge_sorter.PerformInMergeRound();
62001
- event->FinishTask();
62002
-
62003
- return TaskExecutionResult::TASK_FINISHED;
62004
- }
62005
-
62006
- private:
62007
- shared_ptr<Event> event;
62008
- ClientContext &context;
62009
- MergeJoinGlobalState &state;
62010
- };
62011
-
62012
- class MergeJoinFinalizeEvent : public Event {
62013
- public:
62014
- MergeJoinFinalizeEvent(MergeJoinGlobalState &gstate_p, Pipeline &pipeline_p)
62015
- : Event(pipeline_p.executor), gstate(gstate_p), pipeline(pipeline_p) {
62016
- }
62017
-
62018
- MergeJoinGlobalState &gstate;
62019
- Pipeline &pipeline;
62020
-
62021
- public:
62022
- void Schedule() override {
62023
- auto &context = pipeline.GetClientContext();
62024
-
62025
- // Schedule tasks equal to the number of threads, which will each merge multiple partitions
62026
- auto &ts = TaskScheduler::GetScheduler(context);
62027
- idx_t num_threads = ts.NumberOfThreads();
62028
-
62029
- vector<unique_ptr<Task>> merge_tasks;
62030
- for (idx_t tnum = 0; tnum < num_threads; tnum++) {
62031
- merge_tasks.push_back(make_unique<MergeJoinFinalizeTask>(shared_from_this(), context, gstate));
62032
- }
62033
- SetTasks(move(merge_tasks));
62034
- }
62035
-
62036
- void FinishEvent() override {
62037
- auto &global_sort_state = gstate.rhs_global_sort_state;
62038
-
62039
- global_sort_state.CompleteMergeRound(true);
62040
- if (global_sort_state.sorted_blocks.size() > 1) {
62041
- // Multiple blocks remaining: Schedule the next round
62042
- PhysicalPiecewiseMergeJoin::ScheduleMergeTasks(pipeline, *this, gstate);
62043
- }
62044
- }
62045
- };
62046
-
62047
- void PhysicalPiecewiseMergeJoin::ScheduleMergeTasks(Pipeline &pipeline, Event &event, MergeJoinGlobalState &gstate) {
62048
- // Initialize global sort state for a round of merging
62049
- gstate.rhs_global_sort_state.InitializeMergeRound();
62050
- auto new_event = make_shared<MergeJoinFinalizeEvent>(gstate, pipeline);
62051
- event.InsertEvent(move(new_event));
62052
- }
62053
-
62054
61612
  SinkFinalizeType PhysicalPiecewiseMergeJoin::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
62055
61613
  GlobalSinkState &gstate_p) const {
62056
61614
  auto &gstate = (MergeJoinGlobalState &)gstate_p;
62057
- auto &global_sort_state = gstate.rhs_global_sort_state;
61615
+ auto &global_sort_state = gstate.table->global_sort_state;
62058
61616
 
62059
61617
  if (IsRightOuterJoin(join_type)) {
62060
61618
  // for FULL/RIGHT OUTER JOIN, initialize found_match to false for every tuple
62061
- gstate.rhs_found_match = unique_ptr<bool[]>(new bool[gstate.Count()]);
62062
- memset(gstate.rhs_found_match.get(), 0, sizeof(bool) * gstate.Count());
61619
+ gstate.table->IntializeMatches();
62063
61620
  }
62064
61621
  if (global_sort_state.sorted_blocks.empty() && EmptyResultIfRHSIsEmpty()) {
62065
61622
  // Empty input!
62066
61623
  return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
62067
61624
  }
62068
61625
 
62069
- // Prepare for merge sort phase
62070
- global_sort_state.PrepareMergePhase();
61626
+ // Sort the current input child
61627
+ gstate.table->Finalize(pipeline, event);
62071
61628
 
62072
- // Start the merge phase or finish if a merge is not necessary
62073
- if (global_sort_state.sorted_blocks.size() > 1) {
62074
- PhysicalPiecewiseMergeJoin::ScheduleMergeTasks(pipeline, event, gstate);
62075
- }
62076
61629
  return SinkFinalizeType::READY;
62077
61630
  }
62078
61631
 
@@ -62081,16 +61634,16 @@ SinkFinalizeType PhysicalPiecewiseMergeJoin::Finalize(Pipeline &pipeline, Event
62081
61634
  //===--------------------------------------------------------------------===//
62082
61635
  class PiecewiseMergeJoinState : public OperatorState {
62083
61636
  public:
61637
+ using LocalSortedTable = PhysicalRangeJoin::LocalSortedTable;
61638
+
62084
61639
  explicit PiecewiseMergeJoinState(const PhysicalPiecewiseMergeJoin &op, BufferManager &buffer_manager,
62085
61640
  bool force_external)
62086
61641
  : op(op), buffer_manager(buffer_manager), force_external(force_external), left_position(0), first_fetch(true),
62087
61642
  finished(true), right_position(0), right_chunk_index(0) {
62088
61643
  vector<LogicalType> condition_types;
62089
61644
  for (auto &order : op.lhs_orders) {
62090
- lhs_executor.AddExpression(*order.expression);
62091
61645
  condition_types.push_back(order.expression->return_type);
62092
61646
  }
62093
- lhs_keys.Initialize(condition_types);
62094
61647
  if (IsLeftOuterJoin(op.join_type)) {
62095
61648
  lhs_found_match = unique_ptr<bool[]>(new bool[STANDARD_VECTOR_SIZE]);
62096
61649
  memset(lhs_found_match.get(), 0, sizeof(bool) * STANDARD_VECTOR_SIZE);
@@ -62115,16 +61668,12 @@ public:
62115
61668
  bool force_external;
62116
61669
 
62117
61670
  // Block sorting
62118
- DataChunk lhs_keys;
62119
61671
  DataChunk lhs_payload;
62120
- ExpressionExecutor lhs_executor;
62121
61672
  unique_ptr<bool[]> lhs_found_match;
62122
61673
  vector<BoundOrderByNode> lhs_order;
62123
61674
  RowLayout lhs_layout;
62124
- unique_ptr<LocalSortState> lhs_local_state;
61675
+ unique_ptr<LocalSortedTable> lhs_local_table;
62125
61676
  unique_ptr<GlobalSortState> lhs_global_state;
62126
- idx_t lhs_count;
62127
- idx_t lhs_has_null;
62128
61677
 
62129
61678
  // Simple scans
62130
61679
  idx_t left_position;
@@ -62144,23 +61693,14 @@ public:
62144
61693
 
62145
61694
  public:
62146
61695
  void ResolveJoinKeys(DataChunk &input) {
62147
- // resolve the join keys for the input
62148
- lhs_keys.Reset();
62149
- lhs_executor.Execute(input, lhs_keys);
62150
-
62151
- // Count the NULLs so we can exclude them later
62152
- lhs_count = lhs_keys.size();
62153
- lhs_has_null = PiecewiseMergeNulls(lhs_keys, op.conditions);
62154
-
62155
61696
  // sort by join key
62156
61697
  lhs_global_state = make_unique<GlobalSortState>(buffer_manager, lhs_order, lhs_layout);
62157
- lhs_local_state = make_unique<LocalSortState>();
62158
- lhs_local_state->Initialize(*lhs_global_state, buffer_manager);
62159
- SinkPiecewiseMergeChunk(*lhs_local_state, lhs_keys, input);
61698
+ lhs_local_table = make_unique<LocalSortedTable>(op, 0);
61699
+ lhs_local_table->Sink(input, *lhs_global_state);
62160
61700
 
62161
- // Set external (can be force with the PRAGMA)
61701
+ // Set external (can be forced with the PRAGMA)
62162
61702
  lhs_global_state->external = force_external;
62163
- lhs_global_state->AddLocalState(*lhs_local_state);
61703
+ lhs_global_state->AddLocalState(lhs_local_table->local_sort_state);
62164
61704
  lhs_global_state->PrepareMergePhase();
62165
61705
  while (lhs_global_state->sorted_blocks.size() > 1) {
62166
61706
  MergeSorter merge_sorter(*lhs_global_state, buffer_manager);
@@ -62176,12 +61716,14 @@ public:
62176
61716
  scanner.Scan(lhs_payload);
62177
61717
 
62178
61718
  // Recompute the sorted keys from the sorted input
62179
- lhs_keys.Reset();
62180
- lhs_executor.Execute(lhs_payload, lhs_keys);
61719
+ lhs_local_table->keys.Reset();
61720
+ lhs_local_table->executor.Execute(lhs_payload, lhs_local_table->keys);
62181
61721
  }
62182
61722
 
62183
61723
  void Finalize(PhysicalOperator *op, ExecutionContext &context) override {
62184
- context.thread.profiler.Flush(op, &lhs_executor, "lhs_executor", 0);
61724
+ if (lhs_local_table) {
61725
+ context.thread.profiler.Flush(op, &lhs_local_table->executor, "lhs_executor", 0);
61726
+ }
62185
61727
  }
62186
61728
  };
62187
61729
 
@@ -62212,76 +61754,17 @@ struct BlockMergeInfo {
62212
61754
  GlobalSortState &state;
62213
61755
  //! The block being scanned
62214
61756
  const idx_t block_idx;
62215
- //! The start position being read from the block
62216
- const idx_t base_idx;
62217
61757
  //! The number of not-NULL values in the block (they are at the end)
62218
61758
  const idx_t not_null;
62219
61759
  //! The current offset in the block
62220
61760
  idx_t &entry_idx;
62221
61761
  SelectionVector result;
62222
61762
 
62223
- BlockMergeInfo(GlobalSortState &state, idx_t block_idx, idx_t base_idx, idx_t &entry_idx, idx_t not_null)
62224
- : state(state), block_idx(block_idx), base_idx(base_idx), not_null(not_null), entry_idx(entry_idx),
62225
- result(STANDARD_VECTOR_SIZE) {
61763
+ BlockMergeInfo(GlobalSortState &state, idx_t block_idx, idx_t &entry_idx, idx_t not_null)
61764
+ : state(state), block_idx(block_idx), not_null(not_null), entry_idx(entry_idx), result(STANDARD_VECTOR_SIZE) {
62226
61765
  }
62227
61766
  };
62228
61767
 
62229
- static idx_t SliceSortedPayload(DataChunk &payload, BlockMergeInfo &info, const idx_t result_count,
62230
- const idx_t left_cols = 0) {
62231
- // There should only be one sorted block if they have been sorted
62232
- D_ASSERT(info.state.sorted_blocks.size() == 1);
62233
- SBScanState read_state(info.state.buffer_manager, info.state);
62234
- read_state.sb = info.state.sorted_blocks[0].get();
62235
- auto &sorted_data = *read_state.sb->payload_data;
62236
-
62237
- // We have to create pointers for the entire block
62238
- // because unswizzle works on ranges not selections.
62239
- const auto first_idx = info.result.get_index(0);
62240
- read_state.SetIndices(info.block_idx, info.base_idx + first_idx);
62241
- read_state.PinData(sorted_data);
62242
- const auto data_ptr = read_state.DataPtr(sorted_data);
62243
-
62244
- // Set up a batch of pointers to scan data from
62245
- Vector addresses(LogicalType::POINTER, result_count);
62246
- auto data_pointers = FlatVector::GetData<data_ptr_t>(addresses);
62247
-
62248
- // Set up the data pointers for the values that are actually referenced
62249
- // and normalise the selection vector to zero
62250
- data_ptr_t row_ptr = data_ptr;
62251
- const idx_t &row_width = sorted_data.layout.GetRowWidth();
62252
-
62253
- auto prev_idx = first_idx;
62254
- info.result.set_index(0, 0);
62255
- idx_t addr_count = 0;
62256
- data_pointers[addr_count++] = row_ptr;
62257
- for (idx_t i = 1; i < result_count; ++i) {
62258
- const auto row_idx = info.result.get_index(i);
62259
- info.result.set_index(i, row_idx - first_idx);
62260
- if (row_idx == prev_idx) {
62261
- continue;
62262
- }
62263
- row_ptr += (row_idx - prev_idx) * row_width;
62264
- data_pointers[addr_count++] = row_ptr;
62265
- prev_idx = row_idx;
62266
- }
62267
- // Unswizzle the offsets back to pointers (if needed)
62268
- if (!sorted_data.layout.AllConstant() && info.state.external) {
62269
- const auto next = prev_idx + 1;
62270
- RowOperations::UnswizzlePointers(sorted_data.layout, data_ptr, read_state.payload_heap_handle->Ptr(), next);
62271
- }
62272
-
62273
- // Deserialize the payload data
62274
- auto sel = FlatVector::IncrementalSelectionVector();
62275
- for (idx_t col_idx = 0; col_idx < sorted_data.layout.ColumnCount(); col_idx++) {
62276
- const auto col_offset = sorted_data.layout.GetOffsets()[col_idx];
62277
- auto &col = payload.data[left_cols + col_idx];
62278
- RowOperations::Gather(addresses, *sel, col, *sel, addr_count, col_offset, col_idx);
62279
- col.Slice(info.result, result_count);
62280
- }
62281
-
62282
- return first_idx;
62283
- }
62284
-
62285
61768
  static void MergeJoinPinSortingBlock(SBScanState &scan, const idx_t block_idx) {
62286
61769
  scan.SetIndices(block_idx, 0);
62287
61770
  scan.PinRadix(block_idx);
@@ -62303,7 +61786,7 @@ static idx_t MergeJoinSimpleBlocks(PiecewiseMergeJoinState &lstate, MergeJoinGlo
62303
61786
 
62304
61787
  // The sort parameters should all be the same
62305
61788
  auto &lsort = *lstate.lhs_global_state;
62306
- auto &rsort = rstate.rhs_global_sort_state;
61789
+ auto &rsort = rstate.table->global_sort_state;
62307
61790
  D_ASSERT(lsort.sort_layout.all_constant == rsort.sort_layout.all_constant);
62308
61791
  const auto all_constant = lsort.sort_layout.all_constant;
62309
61792
  D_ASSERT(lsort.external == rsort.external);
@@ -62316,7 +61799,7 @@ static idx_t MergeJoinSimpleBlocks(PiecewiseMergeJoinState &lstate, MergeJoinGlo
62316
61799
 
62317
61800
  const idx_t l_block_idx = 0;
62318
61801
  idx_t l_entry_idx = 0;
62319
- const auto lhs_not_null = lstate.lhs_count - lstate.lhs_has_null;
61802
+ const auto lhs_not_null = lstate.lhs_local_table->count - lstate.lhs_local_table->has_null;
62320
61803
  MergeJoinPinSortingBlock(lread, l_block_idx);
62321
61804
  auto l_ptr = MergeJoinRadixPtr(lread, l_entry_idx);
62322
61805
 
@@ -62335,7 +61818,8 @@ static idx_t MergeJoinSimpleBlocks(PiecewiseMergeJoinState &lstate, MergeJoinGlo
62335
61818
  MergeJoinPinSortingBlock(rread, r_block_idx);
62336
61819
 
62337
61820
  auto &rblock = rread.sb->radix_sorting_data[r_block_idx];
62338
- const auto r_not_null = SortedBlockNotNull(right_base, rblock.count, rstate.rhs_count - rstate.rhs_has_null);
61821
+ const auto r_not_null =
61822
+ SortedBlockNotNull(right_base, rblock.count, rstate.table->count - rstate.table->has_null);
62339
61823
  if (r_not_null == 0) {
62340
61824
  break;
62341
61825
  }
@@ -62381,6 +61865,7 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da
62381
61865
  auto &gstate = (MergeJoinGlobalState &)*sink_state;
62382
61866
 
62383
61867
  state.ResolveJoinKeys(input);
61868
+ auto &lhs_table = *state.lhs_local_table;
62384
61869
 
62385
61870
  // perform the actual join
62386
61871
  bool found_match[STANDARD_VECTOR_SIZE];
@@ -62388,7 +61873,7 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da
62388
61873
  MergeJoinSimpleBlocks(state, gstate, found_match, conditions[0].comparison);
62389
61874
 
62390
61875
  // use the sorted payload
62391
- const auto lhs_not_null = state.lhs_count - state.lhs_has_null;
61876
+ const auto lhs_not_null = lhs_table.count - lhs_table.has_null;
62392
61877
  auto &payload = state.lhs_payload;
62393
61878
 
62394
61879
  // now construct the result based on the join result
@@ -62396,19 +61881,19 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da
62396
61881
  case JoinType::MARK: {
62397
61882
  // The only part of the join keys that is actually used is the validity mask.
62398
61883
  // Since the payload is sorted, we can just set the tail end of the validity masks to invalid.
62399
- for (auto &key : state.lhs_keys.data) {
62400
- key.Normalify(state.lhs_keys.size());
61884
+ for (auto &key : lhs_table.keys.data) {
61885
+ key.Normalify(lhs_table.keys.size());
62401
61886
  auto &mask = FlatVector::Validity(key);
62402
61887
  if (mask.AllValid()) {
62403
61888
  continue;
62404
61889
  }
62405
61890
  mask.SetAllValid(lhs_not_null);
62406
- for (idx_t i = lhs_not_null; i < state.lhs_count; ++i) {
61891
+ for (idx_t i = lhs_not_null; i < lhs_table.count; ++i) {
62407
61892
  mask.SetInvalid(i);
62408
61893
  }
62409
61894
  }
62410
61895
  // So we make a set of keys that have the validity mask set for the
62411
- PhysicalJoin::ConstructMarkJoinResult(state.lhs_keys, payload, chunk, found_match, gstate.rhs_has_null);
61896
+ PhysicalJoin::ConstructMarkJoinResult(lhs_table.keys, payload, chunk, found_match, gstate.table->has_null);
62412
61897
  break;
62413
61898
  }
62414
61899
  case JoinType::SEMI:
@@ -62468,8 +61953,8 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
62468
61953
 
62469
61954
  if (comp_res <= cmp) {
62470
61955
  // left side smaller: found match
62471
- l.result.set_index(result_count, sel_t(l.entry_idx - l.base_idx));
62472
- r.result.set_index(result_count, sel_t(r.entry_idx - r.base_idx));
61956
+ l.result.set_index(result_count, sel_t(l.entry_idx));
61957
+ r.result.set_index(result_count, sel_t(r.entry_idx));
62473
61958
  result_count++;
62474
61959
  // move left side forward
62475
61960
  l.entry_idx++;
@@ -62496,35 +61981,11 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
62496
61981
  return result_count;
62497
61982
  }
62498
61983
 
62499
- static idx_t SelectJoinTail(const ExpressionType &condition, Vector &left, Vector &right, const SelectionVector *sel,
62500
- idx_t count, SelectionVector *true_sel) {
62501
- switch (condition) {
62502
- case ExpressionType::COMPARE_NOTEQUAL:
62503
- return VectorOperations::NotEquals(left, right, sel, count, true_sel, nullptr);
62504
- case ExpressionType::COMPARE_LESSTHAN:
62505
- return VectorOperations::LessThan(left, right, sel, count, true_sel, nullptr);
62506
- case ExpressionType::COMPARE_GREATERTHAN:
62507
- return VectorOperations::GreaterThan(left, right, sel, count, true_sel, nullptr);
62508
- case ExpressionType::COMPARE_LESSTHANOREQUALTO:
62509
- return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, nullptr);
62510
- case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
62511
- return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, nullptr);
62512
- case ExpressionType::COMPARE_DISTINCT_FROM:
62513
- return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, nullptr);
62514
- case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
62515
- case ExpressionType::COMPARE_EQUAL:
62516
- default:
62517
- throw InternalException("Unsupported comparison type for PhysicalPiecewiseMergeJoin");
62518
- }
62519
-
62520
- return count;
62521
- }
62522
-
62523
61984
  OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &input,
62524
61985
  DataChunk &chunk, OperatorState &state_p) const {
62525
61986
  auto &state = (PiecewiseMergeJoinState &)state_p;
62526
61987
  auto &gstate = (MergeJoinGlobalState &)*sink_state;
62527
- auto &rsorted = *gstate.rhs_global_sort_state.sorted_blocks[0];
61988
+ auto &rsorted = *gstate.table->global_sort_state.sorted_blocks[0];
62528
61989
  const auto left_cols = input.ColumnCount();
62529
61990
  const auto tail_cols = conditions.size() - 1;
62530
61991
  do {
@@ -62550,14 +62011,15 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
62550
62011
  return OperatorResultType::NEED_MORE_INPUT;
62551
62012
  }
62552
62013
 
62553
- const auto lhs_not_null = state.lhs_count - state.lhs_has_null;
62554
- BlockMergeInfo left_info(*state.lhs_global_state, 0, 0, state.left_position, lhs_not_null);
62014
+ auto &lhs_table = *state.lhs_local_table;
62015
+ const auto lhs_not_null = lhs_table.count - lhs_table.has_null;
62016
+ BlockMergeInfo left_info(*state.lhs_global_state, 0, state.left_position, lhs_not_null);
62555
62017
 
62556
62018
  const auto &rblock = rsorted.radix_sorting_data[state.right_chunk_index];
62557
62019
  const auto rhs_not_null =
62558
- SortedBlockNotNull(state.right_base, rblock.count, gstate.rhs_count - gstate.rhs_has_null);
62559
- BlockMergeInfo right_info(gstate.rhs_global_sort_state, state.right_chunk_index, state.right_position,
62560
- state.right_position, rhs_not_null);
62020
+ SortedBlockNotNull(state.right_base, rblock.count, gstate.table->count - gstate.table->has_null);
62021
+ BlockMergeInfo right_info(gstate.table->global_sort_state, state.right_chunk_index, state.right_position,
62022
+ rhs_not_null);
62561
62023
 
62562
62024
  idx_t result_count = MergeJoinComplexBlocks(left_info, right_info, conditions[0].comparison);
62563
62025
  if (result_count == 0) {
@@ -62576,7 +62038,8 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
62576
62038
  for (idx_t c = 0; c < state.lhs_payload.ColumnCount(); ++c) {
62577
62039
  chunk.data[c].Slice(state.lhs_payload.data[c], left_info.result, result_count);
62578
62040
  }
62579
- const auto first_idx = SliceSortedPayload(chunk, right_info, result_count, left_cols);
62041
+ SliceSortedPayload(chunk, right_info.state, right_info.block_idx, right_info.result, result_count,
62042
+ left_cols);
62580
62043
  chunk.SetCardinality(result_count);
62581
62044
 
62582
62045
  auto sel = FlatVector::IncrementalSelectionVector();
@@ -62590,7 +62053,7 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
62590
62053
 
62591
62054
  auto tail_count = result_count;
62592
62055
  for (size_t cmp_idx = 1; cmp_idx < conditions.size(); ++cmp_idx) {
62593
- Vector left(state.lhs_keys.data[cmp_idx]);
62056
+ Vector left(lhs_table.keys.data[cmp_idx]);
62594
62057
  left.Slice(left_info.result, result_count);
62595
62058
 
62596
62059
  auto &right = state.rhs_keys.data[cmp_idx];
@@ -62618,11 +62081,10 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
62618
62081
  state.lhs_found_match[left_info.result[sel->get_index(i)]] = true;
62619
62082
  }
62620
62083
  }
62621
- if (gstate.rhs_found_match) {
62084
+ if (gstate.table->found_match) {
62622
62085
  // Absolute position of the block + start position inside that block
62623
- const idx_t base_index = right_info.base_idx + first_idx;
62624
62086
  for (idx_t i = 0; i < result_count; i++) {
62625
- gstate.rhs_found_match[base_index + right_info.result[sel->get_index(i)]] = true;
62087
+ gstate.table->found_match[state.right_base + right_info.result[sel->get_index(i)]] = true;
62626
62088
  }
62627
62089
  }
62628
62090
  chunk.SetCardinality(result_count);
@@ -62639,7 +62101,7 @@ OperatorResultType PhysicalPiecewiseMergeJoin::Execute(ExecutionContext &context
62639
62101
  if (gstate.Count() == 0) {
62640
62102
  // empty RHS
62641
62103
  if (!EmptyResultIfRHSIsEmpty()) {
62642
- ConstructEmptyJoinResult(join_type, gstate.rhs_has_null, input, chunk);
62104
+ ConstructEmptyJoinResult(join_type, gstate.table->has_null, input, chunk);
62643
62105
  return OperatorResultType::NEED_MORE_INPUT;
62644
62106
  } else {
62645
62107
  return OperatorResultType::FINISHED;
@@ -62697,7 +62159,7 @@ void PhysicalPiecewiseMergeJoin::GetData(ExecutionContext &context, DataChunk &r
62697
62159
  lock_guard<mutex> l(state.lock);
62698
62160
  if (!state.scanner) {
62699
62161
  // Initialize scanner (if not yet initialized)
62700
- auto &sort_state = sink.rhs_global_sort_state;
62162
+ auto &sort_state = sink.table->global_sort_state;
62701
62163
  if (sort_state.sorted_blocks.empty()) {
62702
62164
  return;
62703
62165
  }
@@ -62706,11 +62168,12 @@ void PhysicalPiecewiseMergeJoin::GetData(ExecutionContext &context, DataChunk &r
62706
62168
 
62707
62169
  // if the LHS is exhausted in a FULL/RIGHT OUTER JOIN, we scan the found_match for any chunks we
62708
62170
  // still need to output
62709
- const auto found_match = sink.rhs_found_match.get();
62171
+ const auto found_match = sink.table->found_match.get();
62710
62172
 
62711
- // ConstructFullOuterJoinResult(sink.rhs_found_match.get(), sink.right_chunks, chunk, state.right_outer_position);
62173
+ // ConstructFullOuterJoinResult(sink.table->found_match.get(), sink.right_chunks, chunk,
62174
+ // state.right_outer_position);
62712
62175
  DataChunk rhs_chunk;
62713
- rhs_chunk.Initialize(sink.rhs_global_sort_state.payload_layout.GetTypes());
62176
+ rhs_chunk.Initialize(sink.table->global_sort_state.payload_layout.GetTypes());
62714
62177
  SelectionVector rsel(STANDARD_VECTOR_SIZE);
62715
62178
  for (;;) {
62716
62179
  // Read the next sorted chunk
@@ -62749,6 +62212,353 @@ void PhysicalPiecewiseMergeJoin::GetData(ExecutionContext &context, DataChunk &r
62749
62212
  }
62750
62213
 
62751
62214
  } // namespace duckdb
62215
+
62216
+
62217
+
62218
+
62219
+
62220
+
62221
+
62222
+
62223
+
62224
+
62225
+
62226
+
62227
+
62228
+ #include <thread>
62229
+
62230
+ namespace duckdb {
62231
+
62232
+ PhysicalRangeJoin::LocalSortedTable::LocalSortedTable(const PhysicalRangeJoin &op, const idx_t child) // Sets up key evaluation for one side of the join
62233
+ : op(op), has_null(0), count(0) {
62234
+ // Initialize the order clause expression executor and the key DataChunk (one key column per join condition)
62235
+ vector<LogicalType> types;
62236
+ for (const auto &cond : op.conditions) {
62237
+ const auto &expr = child ? cond.right : cond.left; // child == 0 picks the left expression, any other value the right
62238
+ executor.AddExpression(*expr);
62239
+
62240
+ types.push_back(expr->return_type);
62241
+ }
62242
+ keys.Initialize(types);
62243
+ }
62244
+
62245
+ void PhysicalRangeJoin::LocalSortedTable::Sink(DataChunk &input, GlobalSortState &global_sort_state) { // Evaluates the join keys for one input chunk and appends it to the local sort
62246
+ // Lazily initialize the local sort state on the first chunk
62247
+ if (!local_sort_state.initialized) {
62248
+ local_sort_state.Initialize(global_sort_state, global_sort_state.buffer_manager);
62249
+ }
62250
+
62251
+ // Obtain the sorting columns by running the key expressions over the input
62252
+ keys.Reset();
62253
+ executor.Execute(input, keys);
62254
+
62255
+ // Count the NULLs so we can exclude them later (MergeNulls also folds the other keys' NULLs into keys.data[0])
62256
+ has_null += MergeNulls(op.conditions);
62257
+ count += keys.size();
62258
+
62259
+ // Only sort on the primary key: build a one-column chunk from keys.data[0]
62260
+ DataChunk join_head;
62261
+ join_head.data.emplace_back(Vector(keys.data[0]));
62262
+ join_head.SetCardinality(keys.size());
62263
+
62264
+ // Sink the data into the local sort state: join_head is the sort key, input is the payload
62265
+ local_sort_state.SinkChunk(join_head, input);
62266
+ }
62267
+
62268
+ PhysicalRangeJoin::GlobalSortedTable::GlobalSortedTable(ClientContext &context, const vector<BoundOrderByNode> &orders, // Creates the shared sort state for one side of a range join
62269
+ RowLayout &payload_layout)
62270
+ : global_sort_state(BufferManager::GetBufferManager(context), orders, payload_layout), has_null(0), count(0),
62271
+ memory_per_thread(0) {
62272
+ D_ASSERT(orders.size() == 1); // exactly one sort order is expected here
62273
+
62274
+ // Set external from the client config's force_external flag (can be forced with the PRAGMA)
62275
+ auto &config = ClientConfig::GetConfig(context);
62276
+ global_sort_state.external = config.force_external;
62277
+ // Memory usage per thread should scale with max mem / num threads
62278
+ // We take 1/4th of this, to be conservative
62279
+ idx_t max_memory = global_sort_state.buffer_manager.GetMaxMemory();
62280
+ idx_t num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads(); // NOTE(review): used as a divisor below; presumably never 0 - confirm
62281
+ memory_per_thread = (max_memory / num_threads) / 4;
62282
+ }
62283
+
62284
+ void PhysicalRangeJoin::GlobalSortedTable::Combine(LocalSortedTable &ltable) { // Folds one local table into this global table
62285
+ global_sort_state.AddLocalState(ltable.local_sort_state); // hand the local sort state over to the global sort state
62286
+ has_null += ltable.has_null; // accumulate the local NULL count
62287
+ count += ltable.count; // accumulate the local row count
62288
+ }
62289
+
62290
+ void PhysicalRangeJoin::GlobalSortedTable::IntializeMatches() { // Allocates the found_match flag array; the name is spelled "Intialize" (sic) here
62291
+ found_match = unique_ptr<bool[]>(new bool[Count()]); // one flag per Count() entry
62292
+ memset(found_match.get(), 0, sizeof(bool) * Count()); // zero the array so every flag starts out false
62293
+ }
62294
+
62295
+ void PhysicalRangeJoin::GlobalSortedTable::Print() { // Debug helper: forwards to GlobalSortState::Print
62296
+ global_sort_state.Print();
62297
+ }
62298
+
62299
+ class RangeJoinMergeTask : public ExecutorTask { // Executor task that runs one MergeSorter pass over a GlobalSortedTable
62300
+ public:
62301
+ using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;
62302
+
62303
+ public:
62304
+ RangeJoinMergeTask(shared_ptr<Event> event_p, ClientContext &context, GlobalSortedTable &table) // keeps the event and references to context/table
62305
+ : ExecutorTask(context), event(move(event_p)), context(context), table(table) {
62306
+ }
62307
+
62308
+ TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override { // 'mode' is not used in this body
62309
+ // Build a MergeSorter over the table's global sort state and run its merge-round work
62310
+ auto &global_sort_state = table.global_sort_state;
62311
+ MergeSorter merge_sorter(global_sort_state, BufferManager::GetBufferManager(context));
62312
+ merge_sorter.PerformInMergeRound();
62313
+ event->FinishTask(); // report completion to the owning event
62314
+
62315
+ return TaskExecutionResult::TASK_FINISHED;
62316
+ }
62317
+
62318
+ private:
62319
+ shared_ptr<Event> event; // event this task reports to via FinishTask()
62320
+ ClientContext &context; // used to look up the buffer manager
62321
+ GlobalSortedTable &table; // table whose global sort state is merged
62322
+ };
62323
+
62324
+ class RangeJoinMergeEvent : public Event { // Event that schedules the merge tasks for one merge round of a GlobalSortedTable
62325
+ public:
62326
+ using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;
62327
+
62328
+ public:
62329
+ RangeJoinMergeEvent(GlobalSortedTable &table_p, Pipeline &pipeline_p) // stores references only; the event is tied to the pipeline's executor
62330
+ : Event(pipeline_p.executor), table(table_p), pipeline(pipeline_p) {
62331
+ }
62332
+
62333
+ GlobalSortedTable &table; // table being merged
62334
+ Pipeline &pipeline; // pipeline whose client context is used for scheduling
62335
+
62336
+ public:
62337
+ void Schedule() override { // creates one RangeJoinMergeTask per scheduler thread
62338
+ auto &context = pipeline.GetClientContext();
62339
+
62340
+ // Schedule tasks equal to the number of threads, which will each merge multiple partitions
62341
+ auto &ts = TaskScheduler::GetScheduler(context);
62342
+ idx_t num_threads = ts.NumberOfThreads();
62343
+
62344
+ vector<unique_ptr<Task>> iejoin_tasks;
62345
+ for (idx_t tnum = 0; tnum < num_threads; tnum++) {
62346
+ iejoin_tasks.push_back(make_unique<RangeJoinMergeTask>(shared_from_this(), context, table));
62347
+ }
62348
+ SetTasks(move(iejoin_tasks));
62349
+ }
62350
+
62351
+ void FinishEvent() override { // completes the round and schedules another while more than one sorted block remains
62352
+ auto &global_sort_state = table.global_sort_state;
62353
+
62354
+ global_sort_state.CompleteMergeRound(true); // true: keep the radix data for further comparisons
62355
+ if (global_sort_state.sorted_blocks.size() > 1) {
62356
+ // Multiple blocks remaining: Schedule the next round
62357
+ table.ScheduleMergeTasks(pipeline, *this);
62358
+ }
62359
+ }
62360
+ };
62361
+
62362
+ void PhysicalRangeJoin::GlobalSortedTable::ScheduleMergeTasks(Pipeline &pipeline, Event &event) { // Starts one merge round as a new event
62363
+ // Initialize global sort state for a round of merging
62364
+ global_sort_state.InitializeMergeRound();
62365
+ auto new_event = make_shared<RangeJoinMergeEvent>(*this, pipeline);
62366
+ event.InsertEvent(move(new_event)); // hand the new merge event to the given event
62367
+ }
62368
+
62369
+ void PhysicalRangeJoin::GlobalSortedTable::Finalize(Pipeline &pipeline, Event &event) { // Prepares the merge phase and schedules it if needed
62370
+ // Prepare for merge sort phase
62371
+ global_sort_state.PrepareMergePhase();
62372
+
62373
+ // Start the merge phase or finish if a merge is not necessary (zero or one sorted block)
62374
+ if (global_sort_state.sorted_blocks.size() > 1) {
62375
+ ScheduleMergeTasks(pipeline, event);
62376
+ }
62377
+ }
62378
+
62379
+ PhysicalRangeJoin::PhysicalRangeJoin(LogicalOperator &op, PhysicalOperatorType type, unique_ptr<PhysicalOperator> left, // Builds the operator and moves range comparisons to the front of 'conditions'
62380
+ unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond, JoinType join_type,
62381
+ idx_t estimated_cardinality)
62382
+ : PhysicalComparisonJoin(op, type, move(cond), join_type, estimated_cardinality) {
62383
+ // Reorder the conditions so that ranges are at the front.
62384
+ // TODO: use stats to improve the choice?
62385
+ // TODO: Prefer fixed length types?
62386
+ if (conditions.size() > 1) {
62387
+ auto conditions_p = std::move(conditions);
62388
+ conditions.resize(conditions_p.size()); // re-create the slots that the loop below fills in
62389
+ idx_t range_position = 0; // next free slot at the front, for range comparisons
62390
+ idx_t other_position = conditions_p.size(); // one past the next free slot at the back, for everything else
62391
+ for (idx_t i = 0; i < conditions_p.size(); ++i) {
62392
+ switch (conditions_p[i].comparison) {
62393
+ case ExpressionType::COMPARE_LESSTHAN:
62394
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
62395
+ case ExpressionType::COMPARE_GREATERTHAN:
62396
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
62397
+ conditions[range_position++] = std::move(conditions_p[i]); // range comparisons keep their relative order
62398
+ break;
62399
+ default:
62400
+ conditions[--other_position] = std::move(conditions_p[i]); // filled from the back, so non-range conditions end up in reverse order
62401
+ break;
62402
+ }
62403
+ }
62404
+ }
62405
+
62406
+ children.push_back(move(left)); // children[0] is the left input
62407
+ children.push_back(move(right)); // children[1] is the right input
62408
+ }
62409
+
62410
+ idx_t PhysicalRangeJoin::LocalSortedTable::MergeNulls(const vector<JoinCondition> &conditions) {
62411
+ // Merge the validity masks of the comparison keys into the primary key (keys.data[0])
62412
+ // Return the number of NULLs in the resulting chunk
62413
+ D_ASSERT(keys.ColumnCount() > 0);
62414
+ const auto count = keys.size();
62415
+
62416
+ size_t all_constant = 0; // number of key columns that are constant vectors
62417
+ for (auto &v : keys.data) {
62418
+ if (v.GetVectorType() == VectorType::CONSTANT_VECTOR) {
62419
+ ++all_constant;
62420
+ }
62421
+ }
62422
+
62423
+ auto &primary = keys.data[0];
62424
+ if (all_constant == keys.data.size()) {
62425
+ // Either all NULL or no NULLs
62426
+ // NOTE(review): unlike the branch below, this loop does not skip COMPARE_DISTINCT_FROM conditions - confirm that is intended
62427
+ for (auto &v : keys.data) {
62428
+ if (ConstantVector::IsNull(v)) {
62429
+ ConstantVector::SetNull(primary, true);
62430
+ return count;
62431
+ }
62432
+ }
62433
+ return 0;
62434
+ } else if (keys.ColumnCount() > 1) {
62435
+ // Normalify the primary, as it will need to merge arbitrary validity masks
62436
+ primary.Normalify(count);
62437
+ auto &pvalidity = FlatVector::Validity(primary);
62438
+ D_ASSERT(keys.ColumnCount() == conditions.size());
62439
+ for (size_t c = 1; c < keys.data.size(); ++c) {
62440
+ // Skip comparisons that accept NULLs
62441
+ if (conditions[c].comparison == ExpressionType::COMPARE_DISTINCT_FROM) {
62442
+ continue;
62443
+ }
62444
+ // Orrify the rest, as the sort code will do this anyway.
62445
+ auto &v = keys.data[c];
62446
+ VectorData vdata;
62447
+ v.Orrify(count, vdata);
62448
+ auto &vvalidity = vdata.validity;
62449
+ if (vvalidity.AllValid()) {
62450
+ continue;
62451
+ }
62452
+ pvalidity.EnsureWritable(); // the primary mask is modified below
62453
+ switch (v.GetVectorType()) {
62454
+ case VectorType::FLAT_VECTOR: {
62455
+ // Merge entire entries: AND the validity entries together one entry at a time
62456
+ auto pmask = pvalidity.GetData();
62457
+ const auto entry_count = pvalidity.EntryCount(count);
62458
+ for (idx_t entry_idx = 0; entry_idx < entry_count; ++entry_idx) {
62459
+ pmask[entry_idx] &= vvalidity.GetValidityEntry(entry_idx);
62460
+ }
62461
+ break;
62462
+ }
62463
+ case VectorType::CONSTANT_VECTOR:
62464
+ // All or nothing: a constant NULL invalidates every row of the primary
62465
+ if (ConstantVector::IsNull(v)) {
62466
+ pvalidity.SetAllInvalid(count);
62467
+ return count;
62468
+ }
62469
+ break;
62470
+ default:
62471
+ // One by one: map each row through the Orrify selection vector
62472
+ for (idx_t i = 0; i < count; ++i) {
62473
+ const auto idx = vdata.sel->get_index(i);
62474
+ if (!vvalidity.RowIsValidUnsafe(idx)) {
62475
+ pvalidity.SetInvalidUnsafe(i);
62476
+ }
62477
+ }
62478
+ break;
62479
+ }
62480
+ }
62481
+ return count - pvalidity.CountValid(count); // NULL count after merging all masks
62482
+ } else {
62483
+ return count - VectorOperations::CountNotNull(primary, count); // single key column: count its NULLs directly
62484
+ }
62485
+ }
62485
+
62486
+ void PhysicalRangeJoin::SliceSortedPayload(DataChunk &payload, GlobalSortState &state, const idx_t block_idx, // Gathers the selected payload rows of one sorted block into payload columns [left_cols, ...)
62487
+ const SelectionVector &result, const idx_t result_count,
62488
+ const idx_t left_cols) {
62489
+ // There should only be one sorted block if they have been sorted
62490
+ D_ASSERT(state.sorted_blocks.size() == 1);
62491
+ SBScanState read_state(state.buffer_manager, state);
62492
+ read_state.sb = state.sorted_blocks[0].get();
62493
+ auto &sorted_data = *read_state.sb->payload_data;
62494
+
62495
+ read_state.SetIndices(block_idx, 0); // position at the first entry of the requested block
62496
+ read_state.PinData(sorted_data);
62497
+ const auto data_ptr = read_state.DataPtr(sorted_data);
62498
+
62499
+ // Set up a batch of pointers to scan data from
62500
+ Vector addresses(LogicalType::POINTER, result_count);
62501
+ auto data_pointers = FlatVector::GetData<data_ptr_t>(addresses);
62502
+
62503
+ // Set up the data pointers for the values that are actually referenced
62504
+ const idx_t &row_width = sorted_data.layout.GetRowWidth();
62505
+
62506
+ auto prev_idx = result.get_index(0); // assumes result_count > 0 - TODO confirm every caller guarantees this
62507
+ SelectionVector gsel(result_count); // maps each result row to its slot in 'addresses'
62508
+ idx_t addr_count = 0;
62509
+ gsel.set_index(0, addr_count);
62510
+ data_pointers[addr_count] = data_ptr + prev_idx * row_width;
62511
+ for (idx_t i = 1; i < result_count; ++i) {
62512
+ const auto row_idx = result.get_index(i);
62513
+ if (row_idx != prev_idx) { // only consecutive repeats of a row index share one address
62514
+ data_pointers[++addr_count] = data_ptr + row_idx * row_width;
62515
+ prev_idx = row_idx;
62516
+ }
62517
+ gsel.set_index(i, addr_count);
62518
+ }
62519
+ ++addr_count; // convert the last used slot index into a count
62520
+
62521
+ // Unswizzle the offsets back to pointers (if needed)
62522
+ // NOTE(review): this passes the block base pointer and addr_count, while the gathered rows sit at arbitrary row indices - confirm every referenced row gets unswizzled
62523
+ if (!sorted_data.layout.AllConstant() && state.external) {
62524
+ RowOperations::UnswizzlePointers(sorted_data.layout, data_ptr, read_state.payload_heap_handle->Ptr(),
62525
+ addr_count);
62526
+ }
62527
+
62528
+ // Deserialize the payload data
62529
+ auto sel = FlatVector::IncrementalSelectionVector();
62530
+ for (idx_t col_idx = 0; col_idx < sorted_data.layout.ColumnCount(); col_idx++) {
62531
+ const auto col_offset = sorted_data.layout.GetOffsets()[col_idx];
62532
+ auto &col = payload.data[left_cols + col_idx]; // payload columns are written after the left_cols left-hand columns
62533
+ RowOperations::Gather(addresses, *sel, col, *sel, addr_count, col_offset, col_idx);
62534
+ col.Slice(gsel, result_count); // expand the gathered rows back to one entry per result row
62535
+ }
62536
+ }
62536
+
62537
+ idx_t PhysicalRangeJoin::SelectJoinTail(const ExpressionType &condition, Vector &left, Vector &right, // Dispatches to the VectorOperations comparison matching 'condition' and returns its result
62538
+ const SelectionVector *sel, idx_t count, SelectionVector *true_sel) {
62539
+ switch (condition) {
62540
+ case ExpressionType::COMPARE_NOTEQUAL:
62541
+ return VectorOperations::NotEquals(left, right, sel, count, true_sel, nullptr);
62542
+ case ExpressionType::COMPARE_LESSTHAN:
62543
+ return VectorOperations::LessThan(left, right, sel, count, true_sel, nullptr);
62544
+ case ExpressionType::COMPARE_GREATERTHAN:
62545
+ return VectorOperations::GreaterThan(left, right, sel, count, true_sel, nullptr);
62546
+ case ExpressionType::COMPARE_LESSTHANOREQUALTO:
62547
+ return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, nullptr);
62548
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
62549
+ return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, nullptr);
62550
+ case ExpressionType::COMPARE_DISTINCT_FROM:
62551
+ return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, nullptr);
62552
+ case ExpressionType::COMPARE_NOT_DISTINCT_FROM: // equality-style comparisons are rejected together with all unknown types
62553
+ case ExpressionType::COMPARE_EQUAL:
62554
+ default:
62555
+ throw InternalException("Unsupported comparison type for PhysicalRangeJoin");
62556
+ }
62557
+
62558
+ return count; // not reached: every switch path above returns or throws
62559
+ }
62560
+
62561
+ } // namespace duckdb
62752
62562
  //===----------------------------------------------------------------------===//
62753
62563
  // DuckDB
62754
62564
  //
@@ -63751,7 +63561,7 @@ std::string BufferedCSVReaderOptions::ToString() const {
63751
63561
  ", HEADER=" + std::to_string(header) +
63752
63562
  (has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
63753
63563
  ", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) +
63754
- ", ALL_VARCHAR=" + std::to_string(all_varchar);
63564
+ ", IGNORE_ERRORS=" + std::to_string(ignore_errors) + ", ALL_VARCHAR=" + std::to_string(all_varchar);
63755
63565
  }
63756
63566
 
63757
63567
  static string GetLineNumberStr(idx_t linenr, bool linenr_estimated) {
@@ -65214,9 +65024,14 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
65214
65024
  return;
65215
65025
  }
65216
65026
  if (column >= sql_types.size()) {
65217
- throw InvalidInputException("Error on line %s: expected %lld values per row, but got more. (%s)",
65218
- GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(),
65219
- options.ToString());
65027
+ if (options.ignore_errors) {
65028
+ error_column_overflow = true;
65029
+ return;
65030
+ } else {
65031
+ throw InvalidInputException("Error on line %s: expected %lld values per row, but got more. (%s)",
65032
+ GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(),
65033
+ options.ToString());
65034
+ }
65220
65035
  }
65221
65036
 
65222
65037
  // insert the line number into the chunk
@@ -65268,10 +65083,23 @@ bool BufferedCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column) {
65268
65083
  }
65269
65084
  }
65270
65085
 
65086
+ // Error forwarded by 'ignore_errors' - originally encountered in 'AddValue'
65087
+ if (error_column_overflow) {
65088
+ D_ASSERT(options.ignore_errors);
65089
+ error_column_overflow = false;
65090
+ column = 0;
65091
+ return false;
65092
+ }
65093
+
65271
65094
  if (column < sql_types.size() && mode != ParserMode::SNIFFING_DIALECT) {
65272
- throw InvalidInputException("Error on line %s: expected %lld values per row, but got %d. (%s)",
65273
- GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
65274
- options.ToString());
65095
+ if (options.ignore_errors) {
65096
+ column = 0;
65097
+ return false;
65098
+ } else {
65099
+ throw InvalidInputException("Error on line %s: expected %lld values per row, but got %d. (%s)",
65100
+ GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
65101
+ options.ToString());
65102
+ }
65275
65103
  }
65276
65104
 
65277
65105
  if (mode == ParserMode::SNIFFING_DIALECT) {
@@ -65305,6 +65133,9 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
65305
65133
  if (parse_chunk.size() == 0) {
65306
65134
  return;
65307
65135
  }
65136
+
65137
+ bool conversion_error_ignored = false;
65138
+
65308
65139
  // convert the columns in the parsed chunk to the types of the table
65309
65140
  insert_chunk.SetCardinality(parse_chunk);
65310
65141
  for (idx_t col_idx = 0; col_idx < sql_types.size(); col_idx++) {
@@ -65346,26 +65177,56 @@ void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
65346
65177
  success = VectorOperations::TryCast(parse_chunk.data[col_idx], insert_chunk.data[col_idx],
65347
65178
  parse_chunk.size(), &error_message);
65348
65179
  }
65349
- if (!success) {
65350
- string col_name = to_string(col_idx);
65351
- if (col_idx < col_names.size()) {
65352
- col_name = "\"" + col_names[col_idx] + "\"";
65353
- }
65180
+ if (success) {
65181
+ continue;
65182
+ }
65183
+ if (options.ignore_errors) {
65184
+ conversion_error_ignored = true;
65185
+ continue;
65186
+ }
65187
+ string col_name = to_string(col_idx);
65188
+ if (col_idx < col_names.size()) {
65189
+ col_name = "\"" + col_names[col_idx] + "\"";
65190
+ }
65354
65191
 
65355
- if (options.auto_detect) {
65356
- throw InvalidInputException("%s in column %s, between line %llu and %llu. Parser "
65357
- "options: %s. Consider either increasing the sample size "
65358
- "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
65359
- "or skipping column conversion (ALL_VARCHAR=1)",
65360
- error_message, col_name, linenr - parse_chunk.size() + 1, linenr,
65361
- options.ToString());
65362
- } else {
65363
- throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ",
65364
- error_message, linenr - parse_chunk.size(), linenr, col_name,
65365
- options.ToString());
65192
+ if (options.auto_detect) {
65193
+ throw InvalidInputException("%s in column %s, between line %llu and %llu. Parser "
65194
+ "options: %s. Consider either increasing the sample size "
65195
+ "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
65196
+ "or skipping column conversion (ALL_VARCHAR=1)",
65197
+ error_message, col_name, linenr - parse_chunk.size() + 1, linenr,
65198
+ options.ToString());
65199
+ } else {
65200
+ throw InvalidInputException("%s between line %llu and %llu in column %s. Parser options: %s ",
65201
+ error_message, linenr - parse_chunk.size(), linenr, col_name,
65202
+ options.ToString());
65203
+ }
65204
+ }
65205
+ }
65206
+ if (conversion_error_ignored) {
65207
+ D_ASSERT(options.ignore_errors);
65208
+ SelectionVector succesful_rows;
65209
+ succesful_rows.Initialize(parse_chunk.size());
65210
+ idx_t sel_size = 0;
65211
+
65212
+ for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
65213
+ bool failed = false;
65214
+ for (idx_t column_idx = 0; column_idx < sql_types.size(); column_idx++) {
65215
+
65216
+ auto &inserted_column = insert_chunk.data[column_idx];
65217
+ auto &parsed_column = parse_chunk.data[column_idx];
65218
+
65219
+ bool was_already_null = FlatVector::IsNull(parsed_column, row_idx);
65220
+ if (!was_already_null && FlatVector::IsNull(inserted_column, row_idx)) {
65221
+ failed = true;
65222
+ break;
65366
65223
  }
65367
65224
  }
65225
+ if (!failed) {
65226
+ succesful_rows.set_index(sel_size++, row_idx);
65227
+ }
65368
65228
  }
65229
+ insert_chunk.Slice(succesful_rows, sel_size);
65369
65230
  }
65370
65231
  parse_chunk.Reset();
65371
65232
  }
@@ -79724,6 +79585,11 @@ struct ListAggregateFun {
79724
79585
  static void RegisterFunction(BuiltinFunctions &set);
79725
79586
  };
79726
79587
 
79588
+ struct ListSortFun { // Declares the list sort scalar function; only declarations are visible here
79589
+ static ScalarFunction GetFunction(); // returns a ScalarFunction
79590
+ static void RegisterFunction(BuiltinFunctions &set); // takes the BuiltinFunctions set, presumably to register into it
79591
+ };
79592
+
79727
79593
  struct CardinalityFun {
79728
79594
  static void RegisterFunction(BuiltinFunctions &set);
79729
79595
  };
@@ -84973,9 +84839,6 @@ void DateSubFun::RegisterFunction(BuiltinFunctions &set) {
84973
84839
 
84974
84840
 
84975
84841
 
84976
- // TODO date_trunc function should also handle interval data type when it is implemented. See
84977
- // https://www.postgresql.org/docs/9.1/functions-datetime.html
84978
-
84979
84842
  namespace duckdb {
84980
84843
 
84981
84844
  struct DateTrunc {
@@ -85186,6 +85049,101 @@ timestamp_t DateTrunc::MicrosecondOperator::Operation(date_t input) {
85186
85049
  return DayOperator::Operation<date_t, timestamp_t>(input);
85187
85050
  }
85188
85051
 
85052
+ // INTERVAL specialisations
85053
+ template <>
85054
+ interval_t DateTrunc::MillenniumOperator::Operation(interval_t input) {
85055
+ input.days = 0;
85056
+ input.micros = 0;
85057
+ input.months = (input.months / Interval::MONTHS_PER_MILLENIUM) * Interval::MONTHS_PER_MILLENIUM;
85058
+ return input;
85059
+ }
85060
+
85061
+ template <>
85062
+ interval_t DateTrunc::CenturyOperator::Operation(interval_t input) {
85063
+ input.days = 0;
85064
+ input.micros = 0;
85065
+ input.months = (input.months / Interval::MONTHS_PER_CENTURY) * Interval::MONTHS_PER_CENTURY;
85066
+ return input;
85067
+ }
85068
+
85069
+ template <>
85070
+ interval_t DateTrunc::DecadeOperator::Operation(interval_t input) {
85071
+ input.days = 0;
85072
+ input.micros = 0;
85073
+ input.months = (input.months / Interval::MONTHS_PER_DECADE) * Interval::MONTHS_PER_DECADE;
85074
+ return input;
85075
+ }
85076
+
85077
+ template <>
85078
+ interval_t DateTrunc::YearOperator::Operation(interval_t input) {
85079
+ input.days = 0;
85080
+ input.micros = 0;
85081
+ input.months = (input.months / Interval::MONTHS_PER_YEAR) * Interval::MONTHS_PER_YEAR;
85082
+ return input;
85083
+ }
85084
+
85085
+ template <>
85086
+ interval_t DateTrunc::QuarterOperator::Operation(interval_t input) {
85087
+ input.days = 0;
85088
+ input.micros = 0;
85089
+ input.months = (input.months / Interval::MONTHS_PER_QUARTER) * Interval::MONTHS_PER_QUARTER;
85090
+ return input;
85091
+ }
85092
+
85093
+ template <>
85094
+ interval_t DateTrunc::MonthOperator::Operation(interval_t input) {
85095
+ input.days = 0;
85096
+ input.micros = 0;
85097
+ return input;
85098
+ }
85099
+
85100
+ template <>
85101
+ interval_t DateTrunc::WeekOperator::Operation(interval_t input) {
85102
+ input.micros = 0;
85103
+ input.days = (input.days / Interval::DAYS_PER_WEEK) * Interval::DAYS_PER_WEEK;
85104
+ return input;
85105
+ }
85106
+
85107
+ template <>
85108
+ interval_t DateTrunc::ISOYearOperator::Operation(interval_t input) {
85109
+ return YearOperator::Operation<interval_t, interval_t>(input);
85110
+ }
85111
+
85112
+ template <>
85113
+ interval_t DateTrunc::DayOperator::Operation(interval_t input) {
85114
+ input.micros = 0;
85115
+ return input;
85116
+ }
85117
+
85118
+ template <>
85119
+ interval_t DateTrunc::HourOperator::Operation(interval_t input) {
85120
+ input.micros = (input.micros / Interval::MICROS_PER_HOUR) * Interval::MICROS_PER_HOUR;
85121
+ return input;
85122
+ }
85123
+
85124
+ template <>
85125
+ interval_t DateTrunc::MinuteOperator::Operation(interval_t input) {
85126
+ input.micros = (input.micros / Interval::MICROS_PER_MINUTE) * Interval::MICROS_PER_MINUTE;
85127
+ return input;
85128
+ }
85129
+
85130
+ template <>
85131
+ interval_t DateTrunc::SecondOperator::Operation(interval_t input) {
85132
+ input.micros = (input.micros / Interval::MICROS_PER_SEC) * Interval::MICROS_PER_SEC;
85133
+ return input;
85134
+ }
85135
+
85136
+ template <>
85137
+ interval_t DateTrunc::MillisecondOperator::Operation(interval_t input) {
85138
+ input.micros = (input.micros / Interval::MICROS_PER_MSEC) * Interval::MICROS_PER_MSEC;
85139
+ return input;
85140
+ }
85141
+
85142
+ template <>
85143
+ interval_t DateTrunc::MicrosecondOperator::Operation(interval_t input) {
85144
+ return input;
85145
+ }
85146
+
85189
85147
  template <class TA, class TR>
85190
85148
  static TR TruncateElement(DatePartSpecifier type, TA element) {
85191
85149
  switch (type) {
@@ -85289,7 +85247,7 @@ static void DateTruncUnaryExecutor(DatePartSpecifier type, Vector &left, Vector
85289
85247
  }
85290
85248
  }
85291
85249
 
85292
- template <typename T>
85250
+ template <typename TA, typename TR>
85293
85251
  static void DateTruncFunction(DataChunk &args, ExpressionState &state, Vector &result) {
85294
85252
  D_ASSERT(args.ColumnCount() == 2);
85295
85253
  auto &part_arg = args.data[0];
@@ -85302,20 +85260,22 @@ static void DateTruncFunction(DataChunk &args, ExpressionState &state, Vector &r
85302
85260
  ConstantVector::SetNull(result, true);
85303
85261
  } else {
85304
85262
  const auto type = GetDatePartSpecifier(ConstantVector::GetData<string_t>(part_arg)->GetString());
85305
- DateTruncUnaryExecutor<T, timestamp_t>(type, date_arg, result, args.size());
85263
+ DateTruncUnaryExecutor<TA, TR>(type, date_arg, result, args.size());
85306
85264
  }
85307
85265
  } else {
85308
- BinaryExecutor::ExecuteStandard<string_t, T, timestamp_t, DateTruncBinaryOperator>(part_arg, date_arg, result,
85309
- args.size());
85266
+ BinaryExecutor::ExecuteStandard<string_t, TA, TR, DateTruncBinaryOperator>(part_arg, date_arg, result,
85267
+ args.size());
85310
85268
  }
85311
85269
  }
85312
85270
 
85313
85271
  void DateTruncFun::RegisterFunction(BuiltinFunctions &set) {
85314
85272
  ScalarFunctionSet date_trunc("date_trunc");
85315
85273
  date_trunc.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP}, LogicalType::TIMESTAMP,
85316
- DateTruncFunction<timestamp_t>));
85317
- date_trunc.AddFunction(
85318
- ScalarFunction({LogicalType::VARCHAR, LogicalType::DATE}, LogicalType::TIMESTAMP, DateTruncFunction<date_t>));
85274
+ DateTruncFunction<timestamp_t, timestamp_t>));
85275
+ date_trunc.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::DATE}, LogicalType::TIMESTAMP,
85276
+ DateTruncFunction<date_t, timestamp_t>));
85277
+ date_trunc.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::INTERVAL}, LogicalType::INTERVAL,
85278
+ DateTruncFunction<interval_t, interval_t>));
85319
85279
  set.AddFunction(date_trunc);
85320
85280
  date_trunc.name = "datetrunc";
85321
85281
  set.AddFunction(date_trunc);
@@ -85647,7 +85607,8 @@ void StrfTimeFormat::AddFormatSpecifier(string preceding_literal, StrTimeSpecifi
85647
85607
  StrTimeFormat::AddFormatSpecifier(move(preceding_literal), specifier);
85648
85608
  }
85649
85609
 
85650
- idx_t StrfTimeFormat::GetSpecifierLength(StrTimeSpecifier specifier, date_t date, dtime_t time) {
85610
+ idx_t StrfTimeFormat::GetSpecifierLength(StrTimeSpecifier specifier, date_t date, dtime_t time, int32_t utc_offset,
85611
+ const char *tz_name) {
85651
85612
  switch (specifier) {
85652
85613
  case StrTimeSpecifier::FULL_WEEKDAY_NAME:
85653
85614
  return Date::DAY_NAMES[Date::ExtractISODayOfTheWeek(date) % 7].GetSize();
@@ -85664,9 +85625,12 @@ idx_t StrfTimeFormat::GetSpecifierLength(StrTimeSpecifier specifier, date_t date
85664
85625
  return len;
85665
85626
  }
85666
85627
  case StrTimeSpecifier::UTC_OFFSET:
85667
- // +00
85668
- return 3;
85628
+ // ±HH or ±HH:MM
85629
+ return (utc_offset % 60) ? 6 : 3;
85669
85630
  case StrTimeSpecifier::TZ_NAME:
85631
+ if (tz_name) {
85632
+ return strlen(tz_name);
85633
+ }
85670
85634
  // empty for now
85671
85635
  return 0;
85672
85636
  case StrTimeSpecifier::HOUR_24_DECIMAL:
@@ -85711,11 +85675,11 @@ idx_t StrfTimeFormat::GetSpecifierLength(StrTimeSpecifier specifier, date_t date
85711
85675
  }
85712
85676
 
85713
85677
  //! Returns the total length of the date formatted by this format specifier
85714
- idx_t StrfTimeFormat::GetLength(date_t date, dtime_t time) {
85678
+ idx_t StrfTimeFormat::GetLength(date_t date, dtime_t time, int32_t utc_offset, const char *tz_name) {
85715
85679
  idx_t size = constant_size;
85716
85680
  if (!var_length_specifiers.empty()) {
85717
85681
  for (auto &specifier : var_length_specifiers) {
85718
- size += GetSpecifierLength(specifier, date, time);
85682
+ size += GetSpecifierLength(specifier, date, time, utc_offset, tz_name);
85719
85683
  }
85720
85684
  }
85721
85685
  return size;
@@ -85828,8 +85792,9 @@ char *StrfTimeFormat::WriteDateSpecifier(StrTimeSpecifier specifier, date_t date
85828
85792
  return target;
85829
85793
  }
85830
85794
 
85831
- char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t data[], char *target) {
85832
- // data contains [0] year, [1] month, [2] day, [3] hour, [4] minute, [5] second, [6] msec
85795
+ char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t data[], const char *tz_name,
85796
+ char *target) {
85797
+ // data contains [0] year, [1] month, [2] day, [3] hour, [4] minute, [5] second, [6] msec, [7] utc
85833
85798
  switch (specifier) {
85834
85799
  case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
85835
85800
  target = WritePadded2(target, data[2]);
@@ -85892,13 +85857,24 @@ char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t
85892
85857
  case StrTimeSpecifier::MILLISECOND_PADDED:
85893
85858
  target = WritePadded3(target, data[6] / 1000);
85894
85859
  break;
85895
- case StrTimeSpecifier::UTC_OFFSET:
85896
- *target++ = '+';
85897
- *target++ = '0';
85898
- *target++ = '0';
85860
+ case StrTimeSpecifier::UTC_OFFSET: {
85861
+ *target++ = (data[7] < 0) ? '-' : '+';
85862
+
85863
+ auto offset = abs(data[7]);
85864
+ auto offset_hours = offset / Interval::MINS_PER_HOUR;
85865
+ auto offset_minutes = offset % Interval::MINS_PER_HOUR;
85866
+ target = WritePadded2(target, offset_hours);
85867
+ if (offset_minutes) {
85868
+ *target++ = ':';
85869
+ target = WritePadded2(target, offset_minutes);
85870
+ }
85899
85871
  break;
85872
+ }
85900
85873
  case StrTimeSpecifier::TZ_NAME:
85901
- // always empty for now, FIXME when we have timestamp with tz
85874
+ if (tz_name) {
85875
+ strcpy(target, tz_name);
85876
+ target += strlen(tz_name);
85877
+ }
85902
85878
  break;
85903
85879
  case StrTimeSpecifier::DAY_OF_MONTH: {
85904
85880
  target = Write2(target, data[2] % 100);
@@ -85938,7 +85914,7 @@ char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t
85938
85914
  return target;
85939
85915
  }
85940
85916
 
85941
- void StrfTimeFormat::FormatString(date_t date, int32_t data[7], char *target) {
85917
+ void StrfTimeFormat::FormatString(date_t date, int32_t data[8], const char *tz_name, char *target) {
85942
85918
  D_ASSERT(specifiers.size() + 1 == literals.size());
85943
85919
  idx_t i;
85944
85920
  for (i = 0; i < specifiers.size(); i++) {
@@ -85949,7 +85925,7 @@ void StrfTimeFormat::FormatString(date_t date, int32_t data[7], char *target) {
85949
85925
  if (is_date_specifier[i]) {
85950
85926
  target = WriteDateSpecifier(specifiers[i], date, target);
85951
85927
  } else {
85952
- target = WriteStandardSpecifier(specifiers[i], data, target);
85928
+ target = WriteStandardSpecifier(specifiers[i], data, tz_name, target);
85953
85929
  }
85954
85930
  }
85955
85931
  // copy the final literal into the target
@@ -85957,11 +85933,12 @@ void StrfTimeFormat::FormatString(date_t date, int32_t data[7], char *target) {
85957
85933
  }
85958
85934
 
85959
85935
  void StrfTimeFormat::FormatString(date_t date, dtime_t time, char *target) {
85960
- int32_t data[7]; // year, month, day, hour, min, sec, msec
85936
+ int32_t data[8]; // year, month, day, hour, min, sec, µs, offset
85961
85937
  Date::Convert(date, data[0], data[1], data[2]);
85962
85938
  Time::Convert(time, data[3], data[4], data[5], data[6]);
85939
+ data[7] = 0;
85963
85940
 
85964
- FormatString(date, data, target);
85941
+ FormatString(date, data, nullptr, target);
85965
85942
  }
85966
85943
 
85967
85944
  string StrfTimeFormat::Format(timestamp_t timestamp, const string &format_str) {
@@ -85971,7 +85948,7 @@ string StrfTimeFormat::Format(timestamp_t timestamp, const string &format_str) {
85971
85948
  auto date = Timestamp::GetDate(timestamp);
85972
85949
  auto time = Timestamp::GetTime(timestamp);
85973
85950
 
85974
- auto len = format.GetLength(date, time);
85951
+ auto len = format.GetLength(date, time, 0, nullptr);
85975
85952
  auto result = unique_ptr<char[]>(new char[len]);
85976
85953
  format.FormatString(date, time, result.get());
85977
85954
  return string(result.get(), len);
@@ -86187,7 +86164,7 @@ static void StrfTimeFunctionDate(DataChunk &args, ExpressionState &state, Vector
86187
86164
  }
86188
86165
  UnaryExecutor::Execute<date_t, string_t>(args.data[REVERSED ? 1 : 0], result, args.size(), [&](date_t input) {
86189
86166
  dtime_t time(0);
86190
- idx_t len = info.format.GetLength(input, time);
86167
+ idx_t len = info.format.GetLength(input, time, 0, nullptr);
86191
86168
  string_t target = StringVector::EmptyString(result, len);
86192
86169
  info.format.FormatString(input, time, target.GetDataWriteable());
86193
86170
  target.Finalize();
@@ -86211,7 +86188,7 @@ static void StrfTimeFunctionTimestamp(DataChunk &args, ExpressionState &state, V
86211
86188
  date_t date;
86212
86189
  dtime_t time;
86213
86190
  Timestamp::Convert(input, date, time);
86214
- idx_t len = info.format.GetLength(date, time);
86191
+ idx_t len = info.format.GetLength(date, time, 0, nullptr);
86215
86192
  string_t target = StringVector::EmptyString(result, len);
86216
86193
  info.format.FormatString(date, time, target.GetDataWriteable());
86217
86194
  target.Finalize();
@@ -86319,6 +86296,7 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) {
86319
86296
  result_data[4] = 0;
86320
86297
  result_data[5] = 0;
86321
86298
  result_data[6] = 0;
86299
+ result_data[7] = 0;
86322
86300
 
86323
86301
  auto data = str.GetDataUnsafe();
86324
86302
  idx_t size = str.GetSize();
@@ -86591,8 +86569,7 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) {
86591
86569
  error_position = pos;
86592
86570
  return false;
86593
86571
  }
86594
- result_data[3] -= hour_offset;
86595
- result_data[4] -= minute_offset;
86572
+ result_data[7] = hour_offset * Interval::MINS_PER_HOUR + minute_offset;
86596
86573
  break;
86597
86574
  }
86598
86575
  case StrTimeSpecifier::TZ_NAME: {
@@ -86600,11 +86577,20 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) {
86600
86577
  while (pos < size && StringUtil::CharacterIsSpace(data[pos])) {
86601
86578
  pos++;
86602
86579
  }
86580
+ const auto tz_begin = data + pos;
86603
86581
  // stop when we encounter a space or the end of the string
86604
86582
  while (pos < size && !StringUtil::CharacterIsSpace(data[pos])) {
86605
86583
  pos++;
86606
86584
  }
86607
- // FIXME: actually use the timestamp...
86585
+ const auto tz_end = data + pos;
86586
+ // Can't fully validate without a list - caller's responsibility.
86587
+ // But tz must not be empty.
86588
+ if (tz_end == tz_begin) {
86589
+ error_message = "Empty Time Zone name";
86590
+ error_position = tz_begin - data;
86591
+ return false;
86592
+ }
86593
+ result.tz.assign(tz_begin, tz_end);
86608
86594
  break;
86609
86595
  }
86610
86596
  default:
@@ -86701,6 +86687,9 @@ static unique_ptr<FunctionData> StrpTimeBindFunction(ClientContext &context, Sca
86701
86687
  if (!error.empty()) {
86702
86688
  throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error);
86703
86689
  }
86690
+ if (format.HasFormatSpecifier(StrTimeSpecifier::UTC_OFFSET)) {
86691
+ bound_function.return_type = LogicalType::TIMESTAMP_TZ;
86692
+ }
86704
86693
  }
86705
86694
  return make_unique<StrpTimeBindData>(format);
86706
86695
  }
@@ -86732,7 +86721,9 @@ date_t StrpTimeFormat::ParseResult::ToDate() {
86732
86721
 
86733
86722
  timestamp_t StrpTimeFormat::ParseResult::ToTimestamp() {
86734
86723
  date_t date = Date::FromDate(data[0], data[1], data[2]);
86735
- dtime_t time = Time::FromTime(data[3], data[4], data[5], data[6]);
86724
+ const auto hour_offset = data[7] / Interval::MINS_PER_HOUR;
86725
+ const auto mins_offset = data[7] % Interval::MINS_PER_HOUR;
86726
+ dtime_t time = Time::FromTime(data[3] - hour_offset, data[4] - mins_offset, data[5], data[6]);
86736
86727
  return Timestamp::FromDatetime(date, time);
86737
86728
  }
86738
86729
 
@@ -88774,6 +88765,371 @@ void ListExtractFun::RegisterFunction(BuiltinFunctions &set) {
88774
88765
 
88775
88766
 
88776
88767
 
88768
+ namespace duckdb {
88769
+
88770
+ struct ListSortBindData : public FunctionData {
88771
+ ListSortBindData(OrderType order_type_p, OrderByNullType null_order_p, LogicalType &return_type_p,
88772
+ LogicalType &child_type_p, ClientContext &context_p);
88773
+ ~ListSortBindData() override;
88774
+
88775
+ OrderType order_type;
88776
+ OrderByNullType null_order;
88777
+ LogicalType return_type;
88778
+ LogicalType child_type;
88779
+
88780
+ vector<LogicalType> types;
88781
+ vector<LogicalType> payload_types;
88782
+
88783
+ ClientContext &context;
88784
+ unique_ptr<GlobalSortState> global_sort_state;
88785
+ RowLayout payload_layout;
88786
+ vector<BoundOrderByNode> orders;
88787
+
88788
+ unique_ptr<FunctionData> Copy() override;
88789
+ };
88790
+
88791
+ ListSortBindData::ListSortBindData(OrderType order_type_p, OrderByNullType null_order_p, LogicalType &return_type_p,
88792
+ LogicalType &child_type_p, ClientContext &context_p)
88793
+ : order_type(order_type_p), null_order(null_order_p), return_type(return_type_p), child_type(child_type_p),
88794
+ context(context_p) {
88795
+
88796
+ // get the vector types
88797
+ types.emplace_back(LogicalType::USMALLINT);
88798
+ types.emplace_back(child_type);
88799
+ D_ASSERT(types.size() == 2);
88800
+
88801
+ // get the payload types
88802
+ payload_types.emplace_back(LogicalType::UINTEGER);
88803
+ D_ASSERT(payload_types.size() == 1);
88804
+
88805
+ // initialize the payload layout
88806
+ payload_layout.Initialize(payload_types);
88807
+
88808
+ // get the BoundOrderByNode
88809
+ auto idx_col_expr = make_unique_base<Expression, BoundReferenceExpression>(LogicalType::USMALLINT, 0);
88810
+ auto lists_col_expr = make_unique_base<Expression, BoundReferenceExpression>(child_type, 1);
88811
+ orders.emplace_back(OrderType::ASCENDING, OrderByNullType::ORDER_DEFAULT, move(idx_col_expr));
88812
+ orders.emplace_back(order_type, null_order, move(lists_col_expr));
88813
+ }
88814
+
88815
+ unique_ptr<FunctionData> ListSortBindData::Copy() {
88816
+ return make_unique<ListSortBindData>(order_type, null_order, return_type, child_type, context);
88817
+ }
88818
+
88819
+ ListSortBindData::~ListSortBindData() {
88820
+ }
88821
+
88822
+ // create the key_chunk and the payload_chunk and sink them into the local_sort_state
88823
+ void SinkDataChunk(Vector *child_vector, SelectionVector &sel, idx_t offset_lists_indices, vector<LogicalType> &types,
88824
+ vector<LogicalType> &payload_types, Vector &payload_vector, LocalSortState &local_sort_state,
88825
+ bool &data_to_sort, Vector &lists_indices) {
88826
+
88827
+ // slice the child vector
88828
+ Vector slice(*child_vector, sel, offset_lists_indices);
88829
+
88830
+ // initialize and fill key_chunk
88831
+ DataChunk key_chunk;
88832
+ key_chunk.InitializeEmpty(types);
88833
+ key_chunk.data[0].Reference(lists_indices);
88834
+ key_chunk.data[1].Reference(slice);
88835
+ key_chunk.SetCardinality(offset_lists_indices);
88836
+
88837
+ // initialize and fill key_chunk and payload_chunk
88838
+ DataChunk payload_chunk;
88839
+ payload_chunk.InitializeEmpty(payload_types);
88840
+ payload_chunk.data[0].Reference(payload_vector);
88841
+ payload_chunk.SetCardinality(offset_lists_indices);
88842
+
88843
+ // sink
88844
+ local_sort_state.SinkChunk(key_chunk, payload_chunk);
88845
+ data_to_sort = true;
88846
+ }
88847
+
88848
+ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &result) {
88849
+
88850
+ D_ASSERT(args.ColumnCount() >= 1 && args.ColumnCount() <= 3);
88851
+ auto count = args.size();
88852
+ Vector &lists = args.data[0];
88853
+
88854
+ result.SetVectorType(VectorType::FLAT_VECTOR);
88855
+ auto &result_validity = FlatVector::Validity(result);
88856
+
88857
+ if (lists.GetType().id() == LogicalTypeId::SQLNULL) {
88858
+ result_validity.SetInvalid(0);
88859
+ return;
88860
+ }
88861
+
88862
+ auto &func_expr = (BoundFunctionExpression &)state.expr;
88863
+ auto &info = (ListSortBindData &)*func_expr.bind_info;
88864
+
88865
+ // initialize the global and local sorting state
88866
+ auto &buffer_manager = BufferManager::GetBufferManager(info.context);
88867
+ info.global_sort_state = make_unique<GlobalSortState>(buffer_manager, info.orders, info.payload_layout);
88868
+ auto &global_sort_state = *info.global_sort_state;
88869
+ LocalSortState local_sort_state;
88870
+ local_sort_state.Initialize(global_sort_state, buffer_manager);
88871
+
88872
+ // get the child vector
88873
+ auto lists_size = ListVector::GetListSize(lists);
88874
+ auto &child_vector = ListVector::GetEntry(lists);
88875
+ VectorData child_data;
88876
+ child_vector.Orrify(lists_size, child_data);
88877
+
88878
+ // get the lists data
88879
+ VectorData lists_data;
88880
+ lists.Orrify(count, lists_data);
88881
+ auto list_entries = (list_entry_t *)lists_data.data;
88882
+
88883
+ // create the lists_indices vector, this contains an element for each list's entry,
88884
+ // the element corresponds to the list's index, e.g. for [1, 2, 4], [5, 4]
88885
+ // lists_indices contains [0, 0, 0, 1, 1]
88886
+ Vector lists_indices(LogicalType::USMALLINT);
88887
+ auto lists_indices_data = FlatVector::GetData<uint16_t>(lists_indices);
88888
+
88889
+ // create the payload_vector, this is just a vector containing incrementing integers
88890
+ // this will later be used as the 'new' selection vector of the child_vector, after
88891
+ // rearranging the payload according to the sorting order
88892
+ Vector payload_vector(LogicalType::UINTEGER);
88893
+ auto payload_vector_data = FlatVector::GetData<uint32_t>(payload_vector);
88894
+
88895
+ // selection vector pointing to the data of the child vector,
88896
+ // used for slicing the child_vector correctly
88897
+ SelectionVector sel(STANDARD_VECTOR_SIZE);
88898
+
88899
+ idx_t offset_lists_indices = 0;
88900
+ uint32_t incr_payload_count = 0;
88901
+ bool data_to_sort = false;
88902
+
88903
+ for (idx_t i = 0; i < count; i++) {
88904
+
88905
+ auto lists_index = lists_data.sel->get_index(i);
88906
+ const auto &list_entry = list_entries[lists_index];
88907
+
88908
+ // nothing to do for this list
88909
+ if (!lists_data.validity.RowIsValid(lists_index)) {
88910
+ result_validity.SetInvalid(i);
88911
+ continue;
88912
+ }
88913
+
88914
+ // empty list, no sorting required
88915
+ if (list_entry.length == 0) {
88916
+ continue;
88917
+ }
88918
+
88919
+ for (idx_t child_idx = 0; child_idx < list_entry.length; child_idx++) {
88920
+
88921
+ // lists_indices vector is full, sink
88922
+ if (offset_lists_indices == STANDARD_VECTOR_SIZE) {
88923
+ SinkDataChunk(&child_vector, sel, offset_lists_indices, info.types, info.payload_types, payload_vector,
88924
+ local_sort_state, data_to_sort, lists_indices);
88925
+ offset_lists_indices = 0;
88926
+ }
88927
+
88928
+ auto source_idx = child_data.sel->get_index(list_entry.offset + child_idx);
88929
+ sel.set_index(offset_lists_indices, source_idx);
88930
+ lists_indices_data[offset_lists_indices] = (uint32_t)i;
88931
+ payload_vector_data[offset_lists_indices] = incr_payload_count;
88932
+ offset_lists_indices++;
88933
+ incr_payload_count++;
88934
+ }
88935
+ }
88936
+
88937
+ if (offset_lists_indices != 0) {
88938
+ SinkDataChunk(&child_vector, sel, offset_lists_indices, info.types, info.payload_types, payload_vector,
88939
+ local_sort_state, data_to_sort, lists_indices);
88940
+ }
88941
+
88942
+ if (data_to_sort) {
88943
+
88944
+ // add local state to global state, which sorts the data
88945
+ global_sort_state.AddLocalState(local_sort_state);
88946
+ global_sort_state.PrepareMergePhase();
88947
+
88948
+ // selection vector that is to be filled with the 'sorted' payload
88949
+ SelectionVector sel_sorted(incr_payload_count);
88950
+ idx_t sel_sorted_idx = 0;
88951
+
88952
+ // scan the sorted row data
88953
+ PayloadScanner scanner(*global_sort_state.sorted_blocks[0]->payload_data, global_sort_state);
88954
+ for (;;) {
88955
+ DataChunk result_chunk;
88956
+ result_chunk.Initialize(info.payload_types);
88957
+ result_chunk.SetCardinality(0);
88958
+ scanner.Scan(result_chunk);
88959
+ if (result_chunk.size() == 0) {
88960
+ break;
88961
+ }
88962
+
88963
+ // construct the selection vector with the new order from the result vectors
88964
+ Vector result_vector(result_chunk.data[0]);
88965
+ auto result_data = FlatVector::GetData<uint32_t>(result_vector);
88966
+ auto row_count = result_chunk.size();
88967
+
88968
+ for (idx_t i = 0; i < row_count; i++) {
88969
+ sel_sorted.set_index(sel_sorted_idx, result_data[i]);
88970
+ sel_sorted_idx++;
88971
+ }
88972
+ }
88973
+
88974
+ D_ASSERT(sel_sorted_idx == incr_payload_count);
88975
+ child_vector.Slice(sel_sorted, sel_sorted_idx);
88976
+ child_vector.Normalify(sel_sorted_idx);
88977
+ }
88978
+
88979
+ result.Reference(lists);
88980
+ }
88981
+
88982
+ static unique_ptr<FunctionData> ListSortBind(ClientContext &context, ScalarFunction &bound_function,
88983
+ vector<unique_ptr<Expression>> &arguments, OrderType &order,
88984
+ OrderByNullType &null_order) {
88985
+
88986
+ if (arguments[0]->return_type.id() == LogicalTypeId::SQLNULL) {
88987
+ bound_function.arguments[0] = LogicalType::SQLNULL;
88988
+ bound_function.return_type = LogicalType::SQLNULL;
88989
+ return make_unique<VariableReturnBindData>(bound_function.return_type);
88990
+ }
88991
+
88992
+ bound_function.arguments[0] = arguments[0]->return_type;
88993
+ bound_function.return_type = arguments[0]->return_type;
88994
+ auto child_type = ListType::GetChildType(arguments[0]->return_type);
88995
+
88996
+ return make_unique<ListSortBindData>(order, null_order, bound_function.return_type, child_type, context);
88997
+ }
88998
+
88999
+ OrderByNullType GetNullOrder(vector<unique_ptr<Expression>> &arguments, idx_t idx) {
89000
+
89001
+ if (!arguments[idx]->IsFoldable()) {
89002
+ throw InvalidInputException("Null sorting order must be a constant");
89003
+ }
89004
+ Value null_order_value = ExpressionExecutor::EvaluateScalar(*arguments[idx]);
89005
+ auto null_order_name = null_order_value.ToString();
89006
+ std::transform(null_order_name.begin(), null_order_name.end(), null_order_name.begin(), ::toupper);
89007
+ if (null_order_name != "NULLS FIRST" && null_order_name != "NULLS LAST") {
89008
+ throw InvalidInputException("Null sorting order must be either NULLS FIRST or NULLS LAST");
89009
+ }
89010
+
89011
+ if (null_order_name == "NULLS LAST") {
89012
+ return OrderByNullType::NULLS_LAST;
89013
+ }
89014
+ return OrderByNullType::NULLS_FIRST;
89015
+ }
89016
+
89017
+ static unique_ptr<FunctionData> ListNormalSortBind(ClientContext &context, ScalarFunction &bound_function,
89018
+ vector<unique_ptr<Expression>> &arguments) {
89019
+
89020
+ D_ASSERT(bound_function.arguments.size() >= 1 && bound_function.arguments.size() <= 3);
89021
+ D_ASSERT(arguments.size() >= 1 && arguments.size() <= 3);
89022
+
89023
+ // set default values
89024
+ auto &config = DBConfig::GetConfig(context);
89025
+ auto order = config.default_order_type;
89026
+ auto null_order = config.default_null_order;
89027
+
89028
+ // get the sorting order
89029
+ if (arguments.size() >= 2) {
89030
+
89031
+ if (!arguments[1]->IsFoldable()) {
89032
+ throw InvalidInputException("Sorting order must be a constant");
89033
+ }
89034
+ Value order_value = ExpressionExecutor::EvaluateScalar(*arguments[1]);
89035
+ auto order_name = order_value.ToString();
89036
+ std::transform(order_name.begin(), order_name.end(), order_name.begin(), ::toupper);
89037
+ if (order_name != "DESC" && order_name != "ASC") {
89038
+ throw InvalidInputException("Sorting order must be either ASC or DESC");
89039
+ }
89040
+ if (order_name == "DESC") {
89041
+ order = OrderType::DESCENDING;
89042
+ } else {
89043
+ order = OrderType::ASCENDING;
89044
+ }
89045
+ }
89046
+
89047
+ // get the null sorting order
89048
+ if (arguments.size() == 3) {
89049
+ null_order = GetNullOrder(arguments, 2);
89050
+ }
89051
+
89052
+ return ListSortBind(context, bound_function, arguments, order, null_order);
89053
+ }
89054
+
89055
+ static unique_ptr<FunctionData> ListReverseSortBind(ClientContext &context, ScalarFunction &bound_function,
89056
+ vector<unique_ptr<Expression>> &arguments) {
89057
+
89058
+ D_ASSERT(bound_function.arguments.size() == 1 || bound_function.arguments.size() == 2);
89059
+ D_ASSERT(arguments.size() == 1 || arguments.size() == 2);
89060
+
89061
+ // set (reverse) default values
89062
+ auto &config = DBConfig::GetConfig(context);
89063
+ auto order = (config.default_order_type == OrderType::ASCENDING) ? OrderType::DESCENDING : OrderType::ASCENDING;
89064
+ auto null_order = config.default_null_order;
89065
+
89066
+ // get the null sorting order
89067
+ if (arguments.size() == 2) {
89068
+ null_order = GetNullOrder(arguments, 1);
89069
+ }
89070
+
89071
+ return ListSortBind(context, bound_function, arguments, order, null_order);
89072
+ }
89073
+
89074
+ void ListSortFun::RegisterFunction(BuiltinFunctions &set) {
89075
+
89076
+ // normal sort
89077
+
89078
+ // one parameter: list
89079
+ ScalarFunction sort({LogicalType::LIST(LogicalType::ANY)}, LogicalType::LIST(LogicalType::ANY), ListSortFunction,
89080
+ false, false, ListNormalSortBind);
89081
+
89082
+ // two parameters: list, order
89083
+ ScalarFunction sort_order({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR},
89084
+ LogicalType::LIST(LogicalType::ANY), ListSortFunction, false, false, ListNormalSortBind);
89085
+
89086
+ // three parameters: list, order, null order
89087
+ ScalarFunction sort_orders({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR, LogicalType::VARCHAR},
89088
+ LogicalType::LIST(LogicalType::ANY), ListSortFunction, false, false, ListNormalSortBind);
89089
+
89090
+ ScalarFunctionSet list_sort("list_sort");
89091
+ list_sort.AddFunction(sort);
89092
+ list_sort.AddFunction(sort_order);
89093
+ list_sort.AddFunction(sort_orders);
89094
+ set.AddFunction(list_sort);
89095
+
89096
+ ScalarFunctionSet array_sort("array_sort");
89097
+ array_sort.AddFunction(sort);
89098
+ array_sort.AddFunction(sort_order);
89099
+ array_sort.AddFunction(sort_orders);
89100
+ set.AddFunction(array_sort);
89101
+
89102
+ // reverse sort
89103
+
89104
+ // one parameter: list
89105
+ ScalarFunction sort_reverse({LogicalType::LIST(LogicalType::ANY)}, LogicalType::LIST(LogicalType::ANY),
89106
+ ListSortFunction, false, false, ListReverseSortBind);
89107
+
89108
+ // two parameters: list, null order
89109
+ ScalarFunction sort_reverse_null_order({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR},
89110
+ LogicalType::LIST(LogicalType::ANY), ListSortFunction, false, false,
89111
+ ListReverseSortBind);
89112
+
89113
+ ScalarFunctionSet list_reverse_sort("list_reverse_sort");
89114
+ list_reverse_sort.AddFunction(sort_reverse);
89115
+ list_reverse_sort.AddFunction(sort_reverse_null_order);
89116
+ set.AddFunction(list_reverse_sort);
89117
+
89118
+ ScalarFunctionSet array_reverse_sort("array_reverse_sort");
89119
+ array_reverse_sort.AddFunction(sort_reverse);
89120
+ array_reverse_sort.AddFunction(sort_reverse_null_order);
89121
+ set.AddFunction(array_reverse_sort);
89122
+ }
89123
+
89124
+ } // namespace duckdb
89125
+
89126
+
89127
+
89128
+
89129
+
89130
+
89131
+
89132
+
88777
89133
  namespace duckdb {
88778
89134
 
88779
89135
  static void ListValueFunction(DataChunk &args, ExpressionState &state, Vector &result) {
@@ -90668,6 +91024,7 @@ void BuiltinFunctions::RegisterNestedFunctions() {
90668
91024
  Register<ListAggregateFun>();
90669
91025
  Register<ListValueFun>();
90670
91026
  Register<ListExtractFun>();
91027
+ Register<ListSortFun>();
90671
91028
  Register<ListRangeFun>();
90672
91029
  Register<ListFlattenFun>();
90673
91030
  Register<MapFun>();
@@ -98561,6 +98918,8 @@ static bool ParseBaseOption(BufferedCSVReaderOptions &options, string &loption,
98561
98918
  options.skip_rows = ParseInteger(set);
98562
98919
  } else if (loption == "max_line_size" || loption == "maximum_line_size") {
98563
98920
  options.maximum_line_size = ParseInteger(set);
98921
+ } else if (loption == "ignore_errors") {
98922
+ options.ignore_errors = ParseBoolean(set);
98564
98923
  } else {
98565
98924
  // unrecognized option in base CSV
98566
98925
  return false;
@@ -103872,44 +104231,74 @@ void BaseAppender::AppendValueInternal(T input) {
103872
104231
  throw InvalidInputException("Too many appends for chunk!");
103873
104232
  }
103874
104233
  auto &col = chunk->data[column];
103875
- switch (col.GetType().InternalType()) {
103876
- case PhysicalType::BOOL:
104234
+ switch (col.GetType().id()) {
104235
+ case LogicalTypeId::BOOLEAN:
103877
104236
  AppendValueInternal<T, bool>(col, input);
103878
104237
  break;
103879
- case PhysicalType::UINT8:
104238
+ case LogicalTypeId::UTINYINT:
103880
104239
  AppendValueInternal<T, uint8_t>(col, input);
103881
104240
  break;
103882
- case PhysicalType::INT8:
104241
+ case LogicalTypeId::TINYINT:
103883
104242
  AppendValueInternal<T, int8_t>(col, input);
103884
104243
  break;
103885
- case PhysicalType::UINT16:
104244
+ case LogicalTypeId::USMALLINT:
103886
104245
  AppendValueInternal<T, uint16_t>(col, input);
103887
104246
  break;
103888
- case PhysicalType::INT16:
104247
+ case LogicalTypeId::SMALLINT:
103889
104248
  AppendValueInternal<T, int16_t>(col, input);
103890
104249
  break;
103891
- case PhysicalType::UINT32:
104250
+ case LogicalTypeId::UINTEGER:
103892
104251
  AppendValueInternal<T, uint32_t>(col, input);
103893
104252
  break;
103894
- case PhysicalType::INT32:
104253
+ case LogicalTypeId::INTEGER:
103895
104254
  AppendValueInternal<T, int32_t>(col, input);
103896
104255
  break;
103897
- case PhysicalType::UINT64:
104256
+ case LogicalTypeId::UBIGINT:
103898
104257
  AppendValueInternal<T, uint64_t>(col, input);
103899
104258
  break;
103900
- case PhysicalType::INT64:
104259
+ case LogicalTypeId::BIGINT:
103901
104260
  AppendValueInternal<T, int64_t>(col, input);
103902
104261
  break;
103903
- case PhysicalType::INT128:
104262
+ case LogicalTypeId::HUGEINT:
103904
104263
  AppendValueInternal<T, hugeint_t>(col, input);
103905
104264
  break;
103906
- case PhysicalType::FLOAT:
104265
+ case LogicalTypeId::FLOAT:
103907
104266
  AppendValueInternal<T, float>(col, input);
103908
104267
  break;
103909
- case PhysicalType::DOUBLE:
104268
+ case LogicalTypeId::DOUBLE:
103910
104269
  AppendValueInternal<T, double>(col, input);
103911
104270
  break;
103912
- case PhysicalType::VARCHAR:
104271
+ case LogicalTypeId::DECIMAL:
104272
+ switch (col.GetType().InternalType()) {
104273
+ case PhysicalType::INT8:
104274
+ AppendValueInternal<T, int8_t>(col, input);
104275
+ break;
104276
+ case PhysicalType::INT16:
104277
+ AppendValueInternal<T, int16_t>(col, input);
104278
+ break;
104279
+ case PhysicalType::INT32:
104280
+ AppendValueInternal<T, int32_t>(col, input);
104281
+ break;
104282
+ default:
104283
+ AppendValueInternal<T, int64_t>(col, input);
104284
+ break;
104285
+ }
104286
+ break;
104287
+ case LogicalTypeId::DATE:
104288
+ AppendValueInternal<T, date_t>(col, input);
104289
+ break;
104290
+ case LogicalTypeId::TIMESTAMP:
104291
+ case LogicalTypeId::TIMESTAMP_TZ:
104292
+ AppendValueInternal<T, timestamp_t>(col, input);
104293
+ break;
104294
+ case LogicalTypeId::TIME:
104295
+ case LogicalTypeId::TIME_TZ:
104296
+ AppendValueInternal<T, dtime_t>(col, input);
104297
+ break;
104298
+ case LogicalTypeId::INTERVAL:
104299
+ AppendValueInternal<T, interval_t>(col, input);
104300
+ break;
104301
+ case LogicalTypeId::VARCHAR:
103913
104302
  FlatVector::GetData<string_t>(col)[chunk->size()] = StringCast::Operation<T>(input, col);
103914
104303
  break;
103915
104304
  default:
@@ -103995,17 +104384,17 @@ void BaseAppender::Append(double value) {
103995
104384
 
103996
104385
  template <>
103997
104386
  void BaseAppender::Append(date_t value) {
103998
- AppendValueInternal<int32_t>(value.days);
104387
+ AppendValueInternal<date_t>(value);
103999
104388
  }
104000
104389
 
104001
104390
  template <>
104002
104391
  void BaseAppender::Append(dtime_t value) {
104003
- AppendValueInternal<int64_t>(value.micros);
104392
+ AppendValueInternal<dtime_t>(value);
104004
104393
  }
104005
104394
 
104006
104395
  template <>
104007
104396
  void BaseAppender::Append(timestamp_t value) {
104008
- AppendValueInternal<int64_t>(value.value);
104397
+ AppendValueInternal<timestamp_t>(value);
104009
104398
  }
104010
104399
 
104011
104400
  template <>
@@ -105104,6 +105493,24 @@ duckdb_logical_type duckdb_create_logical_type(duckdb_type type) {
105104
105493
  return new duckdb::LogicalType(duckdb::ConvertCTypeToCPP(type));
105105
105494
  }
105106
105495
 
105496
+ duckdb_logical_type duckdb_create_list_type(duckdb_logical_type type) {
105497
+ if (!type) {
105498
+ return nullptr;
105499
+ }
105500
+ duckdb::LogicalType *ltype = new duckdb::LogicalType;
105501
+ *ltype = duckdb::LogicalType::LIST(*(duckdb::LogicalType *)type);
105502
+ return ltype;
105503
+ }
105504
+
105505
+ duckdb_logical_type duckdb_create_map_type(duckdb_logical_type key_type, duckdb_logical_type value_type) {
105506
+ if (!key_type || !value_type) {
105507
+ return nullptr;
105508
+ }
105509
+ duckdb::LogicalType *mtype = new duckdb::LogicalType;
105510
+ *mtype = duckdb::LogicalType::MAP(*(duckdb::LogicalType *)key_type, *(duckdb::LogicalType *)value_type);
105511
+ return mtype;
105512
+ }
105513
+
105107
105514
  duckdb_logical_type duckdb_create_decimal_type(uint8_t width, uint8_t scale) {
105108
105515
  return new duckdb::LogicalType(duckdb::LogicalType::DECIMAL(width, scale));
105109
105516
  }
@@ -105223,6 +105630,28 @@ duckdb_logical_type duckdb_list_type_child_type(duckdb_logical_type type) {
105223
105630
  return new duckdb::LogicalType(duckdb::ListType::GetChildType(ltype));
105224
105631
  }
105225
105632
 
105633
+ duckdb_logical_type duckdb_map_type_key_type(duckdb_logical_type type) {
105634
+ if (!type) {
105635
+ return nullptr;
105636
+ }
105637
+ auto &mtype = *((duckdb::LogicalType *)type);
105638
+ if (mtype.id() != duckdb::LogicalTypeId::MAP) {
105639
+ return nullptr;
105640
+ }
105641
+ return new duckdb::LogicalType(duckdb::MapType::KeyType(mtype));
105642
+ }
105643
+
105644
+ duckdb_logical_type duckdb_map_type_value_type(duckdb_logical_type type) {
105645
+ if (!type) {
105646
+ return nullptr;
105647
+ }
105648
+ auto &mtype = *((duckdb::LogicalType *)type);
105649
+ if (mtype.id() != duckdb::LogicalTypeId::MAP) {
105650
+ return nullptr;
105651
+ }
105652
+ return new duckdb::LogicalType(duckdb::MapType::ValueType(mtype));
105653
+ }
105654
+
105226
105655
  idx_t duckdb_struct_type_child_count(duckdb_logical_type type) {
105227
105656
  if (!type) {
105228
105657
  return 0;
@@ -121694,6 +122123,18 @@ string Relation::RenderWhitespace(idx_t depth) {
121694
122123
  return string(depth * 2, ' ');
121695
122124
  }
121696
122125
 
122126
+ vector<shared_ptr<ExternalDependency>> Relation::GetAllDependencies() {
122127
+ vector<shared_ptr<ExternalDependency>> all_dependencies;
122128
+ Relation *cur = this;
122129
+ while (cur) {
122130
+ if (cur->extra_dependencies) {
122131
+ all_dependencies.push_back(cur->extra_dependencies);
122132
+ }
122133
+ cur = ChildRelation();
122134
+ }
122135
+ return all_dependencies;
122136
+ }
122137
+
121697
122138
  } // namespace duckdb
121698
122139
 
121699
122140
 
@@ -129244,7 +129685,7 @@ unique_ptr<Expression> EnumComparisonRule::Apply(LogicalOperator &op, vector<Exp
129244
129685
  }
129245
129686
 
129246
129687
  auto cast_left_to_right =
129247
- make_unique<BoundCastExpression>(move(left_child->child), right_child->child->return_type);
129688
+ make_unique<BoundCastExpression>(move(left_child->child), right_child->child->return_type, true);
129248
129689
 
129249
129690
  return make_unique<BoundComparisonExpression>(root->type, move(cast_left_to_right), move(right_child->child));
129250
129691
  }