duckdb 0.5.2-dev815.0 → 0.5.2-dev841.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -29106,7 +29106,7 @@ struct RowOperations {
29106
29106
  namespace duckdb {
29107
29107
 
29108
29108
  template <class OP, class RETURN_TYPE, typename... ARGS>
29109
- RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
29109
+ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&...args) {
29110
29110
  D_ASSERT(radix_bits <= sizeof(hash_t) * 8);
29111
29111
  switch (radix_bits) {
29112
29112
  case 1:
@@ -29135,7 +29135,7 @@ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
29135
29135
  }
29136
29136
 
29137
29137
  template <class OP, class RETURN_TYPE, idx_t radix_bits_1, typename... ARGS>
29138
- RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
29138
+ RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&...args) {
29139
29139
  D_ASSERT(radix_bits_2 <= sizeof(hash_t) * 8);
29140
29140
  switch (radix_bits_2) {
29141
29141
  case 1:
@@ -29164,7 +29164,7 @@ RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
29164
29164
  }
29165
29165
 
29166
29166
  template <class OP, class RETURN_TYPE, typename... ARGS>
29167
- RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&... args) {
29167
+ RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&...args) {
29168
29168
  D_ASSERT(radix_bits_1 <= sizeof(hash_t) * 8);
29169
29169
  switch (radix_bits_1) {
29170
29170
  case 1:
@@ -49455,6 +49455,9 @@ void Vector::Initialize(bool zero_data, idx_t capacity) {
49455
49455
  memset(data, 0, capacity * type_size);
49456
49456
  }
49457
49457
  }
49458
+ if (capacity > STANDARD_VECTOR_SIZE) {
49459
+ validity.Resize(STANDARD_VECTOR_SIZE, capacity);
49460
+ }
49458
49461
  }
49459
49462
 
49460
49463
  struct DataArrays {
@@ -93661,6 +93664,13 @@ unique_ptr<FunctionData> HistogramBindFunction(ClientContext &context, Aggregate
93661
93664
  vector<unique_ptr<Expression>> &arguments) {
93662
93665
 
93663
93666
  D_ASSERT(arguments.size() == 1);
93667
+
93668
+ if (arguments[0]->return_type.id() == LogicalTypeId::LIST ||
93669
+ arguments[0]->return_type.id() == LogicalTypeId::STRUCT ||
93670
+ arguments[0]->return_type.id() == LogicalTypeId::MAP) {
93671
+ throw NotImplementedException("Unimplemented type for histogram %s", arguments[0]->return_type.ToString());
93672
+ }
93673
+
93664
93674
  child_list_t<LogicalType> struct_children;
93665
93675
  struct_children.push_back({"key", LogicalType::LIST(arguments[0]->return_type)});
93666
93676
  struct_children.push_back({"value", LogicalType::LIST(LogicalType::UBIGINT)});
@@ -95714,6 +95724,11 @@ struct VectorCastHelpers {
95714
95724
  }
95715
95725
  };
95716
95726
 
95727
+ struct VectorStringifiedListParser {
95728
+ static idx_t CountParts(const string_t &input);
95729
+ static bool SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start, Vector &child);
95730
+ };
95731
+
95717
95732
  } // namespace duckdb
95718
95733
 
95719
95734
 
@@ -96484,20 +96499,8 @@ BoundCastInfo DefaultCasts::EnumCastSwitch(BindCastInput &input, const LogicalTy
96484
96499
 
96485
96500
  namespace duckdb {
96486
96501
 
96487
- struct ListBoundCastData : public BoundCastData {
96488
- explicit ListBoundCastData(BoundCastInfo child_cast) : child_cast_info(move(child_cast)) {
96489
- }
96490
-
96491
- BoundCastInfo child_cast_info;
96492
-
96493
- public:
96494
- unique_ptr<BoundCastData> Copy() const override {
96495
- return make_unique<ListBoundCastData>(child_cast_info.Copy());
96496
- }
96497
- };
96498
-
96499
- unique_ptr<BoundCastData> BindListToListCast(BindCastInput &input, const LogicalType &source,
96500
- const LogicalType &target) {
96502
+ unique_ptr<BoundCastData> ListBoundCastData::BindListToListCast(BindCastInput &input, const LogicalType &source,
96503
+ const LogicalType &target) {
96501
96504
  vector<BoundCastInfo> child_cast_info;
96502
96505
  auto &source_child_type = ListType::GetChildType(source);
96503
96506
  auto &result_child_type = ListType::GetChildType(target);
@@ -96608,11 +96611,11 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
96608
96611
  BoundCastInfo DefaultCasts::ListCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
96609
96612
  switch (target.id()) {
96610
96613
  case LogicalTypeId::LIST:
96611
- return BoundCastInfo(ListToListCast, BindListToListCast(input, source, target));
96614
+ return BoundCastInfo(ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
96612
96615
  case LogicalTypeId::VARCHAR:
96613
96616
  case LogicalTypeId::JSON:
96614
- return BoundCastInfo(ListToVarcharCast,
96615
- BindListToListCast(input, source, LogicalType::LIST(LogicalType::VARCHAR)));
96617
+ return BoundCastInfo(ListToVarcharCast, ListBoundCastData::BindListToListCast(
96618
+ input, source, LogicalType::LIST(LogicalType::VARCHAR)));
96616
96619
  default:
96617
96620
  return DefaultCasts::TryVectorNullCast;
96618
96621
  }
@@ -96950,9 +96953,97 @@ static BoundCastInfo VectorStringCastNumericSwitch(BindCastInput &input, const L
96950
96953
  }
96951
96954
  }
96952
96955
 
96956
+ bool StringListCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result, ValidityMask &result_mask,
96957
+ idx_t count, CastParameters &parameters, const SelectionVector *sel) {
96958
+
96959
+ idx_t total_list_size = 0;
96960
+ for (idx_t i = 0; i < count; i++) {
96961
+ idx_t idx = i;
96962
+ if (sel) {
96963
+ idx = sel->get_index(i);
96964
+ }
96965
+ if (!source_mask.RowIsValid(idx)) {
96966
+ continue;
96967
+ }
96968
+ total_list_size += VectorStringifiedListParser::CountParts(source_data[idx]);
96969
+ }
96970
+
96971
+ Vector varchar_vector(LogicalType::VARCHAR, total_list_size);
96972
+
96973
+ ListVector::Reserve(result, total_list_size);
96974
+ ListVector::SetListSize(result, total_list_size);
96975
+
96976
+ auto list_data = ListVector::GetData(result);
96977
+ auto child_data = FlatVector::GetData<string_t>(varchar_vector);
96978
+
96979
+ bool all_converted = true;
96980
+ idx_t total = 0;
96981
+ for (idx_t i = 0; i < count; i++) {
96982
+ idx_t idx = i;
96983
+ if (sel) {
96984
+ idx = sel->get_index(i);
96985
+ }
96986
+ if (!source_mask.RowIsValid(idx)) {
96987
+ result_mask.SetInvalid(i);
96988
+ continue;
96989
+ }
96990
+
96991
+ list_data[i].offset = total;
96992
+ auto valid =
96993
+ VectorStringifiedListParser::SplitStringifiedList(source_data[idx], child_data, total, varchar_vector);
96994
+ if (!valid) {
96995
+ string text = "Type VARCHAR with value '" + source_data[idx].GetString() +
96996
+ "' can't be cast to the destination type LIST";
96997
+ HandleVectorCastError::Operation<string_t>(text, result_mask, idx, parameters.error_message, all_converted);
96998
+ }
96999
+ list_data[i].length = total - list_data[i].offset; // length is the amount of parts coming from this string
97000
+ }
97001
+ D_ASSERT(total_list_size == total);
97002
+
97003
+ auto &result_child = ListVector::GetEntry(result);
97004
+ auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
97005
+ CastParameters child_parameters(parameters, cast_data.child_cast_info.cast_data.get());
97006
+ return cast_data.child_cast_info.function(varchar_vector, result_child, total_list_size, child_parameters) &&
97007
+ all_converted;
97008
+ }
97009
+
97010
+ bool StringListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
97011
+ D_ASSERT(source.GetType().id() == LogicalTypeId::VARCHAR);
97012
+ D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
97013
+
97014
+ switch (source.GetVectorType()) {
97015
+ case VectorType::CONSTANT_VECTOR: {
97016
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
97017
+
97018
+ auto source_data = ConstantVector::GetData<string_t>(source);
97019
+ auto &source_mask = ConstantVector::Validity(source);
97020
+ auto &result_mask = ConstantVector::Validity(result);
97021
+
97022
+ return StringListCastLoop(source_data, source_mask, result, result_mask, 1, parameters, nullptr);
97023
+ }
97024
+ default: {
97025
+ UnifiedVectorFormat unified_source;
97026
+ result.SetVectorType(VectorType::FLAT_VECTOR);
97027
+
97028
+ source.ToUnifiedFormat(count, unified_source);
97029
+ auto source_sel = unified_source.sel;
97030
+ auto source_data = (string_t *)unified_source.data;
97031
+ auto &source_mask = unified_source.validity;
97032
+ auto &result_mask = FlatVector::Validity(result);
97033
+
97034
+ return StringListCastLoop(source_data, source_mask, result, result_mask, count, parameters, source_sel);
97035
+ }
97036
+ }
97037
+ }
97038
+
97039
+ BoundCastInfo StringToListCast(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
97040
+ // second argument allows for a secondary casting function to be passed in the CastParameters
97041
+ return BoundCastInfo(&StringListCast,
97042
+ ListBoundCastData::BindListToListCast(input, LogicalType::LIST(LogicalType::VARCHAR), target));
97043
+ }
97044
+
96953
97045
  BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const LogicalType &source,
96954
97046
  const LogicalType &target) {
96955
- // now switch on the target type
96956
97047
  switch (target.id()) {
96957
97048
  case LogicalTypeId::DATE:
96958
97049
  return BoundCastInfo(&VectorCastHelpers::TryCastErrorLoop<string_t, date_t, duckdb::TryCastErrorMessage>);
@@ -96980,6 +97071,8 @@ BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const Logical
96980
97071
  case LogicalTypeId::VARCHAR:
96981
97072
  case LogicalTypeId::JSON:
96982
97073
  return &DefaultCasts::ReinterpretCast;
97074
+ case LogicalTypeId::LIST:
97075
+ return StringToListCast(input, source, target);
96983
97076
  default:
96984
97077
  return VectorStringCastNumericSwitch(input, source, target);
96985
97078
  }
@@ -97341,6 +97434,144 @@ BoundCastInfo DefaultCasts::UUIDCastSwitch(BindCastInput &input, const LogicalTy
97341
97434
  } // namespace duckdb
97342
97435
 
97343
97436
 
97437
+ namespace duckdb {
97438
+
97439
+ struct CountPartOperation {
97440
+ idx_t count = 0;
97441
+
97442
+ void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
97443
+ count++;
97444
+ }
97445
+ };
97446
+
97447
+ struct SplitStringOperation {
97448
+ SplitStringOperation(string_t *child_data, idx_t &child_start, Vector &child)
97449
+ : child_data(child_data), child_start(child_start), child(child) {
97450
+ }
97451
+
97452
+ string_t *child_data;
97453
+ idx_t &child_start;
97454
+ Vector &child;
97455
+
97456
+ void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
97457
+
97458
+ if ((pos - start_pos) >= 4 && buf[start_pos] == 'N' && buf[start_pos + 1] == 'U' && buf[start_pos + 2] == 'L' &&
97459
+ buf[start_pos + 3] == 'L') {
97460
+ FlatVector::SetNull(child, child_start, true);
97461
+ child_start++;
97462
+ return;
97463
+ }
97464
+ child_data[child_start] = StringVector::AddString(child, buf + start_pos, pos - start_pos);
97465
+ child_start++;
97466
+ }
97467
+ };
97468
+
97469
+ static bool SkipToCloseQuotes(idx_t &pos, const char *buf, idx_t &len) {
97470
+ char quote = buf[pos];
97471
+ pos++;
97472
+
97473
+ while (pos < len) {
97474
+ if (buf[pos] == quote) {
97475
+ return true;
97476
+ }
97477
+ pos++;
97478
+ }
97479
+ return false;
97480
+ }
97481
+
97482
+ static bool SkipToClose(idx_t &idx, const char *buf, idx_t &len, idx_t &lvl) {
97483
+ while (idx < len) {
97484
+ if (buf[idx] == '[') {
97485
+ if (!SkipToClose(++idx, buf, len, lvl)) {
97486
+ return false;
97487
+ }
97488
+ lvl++;
97489
+ idx++;
97490
+ }
97491
+ if (buf[idx] == '"' || buf[idx] == '\'') {
97492
+ SkipToCloseQuotes(idx, buf, len);
97493
+ }
97494
+ if (buf[idx] == ']') {
97495
+ lvl--;
97496
+ return true;
97497
+ }
97498
+ idx++;
97499
+ }
97500
+ return false;
97501
+ }
97502
+
97503
+ template <class OP>
97504
+ static bool SplitStringifiedListInternal(const string_t &input, OP &state) {
97505
+ const char *buf = input.GetDataUnsafe();
97506
+ idx_t len = input.GetSize();
97507
+ idx_t lvl = 1;
97508
+ idx_t pos = 0;
97509
+
97510
+ while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
97511
+ pos++;
97512
+ }
97513
+ if (pos == len || buf[pos] != '[') {
97514
+ return false;
97515
+ }
97516
+ pos++;
97517
+ while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
97518
+ pos++;
97519
+ }
97520
+
97521
+ idx_t start_pos = pos;
97522
+ while (pos < len) {
97523
+ if (buf[pos] == '[') {
97524
+ if (!SkipToClose(++pos, buf, len, ++lvl)) {
97525
+ return false;
97526
+ }
97527
+ } else if (buf[pos] == '"' || buf[pos] == '\'') {
97528
+ SkipToCloseQuotes(pos, buf, len);
97529
+ } else if (buf[pos] == ',' || buf[pos] == ']') {
97530
+ idx_t trailing_whitespace = 0;
97531
+ while (StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) {
97532
+ trailing_whitespace++;
97533
+ }
97534
+ if (!(buf[pos] == ']' && start_pos == (pos))) {
97535
+ state.HandleValue(buf, start_pos, pos - trailing_whitespace);
97536
+ } // else the list is empty
97537
+ if (buf[pos] == ']') {
97538
+ lvl--;
97539
+ break;
97540
+ }
97541
+ while (pos + 1 < len && StringUtil::CharacterIsSpace(buf[pos + 1])) {
97542
+ pos++;
97543
+ }
97544
+ start_pos = pos + 1;
97545
+ }
97546
+ pos++;
97547
+ }
97548
+ pos++;
97549
+ while (pos < len) {
97550
+ if (!StringUtil::CharacterIsSpace(buf[pos])) {
97551
+ return false;
97552
+ }
97553
+ pos++;
97554
+ }
97555
+ if (lvl != 0) {
97556
+ return false;
97557
+ }
97558
+ return true;
97559
+ }
97560
+
97561
+ bool VectorStringifiedListParser::SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start,
97562
+ Vector &child) {
97563
+ SplitStringOperation state(child_data, child_start, child);
97564
+ return SplitStringifiedListInternal<SplitStringOperation>(input, state);
97565
+ }
97566
+
97567
+ idx_t VectorStringifiedListParser::CountParts(const string_t &input) {
97568
+ CountPartOperation state;
97569
+ SplitStringifiedListInternal<CountPartOperation>(input, state);
97570
+ return state.count;
97571
+ }
97572
+ } // namespace duckdb
97573
+
97574
+
97344
97575
  namespace duckdb {
97345
97576
 
97346
97577
  //! The target type determines the preferred implicit casts
@@ -146173,12 +146404,11 @@ private:
146173
146404
  //! Find Joins with a DelimGet that can be removed
146174
146405
  void FindCandidates(unique_ptr<LogicalOperator> *op_ptr, vector<unique_ptr<LogicalOperator> *> &candidates);
146175
146406
  //! Try to remove a Join with a DelimGet, returns true if it was successful
146176
- bool RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, DeliminatorPlanUpdater &updater);
146177
- //! Replace references to a removed DelimGet, remove DelimJoins if all their DelimGets are gone
146178
- void UpdatePlan(LogicalOperator &op, expression_map_t<Expression *> &expr_map,
146179
- column_binding_map_t<bool> &projection_map);
146180
- //! Whether the operator has one or more children of type DELIM_GET
146181
- bool HasChildDelimGet(LogicalOperator &op);
146407
+ bool RemoveCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
146408
+ DeliminatorPlanUpdater &updater);
146409
+ //! Try to remove an inequality Join with a DelimGet, returns true if it was successful
146410
+ bool RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
146411
+ DeliminatorPlanUpdater &updater);
146182
146412
  };
146183
146413
 
146184
146414
  } // namespace duckdb
@@ -146202,31 +146432,64 @@ public:
146202
146432
  //! Update the plan after a DelimGet has been removed
146203
146433
  void VisitOperator(LogicalOperator &op) override;
146204
146434
  void VisitExpression(unique_ptr<Expression> *expression) override;
146205
- //! Whether the operator has one or more children of type DELIM_GET
146206
- bool HasChildDelimGet(LogicalOperator &op);
146435
+
146436
+ public:
146207
146437
  expression_map_t<Expression *> expr_map;
146208
146438
  column_binding_map_t<bool> projection_map;
146439
+ column_binding_map_t<Expression *> reverse_proj_or_agg_map;
146209
146440
  unique_ptr<LogicalOperator> temp_ptr;
146210
146441
  };
146211
146442
 
146443
+ static idx_t DelimGetCount(LogicalOperator &op) {
146444
+ if (op.type == LogicalOperatorType::LOGICAL_DELIM_GET) {
146445
+ return 1;
146446
+ }
146447
+ idx_t child_count = 0;
146448
+ for (auto &child : op.children) {
146449
+ child_count += DelimGetCount(*child);
146450
+ }
146451
+ return child_count;
146452
+ }
146453
+
146454
+ static bool IsEqualityJoinCondition(JoinCondition &cond) {
146455
+ switch (cond.comparison) {
146456
+ case ExpressionType::COMPARE_EQUAL:
146457
+ case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
146458
+ return true;
146459
+ default:
146460
+ return false;
146461
+ }
146462
+ }
146463
+
146464
+ static bool InequalityDelimJoinCanBeEliminated(JoinType &join_type) {
146465
+ switch (join_type) {
146466
+ case JoinType::ANTI:
146467
+ case JoinType::MARK:
146468
+ case JoinType::SEMI:
146469
+ case JoinType::SINGLE:
146470
+ return true;
146471
+ default:
146472
+ return false;
146473
+ }
146474
+ }
146475
+
146212
146476
  void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
146213
146477
  VisitOperatorChildren(op);
146214
146478
  VisitOperatorExpressions(op);
146215
- if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN && !HasChildDelimGet(op)) {
146479
+ if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN && DelimGetCount(op) == 0) {
146216
146480
  auto &delim_join = (LogicalDelimJoin &)op;
146217
146481
  auto decs = &delim_join.duplicate_eliminated_columns;
146218
146482
  for (auto &cond : delim_join.conditions) {
146219
- if (cond.comparison != ExpressionType::COMPARE_EQUAL &&
146220
- cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
146483
+ if (!IsEqualityJoinCondition(cond)) {
146221
146484
  continue;
146222
146485
  }
146223
- Expression *rhs = cond.right.get();
146486
+ auto rhs = cond.right.get();
146224
146487
  while (rhs->type == ExpressionType::OPERATOR_CAST) {
146225
146488
  auto &cast = (BoundCastExpression &)*rhs;
146226
146489
  rhs = cast.child.get();
146227
146490
  }
146228
146491
  if (rhs->type != ExpressionType::BOUND_COLUMN_REF) {
146229
- throw InternalException("Erorr in deliminator: expected a bound column reference");
146492
+ throw InternalException("Error in Deliminator: expected a bound column reference");
146230
146493
  }
146231
146494
  auto &colref = (BoundColumnRefExpression &)*rhs;
146232
146495
  if (projection_map.find(colref.binding) != projection_map.end()) {
@@ -146257,25 +146520,13 @@ void DeliminatorPlanUpdater::VisitExpression(unique_ptr<Expression> *expression)
146257
146520
  }
146258
146521
  }
146259
146522
 
146260
- bool DeliminatorPlanUpdater::HasChildDelimGet(LogicalOperator &op) {
146261
- if (op.type == LogicalOperatorType::LOGICAL_DELIM_GET) {
146262
- return true;
146263
- }
146264
- for (auto &child : op.children) {
146265
- if (HasChildDelimGet(*child)) {
146266
- return true;
146267
- }
146268
- }
146269
- return false;
146270
- }
146271
-
146272
146523
  unique_ptr<LogicalOperator> Deliminator::Optimize(unique_ptr<LogicalOperator> op) {
146273
146524
  vector<unique_ptr<LogicalOperator> *> candidates;
146274
146525
  FindCandidates(&op, candidates);
146275
146526
 
146276
- for (auto candidate : candidates) {
146527
+ for (auto &candidate : candidates) {
146277
146528
  DeliminatorPlanUpdater updater;
146278
- if (RemoveCandidate(candidate, updater)) {
146529
+ if (RemoveCandidate(&op, candidate, updater)) {
146279
146530
  updater.VisitOperator(*op);
146280
146531
  }
146281
146532
  }
@@ -146330,10 +146581,21 @@ static bool OperatorIsDelimGet(LogicalOperator &op) {
146330
146581
  return false;
146331
146582
  }
146332
146583
 
146333
- bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, DeliminatorPlanUpdater &updater) {
146334
- auto &proj_or_agg = **op_ptr;
146584
+ static bool ChildJoinTypeCanBeDeliminated(JoinType &join_type) {
146585
+ switch (join_type) {
146586
+ case JoinType::INNER:
146587
+ case JoinType::SEMI:
146588
+ return true;
146589
+ default:
146590
+ return false;
146591
+ }
146592
+ }
146593
+
146594
+ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
146595
+ DeliminatorPlanUpdater &updater) {
146596
+ auto &proj_or_agg = **candidate;
146335
146597
  auto &join = (LogicalComparisonJoin &)*proj_or_agg.children[0];
146336
- if (join.join_type != JoinType::INNER && join.join_type != JoinType::SEMI) {
146598
+ if (!ChildJoinTypeCanBeDeliminated(join.join_type)) {
146337
146599
  return false;
146338
146600
  }
146339
146601
 
@@ -146351,13 +146613,10 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
146351
146613
  return false;
146352
146614
  }
146353
146615
  // check if joining with the DelimGet is redundant, and collect relevant column information
146616
+ bool all_equality_conditions = true;
146354
146617
  vector<Expression *> nulls_are_not_equal_exprs;
146355
146618
  for (auto &cond : join.conditions) {
146356
- if (cond.comparison != ExpressionType::COMPARE_EQUAL &&
146357
- cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
146358
- // non-equality join condition
146359
- return false;
146360
- }
146619
+ all_equality_conditions = all_equality_conditions && IsEqualityJoinCondition(cond);
146361
146620
  auto delim_side = delim_idx == 0 ? cond.left.get() : cond.right.get();
146362
146621
  auto other_side = delim_idx == 0 ? cond.right.get() : cond.left.get();
146363
146622
  if (delim_side->type != ExpressionType::BOUND_COLUMN_REF) {
@@ -146370,10 +146629,12 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
146370
146629
  nulls_are_not_equal_exprs.push_back(other_side);
146371
146630
  }
146372
146631
  }
146632
+
146373
146633
  // removed DelimGet columns are assigned a new ColumnBinding by Projection/Aggregation, keep track here
146374
146634
  if (proj_or_agg.type == LogicalOperatorType::LOGICAL_PROJECTION) {
146375
146635
  for (auto &cb : proj_or_agg.GetColumnBindings()) {
146376
146636
  updater.projection_map[cb] = true;
146637
+ updater.reverse_proj_or_agg_map[cb] = proj_or_agg.expressions[cb.column_index].get();
146377
146638
  for (auto &expr : nulls_are_not_equal_exprs) {
146378
146639
  if (proj_or_agg.expressions[cb.column_index]->Equals(expr)) {
146379
146640
  updater.projection_map[cb] = false;
@@ -146383,8 +146644,19 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
146383
146644
  }
146384
146645
  } else {
146385
146646
  auto &agg = (LogicalAggregate &)proj_or_agg;
146647
+
146648
+ // Create a vector of all exprs in the agg
146649
+ vector<Expression *> all_agg_exprs;
146650
+ for (auto &expr : agg.groups) {
146651
+ all_agg_exprs.push_back(expr.get());
146652
+ }
146653
+ for (auto &expr : agg.expressions) {
146654
+ all_agg_exprs.push_back(expr.get());
146655
+ }
146656
+
146386
146657
  for (auto &cb : agg.GetColumnBindings()) {
146387
146658
  updater.projection_map[cb] = true;
146659
+ updater.reverse_proj_or_agg_map[cb] = all_agg_exprs[cb.column_index];
146388
146660
  for (auto &expr : nulls_are_not_equal_exprs) {
146389
146661
  if ((cb.table_index == agg.group_index && agg.groups[cb.column_index]->Equals(expr)) ||
146390
146662
  (cb.table_index == agg.aggregate_index && agg.expressions[cb.column_index]->Equals(expr))) {
@@ -146394,6 +146666,14 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
146394
146666
  }
146395
146667
  }
146396
146668
  }
146669
+
146670
+ if (!all_equality_conditions) {
146671
+ // we can get rid of an inequality join with a DelimGet, but only under specific circumstances
146672
+ if (!RemoveInequalityCandidate(plan, candidate, updater)) {
146673
+ return false;
146674
+ }
146675
+ }
146676
+
146397
146677
  // make a filter if needed
146398
146678
  if (!nulls_are_not_equal_exprs.empty() || filter != nullptr) {
146399
146679
  auto filter_op = make_unique<LogicalFilter>();
@@ -146421,6 +146701,146 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
146421
146701
  return true;
146422
146702
  }
146423
146703
 
146704
+ static void GetDelimJoins(LogicalOperator &op, vector<LogicalOperator *> &delim_joins) {
146705
+ for (auto &child : op.children) {
146706
+ GetDelimJoins(*child, delim_joins);
146707
+ }
146708
+ if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
146709
+ delim_joins.push_back(&op);
146710
+ }
146711
+ }
146712
+
146713
+ static bool HasChild(LogicalOperator *haystack, LogicalOperator *needle, idx_t &side) {
146714
+ if (haystack == needle) {
146715
+ return true;
146716
+ }
146717
+ for (idx_t i = 0; i < haystack->children.size(); i++) {
146718
+ auto &child = haystack->children[i];
146719
+ idx_t dummy_side;
146720
+ if (HasChild(child.get(), needle, dummy_side)) {
146721
+ side = i;
146722
+ return true;
146723
+ }
146724
+ }
146725
+ return false;
146726
+ }
146727
+
146728
+ bool Deliminator::RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
146729
+ DeliminatorPlanUpdater &updater) {
146730
+ auto &proj_or_agg = **candidate;
146731
+ // first, we find a DelimJoin in "plan" that has only one DelimGet as a child, which is in "candidate"
146732
+ if (DelimGetCount(proj_or_agg) != 1) {
146733
+ // the candidate therefore must have only a single DelimGet in its children
146734
+ return false;
146735
+ }
146736
+
146737
+ vector<LogicalOperator *> delim_joins;
146738
+ GetDelimJoins(**plan, delim_joins);
146739
+
146740
+ LogicalOperator *parent = nullptr;
146741
+ idx_t parent_delim_get_side;
146742
+ for (auto dj : delim_joins) {
146743
+ D_ASSERT(dj->type == LogicalOperatorType::LOGICAL_DELIM_JOIN);
146744
+ if (!HasChild(dj, &proj_or_agg, parent_delim_get_side)) {
146745
+ continue;
146746
+ }
146747
+ // we found a parent DelimJoin
146748
+ if (DelimGetCount(*dj) != 1) {
146749
+ // it has more than one DelimGet children
146750
+ continue;
146751
+ }
146752
+
146753
+ // we can only remove inequality join with a DelimGet if the parent DelimJoin has one of these join types
146754
+ auto &delim_join = (LogicalDelimJoin &)*dj;
146755
+ if (!InequalityDelimJoinCanBeEliminated(delim_join.join_type)) {
146756
+ continue;
146757
+ }
146758
+
146759
+ parent = dj;
146760
+ break;
146761
+ }
146762
+ if (!parent) {
146763
+ return false;
146764
+ }
146765
+
146766
+ // we found the parent delim join, and we may be able to remove the child DelimGet join
146767
+ // but we need to make sure that their conditions refer to exactly the same columns
146768
+ auto &parent_delim_join = (LogicalDelimJoin &)*parent;
146769
+ auto &join = (LogicalComparisonJoin &)*proj_or_agg.children[0];
146770
+ if (parent_delim_join.conditions.size() != join.conditions.size()) {
146771
+ // different number of conditions, can't replace
146772
+ return false;
146773
+ }
146774
+
146775
+ // we can only do this optimization under the following conditions:
146776
+ // 1. all join expressions coming from the DelimGet side are colrefs
146777
+ // 2. these expressions refer to colrefs coming from the proj/agg on top of the child DelimGet join
146778
+ // 3. the expression (before it was proj/agg) can be found in the conditions of the child DelimGet join
146779
+ for (auto &parent_cond : parent_delim_join.conditions) {
146780
+ auto &parent_expr = parent_delim_get_side == 0 ? parent_cond.left : parent_cond.right;
146781
+ if (parent_expr->type != ExpressionType::BOUND_COLUMN_REF) {
146782
+ // can only deal with colrefs
146783
+ return false;
146784
+ }
146785
+ auto &parent_colref = (BoundColumnRefExpression &)*parent_expr;
146786
+ auto it = updater.reverse_proj_or_agg_map.find(parent_colref.binding);
146787
+ if (it == updater.reverse_proj_or_agg_map.end()) {
146788
+ // refers to a column that was not in the child DelimGet join
146789
+ return false;
146790
+ }
146791
+ // try to find the corresponding child condition
146792
+ // TODO: can be more flexible - allow CAST
146793
+ auto child_expr = it->second;
146794
+ bool found = false;
146795
+ for (auto &child_cond : join.conditions) {
146796
+ if (child_cond.left->Equals(child_expr) || child_cond.right->Equals(child_expr)) {
146797
+ found = true;
146798
+ break;
146799
+ }
146800
+ }
146801
+ if (!found) {
146802
+ // could not find the mapped expression in the child condition expressions
146803
+ return false;
146804
+ }
146805
+ }
146806
+
146807
+ // TODO: we cannot perform the optimization here because our pure inequality joins don't implement
146808
+ // JoinType::SINGLE yet
146809
+ if (parent_delim_join.join_type == JoinType::SINGLE) {
146810
+ bool has_one_equality = false;
146811
+ for (auto &cond : join.conditions) {
146812
+ has_one_equality = has_one_equality || IsEqualityJoinCondition(cond);
146813
+ }
146814
+ if (!has_one_equality) {
146815
+ return false;
146816
+ }
146817
+ }
146818
+
146819
+ // we are now sure that we can remove the child DelimGet join, so we basically do the same loop as above
146820
+ // this time without checks because we already did them, and replace the expressions
146821
+ for (auto &parent_cond : parent_delim_join.conditions) {
146822
+ auto &parent_expr = parent_delim_get_side == 0 ? parent_cond.left : parent_cond.right;
146823
+ auto &parent_colref = (BoundColumnRefExpression &)*parent_expr;
146824
+ auto it = updater.reverse_proj_or_agg_map.find(parent_colref.binding);
146825
+ auto child_expr = it->second;
146826
+ for (auto &child_cond : join.conditions) {
146827
+ if (!child_cond.left->Equals(child_expr) && !child_cond.right->Equals(child_expr)) {
146828
+ continue;
146829
+ }
146830
+ parent_expr =
146831
+ make_unique<BoundColumnRefExpression>(parent_expr->alias, parent_expr->return_type, it->first);
146832
+ parent_cond.comparison = child_cond.comparison;
146833
+ break;
146834
+ }
146835
+ }
146836
+
146837
+ // no longer needs to be a delim join
146838
+ parent_delim_join.duplicate_eliminated_columns.clear();
146839
+ parent_delim_join.type = LogicalOperatorType::LOGICAL_COMPARISON_JOIN;
146840
+
146841
+ return true;
146842
+ }
146843
+
146424
146844
  } // namespace duckdb
146425
146845
 
146426
146846