duckdb 0.5.2-dev833.0 → 0.5.2-dev841.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.2-dev833.0",
4
+ "version": "0.5.2-dev841.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -146404,12 +146404,11 @@ private:
146404
146404
  //! Find Joins with a DelimGet that can be removed
146405
146405
  void FindCandidates(unique_ptr<LogicalOperator> *op_ptr, vector<unique_ptr<LogicalOperator> *> &candidates);
146406
146406
  //! Try to remove a Join with a DelimGet, returns true if it was successful
146407
- bool RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, DeliminatorPlanUpdater &updater);
146408
- //! Replace references to a removed DelimGet, remove DelimJoins if all their DelimGets are gone
146409
- void UpdatePlan(LogicalOperator &op, expression_map_t<Expression *> &expr_map,
146410
- column_binding_map_t<bool> &projection_map);
146411
- //! Whether the operator has one or more children of type DELIM_GET
146412
- bool HasChildDelimGet(LogicalOperator &op);
146407
+ bool RemoveCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
146408
+ DeliminatorPlanUpdater &updater);
146409
+ //! Try to remove an inequality Join with a DelimGet, returns true if it was successful
146410
+ bool RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
146411
+ DeliminatorPlanUpdater &updater);
146413
146412
  };
146414
146413
 
146415
146414
  } // namespace duckdb
@@ -146433,31 +146432,64 @@ public:
146433
146432
  //! Update the plan after a DelimGet has been removed
146434
146433
  void VisitOperator(LogicalOperator &op) override;
146435
146434
  void VisitExpression(unique_ptr<Expression> *expression) override;
146436
- //! Whether the operator has one or more children of type DELIM_GET
146437
- bool HasChildDelimGet(LogicalOperator &op);
146435
+
146436
+ public:
146438
146437
  expression_map_t<Expression *> expr_map;
146439
146438
  column_binding_map_t<bool> projection_map;
146439
+ column_binding_map_t<Expression *> reverse_proj_or_agg_map;
146440
146440
  unique_ptr<LogicalOperator> temp_ptr;
146441
146441
  };
146442
146442
 
146443
+ static idx_t DelimGetCount(LogicalOperator &op) {
146444
+ if (op.type == LogicalOperatorType::LOGICAL_DELIM_GET) {
146445
+ return 1;
146446
+ }
146447
+ idx_t child_count = 0;
146448
+ for (auto &child : op.children) {
146449
+ child_count += DelimGetCount(*child);
146450
+ }
146451
+ return child_count;
146452
+ }
146453
+
146454
+ static bool IsEqualityJoinCondition(JoinCondition &cond) {
146455
+ switch (cond.comparison) {
146456
+ case ExpressionType::COMPARE_EQUAL:
146457
+ case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
146458
+ return true;
146459
+ default:
146460
+ return false;
146461
+ }
146462
+ }
146463
+
146464
+ static bool InequalityDelimJoinCanBeEliminated(JoinType &join_type) {
146465
+ switch (join_type) {
146466
+ case JoinType::ANTI:
146467
+ case JoinType::MARK:
146468
+ case JoinType::SEMI:
146469
+ case JoinType::SINGLE:
146470
+ return true;
146471
+ default:
146472
+ return false;
146473
+ }
146474
+ }
146475
+
146443
146476
  void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
146444
146477
  VisitOperatorChildren(op);
146445
146478
  VisitOperatorExpressions(op);
146446
- if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN && !HasChildDelimGet(op)) {
146479
+ if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN && DelimGetCount(op) == 0) {
146447
146480
  auto &delim_join = (LogicalDelimJoin &)op;
146448
146481
  auto decs = &delim_join.duplicate_eliminated_columns;
146449
146482
  for (auto &cond : delim_join.conditions) {
146450
- if (cond.comparison != ExpressionType::COMPARE_EQUAL &&
146451
- cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
146483
+ if (!IsEqualityJoinCondition(cond)) {
146452
146484
  continue;
146453
146485
  }
146454
- Expression *rhs = cond.right.get();
146486
+ auto rhs = cond.right.get();
146455
146487
  while (rhs->type == ExpressionType::OPERATOR_CAST) {
146456
146488
  auto &cast = (BoundCastExpression &)*rhs;
146457
146489
  rhs = cast.child.get();
146458
146490
  }
146459
146491
  if (rhs->type != ExpressionType::BOUND_COLUMN_REF) {
146460
- throw InternalException("Erorr in deliminator: expected a bound column reference");
146492
+ throw InternalException("Error in Deliminator: expected a bound column reference");
146461
146493
  }
146462
146494
  auto &colref = (BoundColumnRefExpression &)*rhs;
146463
146495
  if (projection_map.find(colref.binding) != projection_map.end()) {
@@ -146488,25 +146520,13 @@ void DeliminatorPlanUpdater::VisitExpression(unique_ptr<Expression> *expression)
146488
146520
  }
146489
146521
  }
146490
146522
 
146491
- bool DeliminatorPlanUpdater::HasChildDelimGet(LogicalOperator &op) {
146492
- if (op.type == LogicalOperatorType::LOGICAL_DELIM_GET) {
146493
- return true;
146494
- }
146495
- for (auto &child : op.children) {
146496
- if (HasChildDelimGet(*child)) {
146497
- return true;
146498
- }
146499
- }
146500
- return false;
146501
- }
146502
-
146503
146523
  unique_ptr<LogicalOperator> Deliminator::Optimize(unique_ptr<LogicalOperator> op) {
146504
146524
  vector<unique_ptr<LogicalOperator> *> candidates;
146505
146525
  FindCandidates(&op, candidates);
146506
146526
 
146507
- for (auto candidate : candidates) {
146527
+ for (auto &candidate : candidates) {
146508
146528
  DeliminatorPlanUpdater updater;
146509
- if (RemoveCandidate(candidate, updater)) {
146529
+ if (RemoveCandidate(&op, candidate, updater)) {
146510
146530
  updater.VisitOperator(*op);
146511
146531
  }
146512
146532
  }
@@ -146561,10 +146581,21 @@ static bool OperatorIsDelimGet(LogicalOperator &op) {
146561
146581
  return false;
146562
146582
  }
146563
146583
 
146564
- bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, DeliminatorPlanUpdater &updater) {
146565
- auto &proj_or_agg = **op_ptr;
146584
+ static bool ChildJoinTypeCanBeDeliminated(JoinType &join_type) {
146585
+ switch (join_type) {
146586
+ case JoinType::INNER:
146587
+ case JoinType::SEMI:
146588
+ return true;
146589
+ default:
146590
+ return false;
146591
+ }
146592
+ }
146593
+
146594
+ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
146595
+ DeliminatorPlanUpdater &updater) {
146596
+ auto &proj_or_agg = **candidate;
146566
146597
  auto &join = (LogicalComparisonJoin &)*proj_or_agg.children[0];
146567
- if (join.join_type != JoinType::INNER && join.join_type != JoinType::SEMI) {
146598
+ if (!ChildJoinTypeCanBeDeliminated(join.join_type)) {
146568
146599
  return false;
146569
146600
  }
146570
146601
 
@@ -146582,13 +146613,10 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
146582
146613
  return false;
146583
146614
  }
146584
146615
  // check if joining with the DelimGet is redundant, and collect relevant column information
146616
+ bool all_equality_conditions = true;
146585
146617
  vector<Expression *> nulls_are_not_equal_exprs;
146586
146618
  for (auto &cond : join.conditions) {
146587
- if (cond.comparison != ExpressionType::COMPARE_EQUAL &&
146588
- cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
146589
- // non-equality join condition
146590
- return false;
146591
- }
146619
+ all_equality_conditions = all_equality_conditions && IsEqualityJoinCondition(cond);
146592
146620
  auto delim_side = delim_idx == 0 ? cond.left.get() : cond.right.get();
146593
146621
  auto other_side = delim_idx == 0 ? cond.right.get() : cond.left.get();
146594
146622
  if (delim_side->type != ExpressionType::BOUND_COLUMN_REF) {
@@ -146601,10 +146629,12 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
146601
146629
  nulls_are_not_equal_exprs.push_back(other_side);
146602
146630
  }
146603
146631
  }
146632
+
146604
146633
  // removed DelimGet columns are assigned a new ColumnBinding by Projection/Aggregation, keep track here
146605
146634
  if (proj_or_agg.type == LogicalOperatorType::LOGICAL_PROJECTION) {
146606
146635
  for (auto &cb : proj_or_agg.GetColumnBindings()) {
146607
146636
  updater.projection_map[cb] = true;
146637
+ updater.reverse_proj_or_agg_map[cb] = proj_or_agg.expressions[cb.column_index].get();
146608
146638
  for (auto &expr : nulls_are_not_equal_exprs) {
146609
146639
  if (proj_or_agg.expressions[cb.column_index]->Equals(expr)) {
146610
146640
  updater.projection_map[cb] = false;
@@ -146614,8 +146644,19 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
146614
146644
  }
146615
146645
  } else {
146616
146646
  auto &agg = (LogicalAggregate &)proj_or_agg;
146647
+
146648
+ // Create a vector of all exprs in the agg
146649
+ vector<Expression *> all_agg_exprs;
146650
+ for (auto &expr : agg.groups) {
146651
+ all_agg_exprs.push_back(expr.get());
146652
+ }
146653
+ for (auto &expr : agg.expressions) {
146654
+ all_agg_exprs.push_back(expr.get());
146655
+ }
146656
+
146617
146657
  for (auto &cb : agg.GetColumnBindings()) {
146618
146658
  updater.projection_map[cb] = true;
146659
+ updater.reverse_proj_or_agg_map[cb] = all_agg_exprs[cb.column_index];
146619
146660
  for (auto &expr : nulls_are_not_equal_exprs) {
146620
146661
  if ((cb.table_index == agg.group_index && agg.groups[cb.column_index]->Equals(expr)) ||
146621
146662
  (cb.table_index == agg.aggregate_index && agg.expressions[cb.column_index]->Equals(expr))) {
@@ -146625,6 +146666,14 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
146625
146666
  }
146626
146667
  }
146627
146668
  }
146669
+
146670
+ if (!all_equality_conditions) {
146671
+ // we can get rid of an inequality join with a DelimGet, but only under specific circumstances
146672
+ if (!RemoveInequalityCandidate(plan, candidate, updater)) {
146673
+ return false;
146674
+ }
146675
+ }
146676
+
146628
146677
  // make a filter if needed
146629
146678
  if (!nulls_are_not_equal_exprs.empty() || filter != nullptr) {
146630
146679
  auto filter_op = make_unique<LogicalFilter>();
@@ -146652,6 +146701,146 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
146652
146701
  return true;
146653
146702
  }
146654
146703
 
146704
+ static void GetDelimJoins(LogicalOperator &op, vector<LogicalOperator *> &delim_joins) {
146705
+ for (auto &child : op.children) {
146706
+ GetDelimJoins(*child, delim_joins);
146707
+ }
146708
+ if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
146709
+ delim_joins.push_back(&op);
146710
+ }
146711
+ }
146712
+
146713
+ static bool HasChild(LogicalOperator *haystack, LogicalOperator *needle, idx_t &side) {
146714
+ if (haystack == needle) {
146715
+ return true;
146716
+ }
146717
+ for (idx_t i = 0; i < haystack->children.size(); i++) {
146718
+ auto &child = haystack->children[i];
146719
+ idx_t dummy_side;
146720
+ if (HasChild(child.get(), needle, dummy_side)) {
146721
+ side = i;
146722
+ return true;
146723
+ }
146724
+ }
146725
+ return false;
146726
+ }
146727
+
146728
+ bool Deliminator::RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
146729
+ DeliminatorPlanUpdater &updater) {
146730
+ auto &proj_or_agg = **candidate;
146731
+ // first, we find a DelimJoin in "plan" that has only one DelimGet as a child, which is in "candidate"
146732
+ if (DelimGetCount(proj_or_agg) != 1) {
146733
+ // the candidate therefore must have only a single DelimGet in its children
146734
+ return false;
146735
+ }
146736
+
146737
+ vector<LogicalOperator *> delim_joins;
146738
+ GetDelimJoins(**plan, delim_joins);
146739
+
146740
+ LogicalOperator *parent = nullptr;
146741
+ idx_t parent_delim_get_side;
146742
+ for (auto dj : delim_joins) {
146743
+ D_ASSERT(dj->type == LogicalOperatorType::LOGICAL_DELIM_JOIN);
146744
+ if (!HasChild(dj, &proj_or_agg, parent_delim_get_side)) {
146745
+ continue;
146746
+ }
146747
+ // we found a parent DelimJoin
146748
+ if (DelimGetCount(*dj) != 1) {
146749
+ // it has more than one DelimGet children
146750
+ continue;
146751
+ }
146752
+
146753
+ // we can only remove inequality join with a DelimGet if the parent DelimJoin has one of these join types
146754
+ auto &delim_join = (LogicalDelimJoin &)*dj;
146755
+ if (!InequalityDelimJoinCanBeEliminated(delim_join.join_type)) {
146756
+ continue;
146757
+ }
146758
+
146759
+ parent = dj;
146760
+ break;
146761
+ }
146762
+ if (!parent) {
146763
+ return false;
146764
+ }
146765
+
146766
+ // we found the parent delim join, and we may be able to remove the child DelimGet join
146767
+ // but we need to make sure that their conditions refer to exactly the same columns
146768
+ auto &parent_delim_join = (LogicalDelimJoin &)*parent;
146769
+ auto &join = (LogicalComparisonJoin &)*proj_or_agg.children[0];
146770
+ if (parent_delim_join.conditions.size() != join.conditions.size()) {
146771
+ // different number of conditions, can't replace
146772
+ return false;
146773
+ }
146774
+
146775
+ // we can only do this optimization under the following conditions:
146776
+ // 1. all join expressions coming from the DelimGet side are colrefs
146777
+ // 2. these expressions refer to colrefs coming from the proj/agg on top of the child DelimGet join
146778
+ // 3. the expression (before it was proj/agg) can be found in the conditions of the child DelimGet join
146779
+ for (auto &parent_cond : parent_delim_join.conditions) {
146780
+ auto &parent_expr = parent_delim_get_side == 0 ? parent_cond.left : parent_cond.right;
146781
+ if (parent_expr->type != ExpressionType::BOUND_COLUMN_REF) {
146782
+ // can only deal with colrefs
146783
+ return false;
146784
+ }
146785
+ auto &parent_colref = (BoundColumnRefExpression &)*parent_expr;
146786
+ auto it = updater.reverse_proj_or_agg_map.find(parent_colref.binding);
146787
+ if (it == updater.reverse_proj_or_agg_map.end()) {
146788
+ // refers to a column that was not in the child DelimGet join
146789
+ return false;
146790
+ }
146791
+ // try to find the corresponding child condition
146792
+ // TODO: can be more flexible - allow CAST
146793
+ auto child_expr = it->second;
146794
+ bool found = false;
146795
+ for (auto &child_cond : join.conditions) {
146796
+ if (child_cond.left->Equals(child_expr) || child_cond.right->Equals(child_expr)) {
146797
+ found = true;
146798
+ break;
146799
+ }
146800
+ }
146801
+ if (!found) {
146802
+ // could not find the mapped expression in the child condition expressions
146803
+ return false;
146804
+ }
146805
+ }
146806
+
146807
+ // TODO: we cannot perform the optimization here because our pure inequality joins don't implement
146808
+ // JoinType::SINGLE yet
146809
+ if (parent_delim_join.join_type == JoinType::SINGLE) {
146810
+ bool has_one_equality = false;
146811
+ for (auto &cond : join.conditions) {
146812
+ has_one_equality = has_one_equality || IsEqualityJoinCondition(cond);
146813
+ }
146814
+ if (!has_one_equality) {
146815
+ return false;
146816
+ }
146817
+ }
146818
+
146819
+ // we are now sure that we can remove the child DelimGet join, so we basically do the same loop as above
146820
+ // this time without checks because we already did them, and replace the expressions
146821
+ for (auto &parent_cond : parent_delim_join.conditions) {
146822
+ auto &parent_expr = parent_delim_get_side == 0 ? parent_cond.left : parent_cond.right;
146823
+ auto &parent_colref = (BoundColumnRefExpression &)*parent_expr;
146824
+ auto it = updater.reverse_proj_or_agg_map.find(parent_colref.binding);
146825
+ auto child_expr = it->second;
146826
+ for (auto &child_cond : join.conditions) {
146827
+ if (!child_cond.left->Equals(child_expr) && !child_cond.right->Equals(child_expr)) {
146828
+ continue;
146829
+ }
146830
+ parent_expr =
146831
+ make_unique<BoundColumnRefExpression>(parent_expr->alias, parent_expr->return_type, it->first);
146832
+ parent_cond.comparison = child_cond.comparison;
146833
+ break;
146834
+ }
146835
+ }
146836
+
146837
+ // no longer needs to be a delim join
146838
+ parent_delim_join.duplicate_eliminated_columns.clear();
146839
+ parent_delim_join.type = LogicalOperatorType::LOGICAL_COMPARISON_JOIN;
146840
+
146841
+ return true;
146842
+ }
146843
+
146655
146844
  } // namespace duckdb
146656
146845
 
146657
146846
 
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "788ebe9a0"
15
- #define DUCKDB_VERSION "v0.5.2-dev833"
14
+ #define DUCKDB_SOURCE_ID "c2ca23eef"
15
+ #define DUCKDB_VERSION "v0.5.2-dev841"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //