duckdb 0.5.2-dev833.0 → 0.5.2-dev843.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +224 -35
- package/src/duckdb.hpp +2 -2
- package/src/parquet-amalgamation.cpp +37468 -37468
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -146404,12 +146404,11 @@ private:
|
|
|
146404
146404
|
//! Find Joins with a DelimGet that can be removed
|
|
146405
146405
|
void FindCandidates(unique_ptr<LogicalOperator> *op_ptr, vector<unique_ptr<LogicalOperator> *> &candidates);
|
|
146406
146406
|
//! Try to remove a Join with a DelimGet, returns true if it was successful
|
|
146407
|
-
bool RemoveCandidate(unique_ptr<LogicalOperator> *
|
|
146408
|
-
|
|
146409
|
-
|
|
146410
|
-
|
|
146411
|
-
|
|
146412
|
-
bool HasChildDelimGet(LogicalOperator &op);
|
|
146407
|
+
bool RemoveCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
|
|
146408
|
+
DeliminatorPlanUpdater &updater);
|
|
146409
|
+
//! Try to remove an inequality Join with a DelimGet, returns true if it was successful
|
|
146410
|
+
bool RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
|
|
146411
|
+
DeliminatorPlanUpdater &updater);
|
|
146413
146412
|
};
|
|
146414
146413
|
|
|
146415
146414
|
} // namespace duckdb
|
|
@@ -146433,31 +146432,64 @@ public:
|
|
|
146433
146432
|
//! Update the plan after a DelimGet has been removed
|
|
146434
146433
|
void VisitOperator(LogicalOperator &op) override;
|
|
146435
146434
|
void VisitExpression(unique_ptr<Expression> *expression) override;
|
|
146436
|
-
|
|
146437
|
-
|
|
146435
|
+
|
|
146436
|
+
public:
|
|
146438
146437
|
expression_map_t<Expression *> expr_map;
|
|
146439
146438
|
column_binding_map_t<bool> projection_map;
|
|
146439
|
+
column_binding_map_t<Expression *> reverse_proj_or_agg_map;
|
|
146440
146440
|
unique_ptr<LogicalOperator> temp_ptr;
|
|
146441
146441
|
};
|
|
146442
146442
|
|
|
146443
|
+
//! Counts the LOGICAL_DELIM_GET operators in the subtree rooted at "op" (including "op" itself).
//! A DelimGet node contributes 1 and its own children are not visited; any other node contributes
//! the sum of the counts of its children.
static idx_t DelimGetCount(LogicalOperator &op) {
|
|
146444
|
+
if (op.type == LogicalOperatorType::LOGICAL_DELIM_GET) {
|
|
146445
|
+
return 1;
|
|
146446
|
+
}
|
|
146447
|
+
idx_t child_count = 0;
|
|
146448
|
+
for (auto &child : op.children) {
|
|
146449
|
+
child_count += DelimGetCount(*child);
|
|
146450
|
+
}
|
|
146451
|
+
return child_count;
|
|
146452
|
+
}
|
|
146453
|
+
|
|
146454
|
+
//! Returns true if the comparison of "cond" is COMPARE_EQUAL or COMPARE_NOT_DISTINCT_FROM,
//! and false for every other comparison type.
static bool IsEqualityJoinCondition(JoinCondition &cond) {
|
|
146455
|
+
switch (cond.comparison) {
|
|
146456
|
+
case ExpressionType::COMPARE_EQUAL:
|
|
146457
|
+
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
|
146458
|
+
return true;
|
|
146459
|
+
default:
|
|
146460
|
+
return false;
|
|
146461
|
+
}
|
|
146462
|
+
}
|
|
146463
|
+
|
|
146464
|
+
//! Returns true if "join_type" is ANTI, MARK, SEMI or SINGLE, and false otherwise.
//! RemoveInequalityCandidate uses this to decide which parent DelimJoins it may rewrite.
//! The argument is taken by non-const reference but is only read.
static bool InequalityDelimJoinCanBeEliminated(JoinType &join_type) {
|
|
146465
|
+
switch (join_type) {
|
|
146466
|
+
case JoinType::ANTI:
|
|
146467
|
+
case JoinType::MARK:
|
|
146468
|
+
case JoinType::SEMI:
|
|
146469
|
+
case JoinType::SINGLE:
|
|
146470
|
+
return true;
|
|
146471
|
+
default:
|
|
146472
|
+
return false;
|
|
146473
|
+
}
|
|
146474
|
+
}
|
|
146475
|
+
|
|
146443
146476
|
void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
|
|
146444
146477
|
VisitOperatorChildren(op);
|
|
146445
146478
|
VisitOperatorExpressions(op);
|
|
146446
|
-
if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN &&
|
|
146479
|
+
if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN && DelimGetCount(op) == 0) {
|
|
146447
146480
|
auto &delim_join = (LogicalDelimJoin &)op;
|
|
146448
146481
|
auto decs = &delim_join.duplicate_eliminated_columns;
|
|
146449
146482
|
for (auto &cond : delim_join.conditions) {
|
|
146450
|
-
if (cond
|
|
146451
|
-
cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
|
|
146483
|
+
if (!IsEqualityJoinCondition(cond)) {
|
|
146452
146484
|
continue;
|
|
146453
146485
|
}
|
|
146454
|
-
|
|
146486
|
+
auto rhs = cond.right.get();
|
|
146455
146487
|
while (rhs->type == ExpressionType::OPERATOR_CAST) {
|
|
146456
146488
|
auto &cast = (BoundCastExpression &)*rhs;
|
|
146457
146489
|
rhs = cast.child.get();
|
|
146458
146490
|
}
|
|
146459
146491
|
if (rhs->type != ExpressionType::BOUND_COLUMN_REF) {
|
|
146460
|
-
throw InternalException("
|
|
146492
|
+
throw InternalException("Error in Deliminator: expected a bound column reference");
|
|
146461
146493
|
}
|
|
146462
146494
|
auto &colref = (BoundColumnRefExpression &)*rhs;
|
|
146463
146495
|
if (projection_map.find(colref.binding) != projection_map.end()) {
|
|
@@ -146488,25 +146520,13 @@ void DeliminatorPlanUpdater::VisitExpression(unique_ptr<Expression> *expression)
|
|
|
146488
146520
|
}
|
|
146489
146521
|
}
|
|
146490
146522
|
|
|
146491
|
-
bool DeliminatorPlanUpdater::HasChildDelimGet(LogicalOperator &op) {
|
|
146492
|
-
if (op.type == LogicalOperatorType::LOGICAL_DELIM_GET) {
|
|
146493
|
-
return true;
|
|
146494
|
-
}
|
|
146495
|
-
for (auto &child : op.children) {
|
|
146496
|
-
if (HasChildDelimGet(*child)) {
|
|
146497
|
-
return true;
|
|
146498
|
-
}
|
|
146499
|
-
}
|
|
146500
|
-
return false;
|
|
146501
|
-
}
|
|
146502
|
-
|
|
146503
146523
|
unique_ptr<LogicalOperator> Deliminator::Optimize(unique_ptr<LogicalOperator> op) {
|
|
146504
146524
|
vector<unique_ptr<LogicalOperator> *> candidates;
|
|
146505
146525
|
FindCandidates(&op, candidates);
|
|
146506
146526
|
|
|
146507
|
-
for (auto candidate : candidates) {
|
|
146527
|
+
for (auto &candidate : candidates) {
|
|
146508
146528
|
DeliminatorPlanUpdater updater;
|
|
146509
|
-
if (RemoveCandidate(candidate, updater)) {
|
|
146529
|
+
if (RemoveCandidate(&op, candidate, updater)) {
|
|
146510
146530
|
updater.VisitOperator(*op);
|
|
146511
146531
|
}
|
|
146512
146532
|
}
|
|
@@ -146561,10 +146581,21 @@ static bool OperatorIsDelimGet(LogicalOperator &op) {
|
|
|
146561
146581
|
return false;
|
|
146562
146582
|
}
|
|
146563
146583
|
|
|
146564
|
-
bool
|
|
146565
|
-
|
|
146584
|
+
//! Returns true if "join_type" is INNER or SEMI, and false otherwise.
//! RemoveCandidate gives up (returns false) when the join below the candidate has any other type.
//! The argument is taken by non-const reference but is only read.
static bool ChildJoinTypeCanBeDeliminated(JoinType &join_type) {
|
|
146585
|
+
switch (join_type) {
|
|
146586
|
+
case JoinType::INNER:
|
|
146587
|
+
case JoinType::SEMI:
|
|
146588
|
+
return true;
|
|
146589
|
+
default:
|
|
146590
|
+
return false;
|
|
146591
|
+
}
|
|
146592
|
+
}
|
|
146593
|
+
|
|
146594
|
+
bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
|
|
146595
|
+
DeliminatorPlanUpdater &updater) {
|
|
146596
|
+
auto &proj_or_agg = **candidate;
|
|
146566
146597
|
auto &join = (LogicalComparisonJoin &)*proj_or_agg.children[0];
|
|
146567
|
-
if (join.join_type
|
|
146598
|
+
if (!ChildJoinTypeCanBeDeliminated(join.join_type)) {
|
|
146568
146599
|
return false;
|
|
146569
146600
|
}
|
|
146570
146601
|
|
|
@@ -146582,13 +146613,10 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
|
|
|
146582
146613
|
return false;
|
|
146583
146614
|
}
|
|
146584
146615
|
// check if joining with the DelimGet is redundant, and collect relevant column information
|
|
146616
|
+
bool all_equality_conditions = true;
|
|
146585
146617
|
vector<Expression *> nulls_are_not_equal_exprs;
|
|
146586
146618
|
for (auto &cond : join.conditions) {
|
|
146587
|
-
|
|
146588
|
-
cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
|
|
146589
|
-
// non-equality join condition
|
|
146590
|
-
return false;
|
|
146591
|
-
}
|
|
146619
|
+
all_equality_conditions = all_equality_conditions && IsEqualityJoinCondition(cond);
|
|
146592
146620
|
auto delim_side = delim_idx == 0 ? cond.left.get() : cond.right.get();
|
|
146593
146621
|
auto other_side = delim_idx == 0 ? cond.right.get() : cond.left.get();
|
|
146594
146622
|
if (delim_side->type != ExpressionType::BOUND_COLUMN_REF) {
|
|
@@ -146601,10 +146629,12 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
|
|
|
146601
146629
|
nulls_are_not_equal_exprs.push_back(other_side);
|
|
146602
146630
|
}
|
|
146603
146631
|
}
|
|
146632
|
+
|
|
146604
146633
|
// removed DelimGet columns are assigned a new ColumnBinding by Projection/Aggregation, keep track here
|
|
146605
146634
|
if (proj_or_agg.type == LogicalOperatorType::LOGICAL_PROJECTION) {
|
|
146606
146635
|
for (auto &cb : proj_or_agg.GetColumnBindings()) {
|
|
146607
146636
|
updater.projection_map[cb] = true;
|
|
146637
|
+
updater.reverse_proj_or_agg_map[cb] = proj_or_agg.expressions[cb.column_index].get();
|
|
146608
146638
|
for (auto &expr : nulls_are_not_equal_exprs) {
|
|
146609
146639
|
if (proj_or_agg.expressions[cb.column_index]->Equals(expr)) {
|
|
146610
146640
|
updater.projection_map[cb] = false;
|
|
@@ -146614,8 +146644,19 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
|
|
|
146614
146644
|
}
|
|
146615
146645
|
} else {
|
|
146616
146646
|
auto &agg = (LogicalAggregate &)proj_or_agg;
|
|
146647
|
+
|
|
146648
|
+
// Create a vector of all exprs in the agg
|
|
146649
|
+
vector<Expression *> all_agg_exprs;
|
|
146650
|
+
for (auto &expr : agg.groups) {
|
|
146651
|
+
all_agg_exprs.push_back(expr.get());
|
|
146652
|
+
}
|
|
146653
|
+
for (auto &expr : agg.expressions) {
|
|
146654
|
+
all_agg_exprs.push_back(expr.get());
|
|
146655
|
+
}
|
|
146656
|
+
|
|
146617
146657
|
for (auto &cb : agg.GetColumnBindings()) {
|
|
146618
146658
|
updater.projection_map[cb] = true;
|
|
146659
|
+
updater.reverse_proj_or_agg_map[cb] = all_agg_exprs[cb.column_index];
|
|
146619
146660
|
for (auto &expr : nulls_are_not_equal_exprs) {
|
|
146620
146661
|
if ((cb.table_index == agg.group_index && agg.groups[cb.column_index]->Equals(expr)) ||
|
|
146621
146662
|
(cb.table_index == agg.aggregate_index && agg.expressions[cb.column_index]->Equals(expr))) {
|
|
@@ -146625,6 +146666,14 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
|
|
|
146625
146666
|
}
|
|
146626
146667
|
}
|
|
146627
146668
|
}
|
|
146669
|
+
|
|
146670
|
+
if (!all_equality_conditions) {
|
|
146671
|
+
// we can get rid of an inequality join with a DelimGet, but only under specific circumstances
|
|
146672
|
+
if (!RemoveInequalityCandidate(plan, candidate, updater)) {
|
|
146673
|
+
return false;
|
|
146674
|
+
}
|
|
146675
|
+
}
|
|
146676
|
+
|
|
146628
146677
|
// make a filter if needed
|
|
146629
146678
|
if (!nulls_are_not_equal_exprs.empty() || filter != nullptr) {
|
|
146630
146679
|
auto filter_op = make_unique<LogicalFilter>();
|
|
@@ -146652,6 +146701,146 @@ bool Deliminator::RemoveCandidate(unique_ptr<LogicalOperator> *op_ptr, Deliminat
|
|
|
146652
146701
|
return true;
|
|
146653
146702
|
}
|
|
146654
146703
|
|
|
146704
|
+
//! Appends a pointer to every LOGICAL_DELIM_JOIN operator in the subtree rooted at "op" to "delim_joins".
//! Children are visited before "op" itself, so DelimJoins deeper in the plan are appended before
//! the DelimJoins that contain them.
static void GetDelimJoins(LogicalOperator &op, vector<LogicalOperator *> &delim_joins) {
|
|
146705
|
+
for (auto &child : op.children) {
|
|
146706
|
+
GetDelimJoins(*child, delim_joins);
|
|
146707
|
+
}
|
|
146708
|
+
if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
|
|
146709
|
+
delim_joins.push_back(&op);
|
|
146710
|
+
}
|
|
146711
|
+
}
|
|
146712
|
+
|
|
146713
|
+
//! Returns true if "needle" is "haystack" itself or occurs anywhere in the subtree below "haystack".
//! When "needle" is found below "haystack", "side" is set to the index of the direct child of
//! "haystack" whose subtree contains it. "side" is not written when "haystack" == "needle" or when
//! "needle" is not found.
static bool HasChild(LogicalOperator *haystack, LogicalOperator *needle, idx_t &side) {
|
|
146714
|
+
if (haystack == needle) {
|
|
146715
|
+
return true;
|
|
146716
|
+
}
|
|
146717
|
+
for (idx_t i = 0; i < haystack->children.size(); i++) {
|
|
146718
|
+
auto &child = haystack->children[i];
|
|
146719
|
+
// the side reported by deeper recursion levels is discarded: only the child index at this level is reported
|
idx_t dummy_side;
|
|
146720
|
+
if (HasChild(child.get(), needle, dummy_side)) {
|
|
146721
|
+
side = i;
|
|
146722
|
+
return true;
|
|
146723
|
+
}
|
|
146724
|
+
}
|
|
146725
|
+
return false;
|
|
146726
|
+
}
|
|
146727
|
+
|
|
146728
|
+
//! Checks whether the join with a DelimGet below "candidate" (a projection or aggregate whose first child
//! is a comparison join) can be removed even though not all of its conditions are equalities.
//! Looks in "plan" for a DelimJoin that contains "candidate", contains exactly one DelimGet, and has a
//! join type accepted by InequalityDelimJoinCanBeEliminated. Reads updater.reverse_proj_or_agg_map, which
//! RemoveCandidate fills for the candidate's column bindings before calling this function.
//! Returns false, without modifying the plan, if any check fails. On success the conditions of the parent
//! DelimJoin are rewritten (expression on the DelimGet side and comparison type), its
//! duplicate_eliminated_columns are cleared, its type is changed to LOGICAL_COMPARISON_JOIN, and true
//! is returned.
bool Deliminator::RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
|
|
146729
|
+
DeliminatorPlanUpdater &updater) {
|
|
146730
|
+
auto &proj_or_agg = **candidate;
|
|
146731
|
+
// first, we find a DelimJoin in "plan" that has only one DelimGet as a child, which is in "candidate"
|
|
146732
|
+
if (DelimGetCount(proj_or_agg) != 1) {
|
|
146733
|
+
// the candidate therefore must have only a single DelimGet in its children
|
|
146734
|
+
return false;
|
|
146735
|
+
}
|
|
146736
|
+
|
|
146737
|
+
vector<LogicalOperator *> delim_joins;
|
|
146738
|
+
GetDelimJoins(**plan, delim_joins);
|
|
146739
|
+
|
|
146740
|
+
LogicalOperator *parent = nullptr;
|
|
146741
|
+
// declared without an initial value; HasChild writes it when the candidate is found below a child of dj
|
idx_t parent_delim_get_side;
|
|
146742
|
+
for (auto dj : delim_joins) {
|
|
146743
|
+
D_ASSERT(dj->type == LogicalOperatorType::LOGICAL_DELIM_JOIN);
|
|
146744
|
+
if (!HasChild(dj, &proj_or_agg, parent_delim_get_side)) {
|
|
146745
|
+
continue;
|
|
146746
|
+
}
|
|
146747
|
+
// we found a parent DelimJoin
|
|
146748
|
+
if (DelimGetCount(*dj) != 1) {
|
|
146749
|
+
// it has more than one DelimGet children
|
|
146750
|
+
continue;
|
|
146751
|
+
}
|
|
146752
|
+
|
|
146753
|
+
// we can only remove inequality join with a DelimGet if the parent DelimJoin has one of these join types
|
|
146754
|
+
auto &delim_join = (LogicalDelimJoin &)*dj;
|
|
146755
|
+
if (!InequalityDelimJoinCanBeEliminated(delim_join.join_type)) {
|
|
146756
|
+
continue;
|
|
146757
|
+
}
|
|
146758
|
+
|
|
146759
|
+
parent = dj;
|
|
146760
|
+
break;
|
|
146761
|
+
}
|
|
146762
|
+
if (!parent) {
|
|
146763
|
+
return false;
|
|
146764
|
+
}
|
|
146765
|
+
|
|
146766
|
+
// we found the parent delim join, and we may be able to remove the child DelimGet join
|
|
146767
|
+
// but we need to make sure that their conditions refer to exactly the same columns
|
|
146768
|
+
auto &parent_delim_join = (LogicalDelimJoin &)*parent;
|
|
146769
|
+
auto &join = (LogicalComparisonJoin &)*proj_or_agg.children[0];
|
|
146770
|
+
if (parent_delim_join.conditions.size() != join.conditions.size()) {
|
|
146771
|
+
// different number of conditions, can't replace
|
|
146772
|
+
return false;
|
|
146773
|
+
}
|
|
146774
|
+
|
|
146775
|
+
// we can only do this optimization under the following conditions:
|
|
146776
|
+
// 1. all join expressions coming from the DelimGet side are colrefs
|
|
146777
|
+
// 2. these expressions refer to colrefs coming from the proj/agg on top of the child DelimGet join
|
|
146778
|
+
// 3. the expression (before it was proj/agg) can be found in the conditions of the child DelimGet join
|
|
146779
|
+
for (auto &parent_cond : parent_delim_join.conditions) {
|
|
146780
|
+
auto &parent_expr = parent_delim_get_side == 0 ? parent_cond.left : parent_cond.right;
|
|
146781
|
+
if (parent_expr->type != ExpressionType::BOUND_COLUMN_REF) {
|
|
146782
|
+
// can only deal with colrefs
|
|
146783
|
+
return false;
|
|
146784
|
+
}
|
|
146785
|
+
auto &parent_colref = (BoundColumnRefExpression &)*parent_expr;
|
|
146786
|
+
auto it = updater.reverse_proj_or_agg_map.find(parent_colref.binding);
|
|
146787
|
+
if (it == updater.reverse_proj_or_agg_map.end()) {
|
|
146788
|
+
// refers to a column that was not in the child DelimGet join
|
|
146789
|
+
return false;
|
|
146790
|
+
}
|
|
146791
|
+
// try to find the corresponding child condition
|
|
146792
|
+
// TODO: can be more flexible - allow CAST
|
|
146793
|
+
auto child_expr = it->second;
|
|
146794
|
+
bool found = false;
|
|
146795
|
+
for (auto &child_cond : join.conditions) {
|
|
146796
|
+
if (child_cond.left->Equals(child_expr) || child_cond.right->Equals(child_expr)) {
|
|
146797
|
+
found = true;
|
|
146798
|
+
break;
|
|
146799
|
+
}
|
|
146800
|
+
}
|
|
146801
|
+
if (!found) {
|
|
146802
|
+
// could not find the mapped expression in the child condition expressions
|
|
146803
|
+
return false;
|
|
146804
|
+
}
|
|
146805
|
+
}
|
|
146806
|
+
|
|
146807
|
+
// TODO: we cannot perform the optimization here because our pure inequality joins don't implement
|
|
146808
|
+
// JoinType::SINGLE yet
|
|
146809
|
+
if (parent_delim_join.join_type == JoinType::SINGLE) {
|
|
146810
|
+
bool has_one_equality = false;
|
|
146811
|
+
for (auto &cond : join.conditions) {
|
|
146812
|
+
has_one_equality = has_one_equality || IsEqualityJoinCondition(cond);
|
|
146813
|
+
}
|
|
146814
|
+
if (!has_one_equality) {
|
|
146815
|
+
return false;
|
|
146816
|
+
}
|
|
146817
|
+
}
|
|
146818
|
+
|
|
146819
|
+
// we are now sure that we can remove the child DelimGet join, so we basically do the same loop as above
|
|
146820
|
+
// this time without checks because we already did them, and replace the expressions
|
|
146821
|
+
for (auto &parent_cond : parent_delim_join.conditions) {
|
|
146822
|
+
auto &parent_expr = parent_delim_get_side == 0 ? parent_cond.left : parent_cond.right;
|
|
146823
|
+
auto &parent_colref = (BoundColumnRefExpression &)*parent_expr;
|
|
146824
|
+
auto it = updater.reverse_proj_or_agg_map.find(parent_colref.binding);
|
|
146825
|
+
auto child_expr = it->second;
|
|
146826
|
+
for (auto &child_cond : join.conditions) {
|
|
146827
|
+
if (!child_cond.left->Equals(child_expr) && !child_cond.right->Equals(child_expr)) {
|
|
146828
|
+
continue;
|
|
146829
|
+
}
|
|
146830
|
+
// NOTE(review): "it" was looked up with parent_colref.binding, so it->first is that same binding and the
|
// new colref keeps the parent's binding; only the comparison is taken from the child - confirm intended
|
parent_expr =
|
|
146831
|
+
make_unique<BoundColumnRefExpression>(parent_expr->alias, parent_expr->return_type, it->first);
|
|
146832
|
+
parent_cond.comparison = child_cond.comparison;
|
|
146833
|
+
break;
|
|
146834
|
+
}
|
|
146835
|
+
}
|
|
146836
|
+
|
|
146837
|
+
// no longer needs to be a delim join
|
|
146838
|
+
parent_delim_join.duplicate_eliminated_columns.clear();
|
|
146839
|
+
parent_delim_join.type = LogicalOperatorType::LOGICAL_COMPARISON_JOIN;
|
|
146840
|
+
|
|
146841
|
+
return true;
|
|
146842
|
+
}
|
|
146843
|
+
|
|
146655
146844
|
} // namespace duckdb
|
|
146656
146845
|
|
|
146657
146846
|
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.2-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "82be969b1"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.2-dev843"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|