duckdb 0.4.1-dev308.0 → 0.4.1-dev318.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +247 -71
- package/src/duckdb.hpp +12 -24
- package/src/parquet-amalgamation.cpp +37003 -37003
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -39446,6 +39446,12 @@ bool ChunkCollection::Equals(ChunkCollection &other) {
|
|
|
39446
39446
|
if (compare_equals) {
|
|
39447
39447
|
return true;
|
|
39448
39448
|
}
|
|
39449
|
+
for (auto &type : types) {
|
|
39450
|
+
// sort not supported
|
|
39451
|
+
if (type.InternalType() == PhysicalType::LIST || type.InternalType() == PhysicalType::STRUCT) {
|
|
39452
|
+
return false;
|
|
39453
|
+
}
|
|
39454
|
+
}
|
|
39449
39455
|
// if the results are not equal,
|
|
39450
39456
|
// sort both chunk collections to ensure the comparison is not order insensitive
|
|
39451
39457
|
vector<OrderType> desc(ColumnCount(), OrderType::DESCENDING);
|
|
@@ -54373,6 +54379,7 @@ public:
|
|
|
54373
54379
|
//! Turns the JoinCondition into an expression; note that this destroys the JoinCondition as the expression inherits
|
|
54374
54380
|
//! the left/right expressions
|
|
54375
54381
|
static unique_ptr<Expression> CreateExpression(JoinCondition cond);
|
|
54382
|
+
static unique_ptr<Expression> CreateExpression(vector<JoinCondition> conditions);
|
|
54376
54383
|
|
|
54377
54384
|
public:
|
|
54378
54385
|
unique_ptr<Expression> left;
|
|
@@ -67309,6 +67316,8 @@ public:
|
|
|
67309
67316
|
return true;
|
|
67310
67317
|
}
|
|
67311
67318
|
|
|
67319
|
+
static bool IsSupported(const vector<JoinCondition> &conditions);
|
|
67320
|
+
|
|
67312
67321
|
private:
|
|
67313
67322
|
// resolve joins that output max N elements (SEMI, ANTI, MARK)
|
|
67314
67323
|
void ResolveSimpleJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk, OperatorState &state) const;
|
|
@@ -67425,6 +67434,16 @@ void PhysicalJoin::ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &left
|
|
|
67425
67434
|
}
|
|
67426
67435
|
}
|
|
67427
67436
|
|
|
67437
|
+
bool PhysicalNestedLoopJoin::IsSupported(const vector<JoinCondition> &conditions) {
|
|
67438
|
+
for (auto &cond : conditions) {
|
|
67439
|
+
if (cond.left->return_type.InternalType() == PhysicalType::STRUCT ||
|
|
67440
|
+
cond.left->return_type.InternalType() == PhysicalType::LIST) {
|
|
67441
|
+
return false;
|
|
67442
|
+
}
|
|
67443
|
+
}
|
|
67444
|
+
return true;
|
|
67445
|
+
}
|
|
67446
|
+
|
|
67428
67447
|
//===--------------------------------------------------------------------===//
|
|
67429
67448
|
// Sink
|
|
67430
67449
|
//===--------------------------------------------------------------------===//
|
|
@@ -76036,6 +76055,44 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalChunkGet &
|
|
|
76036
76055
|
|
|
76037
76056
|
|
|
76038
76057
|
|
|
76058
|
+
//===----------------------------------------------------------------------===//
|
|
76059
|
+
// DuckDB
|
|
76060
|
+
//
|
|
76061
|
+
// duckdb/planner/expression_iterator.hpp
|
|
76062
|
+
//
|
|
76063
|
+
//
|
|
76064
|
+
//===----------------------------------------------------------------------===//
|
|
76065
|
+
|
|
76066
|
+
|
|
76067
|
+
|
|
76068
|
+
|
|
76069
|
+
|
|
76070
|
+
|
|
76071
|
+
#include <functional>
|
|
76072
|
+
|
|
76073
|
+
namespace duckdb {
|
|
76074
|
+
class BoundQueryNode;
|
|
76075
|
+
class BoundTableRef;
|
|
76076
|
+
|
|
76077
|
+
class ExpressionIterator {
|
|
76078
|
+
public:
|
|
76079
|
+
static void EnumerateChildren(const Expression &expression,
|
|
76080
|
+
const std::function<void(const Expression &child)> &callback);
|
|
76081
|
+
static void EnumerateChildren(Expression &expression, const std::function<void(Expression &child)> &callback);
|
|
76082
|
+
static void EnumerateChildren(Expression &expression,
|
|
76083
|
+
const std::function<void(unique_ptr<Expression> &child)> &callback);
|
|
76084
|
+
|
|
76085
|
+
static void EnumerateExpression(unique_ptr<Expression> &expr,
|
|
76086
|
+
const std::function<void(Expression &child)> &callback);
|
|
76087
|
+
|
|
76088
|
+
static void EnumerateTableRefChildren(BoundTableRef &ref, const std::function<void(Expression &child)> &callback);
|
|
76089
|
+
static void EnumerateQueryNodeChildren(BoundQueryNode &node,
|
|
76090
|
+
const std::function<void(Expression &child)> &callback);
|
|
76091
|
+
};
|
|
76092
|
+
|
|
76093
|
+
} // namespace duckdb
|
|
76094
|
+
|
|
76095
|
+
|
|
76039
76096
|
namespace duckdb {
|
|
76040
76097
|
|
|
76041
76098
|
static bool CanPlanIndexJoin(Transaction &transaction, TableScanBindData *bind_data, PhysicalTableScan &scan) {
|
|
@@ -76178,6 +76235,14 @@ void TransformIndexJoin(ClientContext &context, LogicalComparisonJoin &op, Index
|
|
|
76178
76235
|
}
|
|
76179
76236
|
}
|
|
76180
76237
|
|
|
76238
|
+
static void RewriteJoinCondition(Expression &expr, idx_t offset) {
|
|
76239
|
+
if (expr.type == ExpressionType::BOUND_REF) {
|
|
76240
|
+
auto &ref = (BoundReferenceExpression &)expr;
|
|
76241
|
+
ref.index += offset;
|
|
76242
|
+
}
|
|
76243
|
+
ExpressionIterator::EnumerateChildren(expr, [&](Expression &child) { RewriteJoinCondition(child, offset); });
|
|
76244
|
+
}
|
|
76245
|
+
|
|
76181
76246
|
unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparisonJoin &op) {
|
|
76182
76247
|
// now visit the children
|
|
76183
76248
|
D_ASSERT(op.children.size() == 2);
|
|
@@ -76263,10 +76328,17 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparison
|
|
|
76263
76328
|
// range join: use piecewise merge join
|
|
76264
76329
|
plan = make_unique<PhysicalPiecewiseMergeJoin>(op, move(left), move(right), move(op.conditions),
|
|
76265
76330
|
op.join_type, op.estimated_cardinality);
|
|
76266
|
-
} else {
|
|
76331
|
+
} else if (PhysicalNestedLoopJoin::IsSupported(op.conditions)) {
|
|
76267
76332
|
// inequality join: use nested loop
|
|
76268
76333
|
plan = make_unique<PhysicalNestedLoopJoin>(op, move(left), move(right), move(op.conditions), op.join_type,
|
|
76269
76334
|
op.estimated_cardinality);
|
|
76335
|
+
} else {
|
|
76336
|
+
for (auto &cond : op.conditions) {
|
|
76337
|
+
RewriteJoinCondition(*cond.right, left->types.size());
|
|
76338
|
+
}
|
|
76339
|
+
auto condition = JoinCondition::CreateExpression(move(op.conditions));
|
|
76340
|
+
plan = make_unique<PhysicalBlockwiseNLJoin>(op, move(left), move(right), move(condition), op.join_type,
|
|
76341
|
+
op.estimated_cardinality);
|
|
76270
76342
|
}
|
|
76271
76343
|
}
|
|
76272
76344
|
return plan;
|
|
@@ -76415,42 +76487,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
|
|
|
76415
76487
|
|
|
76416
76488
|
|
|
76417
76489
|
|
|
76418
|
-
//===----------------------------------------------------------------------===//
|
|
76419
|
-
// DuckDB
|
|
76420
|
-
//
|
|
76421
|
-
// duckdb/planner/expression_iterator.hpp
|
|
76422
|
-
//
|
|
76423
|
-
//
|
|
76424
|
-
//===----------------------------------------------------------------------===//
|
|
76425
|
-
|
|
76426
|
-
|
|
76427
|
-
|
|
76428
|
-
|
|
76429
|
-
|
|
76430
|
-
|
|
76431
|
-
#include <functional>
|
|
76432
|
-
|
|
76433
|
-
namespace duckdb {
|
|
76434
|
-
class BoundQueryNode;
|
|
76435
|
-
class BoundTableRef;
|
|
76436
|
-
|
|
76437
|
-
class ExpressionIterator {
|
|
76438
|
-
public:
|
|
76439
|
-
static void EnumerateChildren(const Expression &expression,
|
|
76440
|
-
const std::function<void(const Expression &child)> &callback);
|
|
76441
|
-
static void EnumerateChildren(Expression &expression, const std::function<void(Expression &child)> &callback);
|
|
76442
|
-
static void EnumerateChildren(Expression &expression,
|
|
76443
|
-
const std::function<void(unique_ptr<Expression> &child)> &callback);
|
|
76444
|
-
|
|
76445
|
-
static void EnumerateExpression(unique_ptr<Expression> &expr,
|
|
76446
|
-
const std::function<void(Expression &child)> &callback);
|
|
76447
|
-
|
|
76448
|
-
static void EnumerateTableRefChildren(BoundTableRef &ref, const std::function<void(Expression &child)> &callback);
|
|
76449
|
-
static void EnumerateQueryNodeChildren(BoundQueryNode &node,
|
|
76450
|
-
const std::function<void(Expression &child)> &callback);
|
|
76451
|
-
};
|
|
76452
|
-
|
|
76453
|
-
} // namespace duckdb
|
|
76454
76490
|
|
|
76455
76491
|
//===----------------------------------------------------------------------===//
|
|
76456
76492
|
// DuckDB
|
|
@@ -155632,6 +155668,7 @@ private:
|
|
|
155632
155668
|
unique_ptr<ConstantExpression> TransformValue(duckdb_libpgquery::PGValue val);
|
|
155633
155669
|
//! Transform a Postgres operator into an Expression
|
|
155634
155670
|
unique_ptr<ParsedExpression> TransformAExpr(duckdb_libpgquery::PGAExpr *root);
|
|
155671
|
+
unique_ptr<ParsedExpression> TransformAExprInternal(duckdb_libpgquery::PGAExpr *root);
|
|
155635
155672
|
//! Transform a Postgres abstract expression into an Expression
|
|
155636
155673
|
unique_ptr<ParsedExpression> TransformExpression(duckdb_libpgquery::PGNode *node);
|
|
155637
155674
|
//! Transform a Postgres function call into an Expression
|
|
@@ -159478,7 +159515,7 @@ unique_ptr<ParsedExpression> Transformer::TransformBinaryOperator(const string &
|
|
|
159478
159515
|
}
|
|
159479
159516
|
}
|
|
159480
159517
|
|
|
159481
|
-
unique_ptr<ParsedExpression> Transformer::
|
|
159518
|
+
unique_ptr<ParsedExpression> Transformer::TransformAExprInternal(duckdb_libpgquery::PGAExpr *root) {
|
|
159482
159519
|
D_ASSERT(root);
|
|
159483
159520
|
auto name = string((reinterpret_cast<duckdb_libpgquery::PGValue *>(root->name->head->data.ptr_value))->val.str);
|
|
159484
159521
|
|
|
@@ -159626,6 +159663,14 @@ unique_ptr<ParsedExpression> Transformer::TransformAExpr(duckdb_libpgquery::PGAE
|
|
|
159626
159663
|
}
|
|
159627
159664
|
}
|
|
159628
159665
|
|
|
159666
|
+
unique_ptr<ParsedExpression> Transformer::TransformAExpr(duckdb_libpgquery::PGAExpr *root) {
|
|
159667
|
+
auto result = TransformAExprInternal(root);
|
|
159668
|
+
if (result) {
|
|
159669
|
+
result->query_location = root->location;
|
|
159670
|
+
}
|
|
159671
|
+
return result;
|
|
159672
|
+
}
|
|
159673
|
+
|
|
159629
159674
|
} // namespace duckdb
|
|
159630
159675
|
|
|
159631
159676
|
|
|
@@ -166900,7 +166945,8 @@ namespace duckdb {
|
|
|
166900
166945
|
//! The FlattenDependentJoins class is responsible for pushing the dependent join down into the plan to create a
|
|
166901
166946
|
//! flattened subquery
|
|
166902
166947
|
struct FlattenDependentJoins {
|
|
166903
|
-
FlattenDependentJoins(Binder &binder, const vector<CorrelatedColumnInfo> &correlated, bool
|
|
166948
|
+
FlattenDependentJoins(Binder &binder, const vector<CorrelatedColumnInfo> &correlated, bool perform_delim = true,
|
|
166949
|
+
bool any_join = false);
|
|
166904
166950
|
|
|
166905
166951
|
//! Detects which Logical Operators have correlated expressions that they are dependent upon, filling the
|
|
166906
166952
|
//! has_correlated_expressions map.
|
|
@@ -166919,6 +166965,7 @@ struct FlattenDependentJoins {
|
|
|
166919
166965
|
const vector<CorrelatedColumnInfo> &correlated_columns;
|
|
166920
166966
|
vector<LogicalType> delim_types;
|
|
166921
166967
|
|
|
166968
|
+
bool perform_delim;
|
|
166922
166969
|
bool any_join;
|
|
166923
166970
|
|
|
166924
166971
|
private:
|
|
@@ -166930,6 +166977,9 @@ private:
|
|
|
166930
166977
|
|
|
166931
166978
|
|
|
166932
166979
|
|
|
166980
|
+
|
|
166981
|
+
|
|
166982
|
+
|
|
166933
166983
|
namespace duckdb {
|
|
166934
166984
|
|
|
166935
166985
|
static unique_ptr<Expression> PlanUncorrelatedSubquery(Binder &binder, BoundSubqueryExpression &expr,
|
|
@@ -167051,8 +167101,25 @@ static unique_ptr<Expression> PlanUncorrelatedSubquery(Binder &binder, BoundSubq
|
|
|
167051
167101
|
}
|
|
167052
167102
|
|
|
167053
167103
|
static unique_ptr<LogicalDelimJoin> CreateDuplicateEliminatedJoin(vector<CorrelatedColumnInfo> &correlated_columns,
|
|
167054
|
-
JoinType join_type
|
|
167104
|
+
JoinType join_type,
|
|
167105
|
+
unique_ptr<LogicalOperator> original_plan,
|
|
167106
|
+
bool perform_delim) {
|
|
167055
167107
|
auto delim_join = make_unique<LogicalDelimJoin>(join_type);
|
|
167108
|
+
if (!perform_delim) {
|
|
167109
|
+
// if we are not performing a delim join, we push a row_number() OVER() window operator on the LHS
|
|
167110
|
+
// and perform all duplicate elimination on that row number instead
|
|
167111
|
+
D_ASSERT(correlated_columns[0].type.id() == LogicalTypeId::BIGINT);
|
|
167112
|
+
auto window = make_unique<LogicalWindow>(correlated_columns[0].binding.table_index);
|
|
167113
|
+
auto row_number = make_unique<BoundWindowExpression>(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT,
|
|
167114
|
+
nullptr, nullptr);
|
|
167115
|
+
row_number->start = WindowBoundary::UNBOUNDED_PRECEDING;
|
|
167116
|
+
row_number->end = WindowBoundary::CURRENT_ROW_ROWS;
|
|
167117
|
+
row_number->alias = "delim_index";
|
|
167118
|
+
window->expressions.push_back(move(row_number));
|
|
167119
|
+
window->AddChild(move(original_plan));
|
|
167120
|
+
original_plan = move(window);
|
|
167121
|
+
}
|
|
167122
|
+
delim_join->AddChild(move(original_plan));
|
|
167056
167123
|
for (idx_t i = 0; i < correlated_columns.size(); i++) {
|
|
167057
167124
|
auto &col = correlated_columns[i];
|
|
167058
167125
|
delim_join->duplicate_eliminated_columns.push_back(
|
|
@@ -167063,8 +167130,9 @@ static unique_ptr<LogicalDelimJoin> CreateDuplicateEliminatedJoin(vector<Correla
|
|
|
167063
167130
|
}
|
|
167064
167131
|
|
|
167065
167132
|
static void CreateDelimJoinConditions(LogicalDelimJoin &delim_join, vector<CorrelatedColumnInfo> &correlated_columns,
|
|
167066
|
-
vector<ColumnBinding> bindings, idx_t base_offset) {
|
|
167067
|
-
|
|
167133
|
+
vector<ColumnBinding> bindings, idx_t base_offset, bool perform_delim) {
|
|
167134
|
+
auto col_count = perform_delim ? correlated_columns.size() : 1;
|
|
167135
|
+
for (idx_t i = 0; i < col_count; i++) {
|
|
167068
167136
|
auto &col = correlated_columns[i];
|
|
167069
167137
|
JoinCondition cond;
|
|
167070
167138
|
cond.left = make_unique<BoundColumnRefExpression>(col.name, col.type, col.binding);
|
|
@@ -167074,10 +167142,50 @@ static void CreateDelimJoinConditions(LogicalDelimJoin &delim_join, vector<Corre
|
|
|
167074
167142
|
}
|
|
167075
167143
|
}
|
|
167076
167144
|
|
|
167145
|
+
static bool PerformDelimOnType(const LogicalType &type) {
|
|
167146
|
+
if (type.InternalType() == PhysicalType::LIST) {
|
|
167147
|
+
return false;
|
|
167148
|
+
}
|
|
167149
|
+
if (type.InternalType() == PhysicalType::STRUCT) {
|
|
167150
|
+
for (auto &entry : StructType::GetChildTypes(type)) {
|
|
167151
|
+
if (!PerformDelimOnType(entry.second)) {
|
|
167152
|
+
return false;
|
|
167153
|
+
}
|
|
167154
|
+
}
|
|
167155
|
+
}
|
|
167156
|
+
return true;
|
|
167157
|
+
}
|
|
167158
|
+
|
|
167159
|
+
static bool PerformDuplicateElimination(Binder &binder, vector<CorrelatedColumnInfo> &correlated_columns) {
|
|
167160
|
+
if (!ClientConfig::GetConfig(binder.context).enable_optimizer) {
|
|
167161
|
+
// if optimizations are disabled we always do a delim join
|
|
167162
|
+
return true;
|
|
167163
|
+
}
|
|
167164
|
+
bool perform_delim = true;
|
|
167165
|
+
for (auto &col : correlated_columns) {
|
|
167166
|
+
if (!PerformDelimOnType(col.type)) {
|
|
167167
|
+
perform_delim = false;
|
|
167168
|
+
break;
|
|
167169
|
+
}
|
|
167170
|
+
}
|
|
167171
|
+
if (perform_delim) {
|
|
167172
|
+
return true;
|
|
167173
|
+
}
|
|
167174
|
+
auto binding = ColumnBinding(binder.GenerateTableIndex(), 0);
|
|
167175
|
+
auto type = LogicalType::BIGINT;
|
|
167176
|
+
auto name = "delim_index";
|
|
167177
|
+
CorrelatedColumnInfo info(binding, type, name, 0);
|
|
167178
|
+
correlated_columns.insert(correlated_columns.begin(), move(info));
|
|
167179
|
+
return false;
|
|
167180
|
+
}
|
|
167181
|
+
|
|
167077
167182
|
static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubqueryExpression &expr,
|
|
167078
167183
|
unique_ptr<LogicalOperator> &root,
|
|
167079
167184
|
unique_ptr<LogicalOperator> plan) {
|
|
167080
167185
|
auto &correlated_columns = expr.binder->correlated_columns;
|
|
167186
|
+
// FIXME: there should be a way of disabling decorrelation for ANY queries as well, but not for now...
|
|
167187
|
+
bool perform_delim =
|
|
167188
|
+
expr.subquery_type == SubqueryType::ANY ? true : PerformDuplicateElimination(binder, correlated_columns);
|
|
167081
167189
|
D_ASSERT(expr.IsCorrelated());
|
|
167082
167190
|
// correlated subquery
|
|
167083
167191
|
// for a more in-depth explanation of this code, read the paper "Unnesting Arbitrary Subqueries"
|
|
@@ -167094,15 +167202,15 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
|
|
|
167094
167202
|
// NULL values are equal in this join because we join on the correlated columns ONLY
|
|
167095
167203
|
// and e.g. in the query: SELECT (SELECT 42 FROM integers WHERE i1.i IS NULL LIMIT 1) FROM integers i1;
|
|
167096
167204
|
// the input value NULL will generate the value 42, and we need to join NULL on the LHS with NULL on the RHS
|
|
167097
|
-
auto delim_join = CreateDuplicateEliminatedJoin(correlated_columns, JoinType::SINGLE);
|
|
167098
|
-
|
|
167099
167205
|
// the left side is the original plan
|
|
167100
167206
|
// this is the side that will be duplicate eliminated and pushed into the RHS
|
|
167101
|
-
delim_join
|
|
167207
|
+
auto delim_join =
|
|
167208
|
+
CreateDuplicateEliminatedJoin(correlated_columns, JoinType::SINGLE, move(root), perform_delim);
|
|
167209
|
+
|
|
167102
167210
|
// the right side initially is a DEPENDENT join between the duplicate eliminated scan and the subquery
|
|
167103
167211
|
// HOWEVER: we do not explicitly create the dependent join
|
|
167104
167212
|
// instead, we eliminate the dependent join by pushing it down into the right side of the plan
|
|
167105
|
-
FlattenDependentJoins flatten(binder, correlated_columns);
|
|
167213
|
+
FlattenDependentJoins flatten(binder, correlated_columns, perform_delim);
|
|
167106
167214
|
|
|
167107
167215
|
// first we check which logical operators have correlated expressions in the first place
|
|
167108
167216
|
flatten.DetectCorrelatedExpressions(plan.get());
|
|
@@ -167115,7 +167223,7 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
|
|
|
167115
167223
|
auto plan_columns = dependent_join->GetColumnBindings();
|
|
167116
167224
|
|
|
167117
167225
|
// now create the join conditions
|
|
167118
|
-
CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset);
|
|
167226
|
+
CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset, perform_delim);
|
|
167119
167227
|
delim_join->AddChild(move(dependent_join));
|
|
167120
167228
|
root = move(delim_join);
|
|
167121
167229
|
// finally push the BoundColumnRefExpression referring to the data element returned by the join
|
|
@@ -167126,12 +167234,10 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
|
|
|
167126
167234
|
// correlated EXISTS query
|
|
167127
167235
|
// this query is similar to the correlated SCALAR query, except we use a MARK join here
|
|
167128
167236
|
idx_t mark_index = binder.GenerateTableIndex();
|
|
167129
|
-
auto delim_join = CreateDuplicateEliminatedJoin(correlated_columns, JoinType::MARK);
|
|
167237
|
+
auto delim_join = CreateDuplicateEliminatedJoin(correlated_columns, JoinType::MARK, move(root), perform_delim);
|
|
167130
167238
|
delim_join->mark_index = mark_index;
|
|
167131
|
-
// LHS
|
|
167132
|
-
delim_join->AddChild(move(root));
|
|
167133
167239
|
// RHS
|
|
167134
|
-
FlattenDependentJoins flatten(binder, correlated_columns);
|
|
167240
|
+
FlattenDependentJoins flatten(binder, correlated_columns, perform_delim);
|
|
167135
167241
|
flatten.DetectCorrelatedExpressions(plan.get());
|
|
167136
167242
|
auto dependent_join = flatten.PushDownDependentJoin(move(plan));
|
|
167137
167243
|
|
|
@@ -167139,7 +167245,7 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
|
|
|
167139
167245
|
auto plan_columns = dependent_join->GetColumnBindings();
|
|
167140
167246
|
|
|
167141
167247
|
// now we create the join conditions between the dependent join and the original table
|
|
167142
|
-
CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset);
|
|
167248
|
+
CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset, perform_delim);
|
|
167143
167249
|
delim_join->AddChild(move(dependent_join));
|
|
167144
167250
|
root = move(delim_join);
|
|
167145
167251
|
// finally push the BoundColumnRefExpression referring to the marker
|
|
@@ -167155,10 +167261,8 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
|
|
|
167155
167261
|
// as the MARK join has one extra join condition (the original condition, of the ANY expression, e.g.
|
|
167156
167262
|
// [i=ANY(...)])
|
|
167157
167263
|
idx_t mark_index = binder.GenerateTableIndex();
|
|
167158
|
-
auto delim_join = CreateDuplicateEliminatedJoin(correlated_columns, JoinType::MARK);
|
|
167264
|
+
auto delim_join = CreateDuplicateEliminatedJoin(correlated_columns, JoinType::MARK, move(root), perform_delim);
|
|
167159
167265
|
delim_join->mark_index = mark_index;
|
|
167160
|
-
// LHS
|
|
167161
|
-
delim_join->AddChild(move(root));
|
|
167162
167266
|
// RHS
|
|
167163
167267
|
FlattenDependentJoins flatten(binder, correlated_columns, true);
|
|
167164
167268
|
flatten.DetectCorrelatedExpressions(plan.get());
|
|
@@ -167168,7 +167272,7 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
|
|
|
167168
167272
|
auto plan_columns = dependent_join->GetColumnBindings();
|
|
167169
167273
|
|
|
167170
167274
|
// now we create the join conditions between the dependent join and the original table
|
|
167171
|
-
CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset);
|
|
167275
|
+
CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset, perform_delim);
|
|
167172
167276
|
// add the actual condition based on the ANY/ALL predicate
|
|
167173
167277
|
JoinCondition compare_cond;
|
|
167174
167278
|
compare_cond.left = move(expr.child);
|
|
@@ -167192,9 +167296,11 @@ public:
|
|
|
167192
167296
|
void VisitOperator(LogicalOperator &op) override {
|
|
167193
167297
|
if (!op.children.empty()) {
|
|
167194
167298
|
root = move(op.children[0]);
|
|
167299
|
+
D_ASSERT(root);
|
|
167195
167300
|
VisitOperatorExpressions(op);
|
|
167196
167301
|
op.children[0] = move(root);
|
|
167197
167302
|
for (idx_t i = 0; i < op.children.size(); i++) {
|
|
167303
|
+
D_ASSERT(op.children[i]);
|
|
167198
167304
|
VisitOperator(*op.children[i]);
|
|
167199
167305
|
}
|
|
167200
167306
|
}
|
|
@@ -173807,6 +173913,7 @@ string IsNotNullFilter::ToString(const string &column_name) {
|
|
|
173807
173913
|
|
|
173808
173914
|
|
|
173809
173915
|
|
|
173916
|
+
|
|
173810
173917
|
namespace duckdb {
|
|
173811
173918
|
|
|
173812
173919
|
unique_ptr<Expression> JoinCondition::CreateExpression(JoinCondition cond) {
|
|
@@ -173814,6 +173921,21 @@ unique_ptr<Expression> JoinCondition::CreateExpression(JoinCondition cond) {
|
|
|
173814
173921
|
return move(bound_comparison);
|
|
173815
173922
|
}
|
|
173816
173923
|
|
|
173924
|
+
unique_ptr<Expression> JoinCondition::CreateExpression(vector<JoinCondition> conditions) {
|
|
173925
|
+
unique_ptr<Expression> result;
|
|
173926
|
+
for (auto &cond : conditions) {
|
|
173927
|
+
auto expr = CreateExpression(move(cond));
|
|
173928
|
+
if (!result) {
|
|
173929
|
+
result = move(expr);
|
|
173930
|
+
} else {
|
|
173931
|
+
auto conj =
|
|
173932
|
+
make_unique<BoundConjunctionExpression>(ExpressionType::CONJUNCTION_AND, move(expr), move(result));
|
|
173933
|
+
result = move(conj);
|
|
173934
|
+
}
|
|
173935
|
+
}
|
|
173936
|
+
return result;
|
|
173937
|
+
}
|
|
173938
|
+
|
|
173817
173939
|
JoinSide JoinSide::CombineJoinSide(JoinSide left, JoinSide right) {
|
|
173818
173940
|
if (left == JoinSide::NONE) {
|
|
173819
173941
|
return right;
|
|
@@ -173896,6 +174018,20 @@ JoinSide JoinSide::GetJoinSide(const unordered_set<idx_t> &bindings, unordered_s
|
|
|
173896
174018
|
|
|
173897
174019
|
namespace duckdb {
|
|
173898
174020
|
|
|
174021
|
+
LogicalOperator::LogicalOperator(LogicalOperatorType type) : type(type) {
|
|
174022
|
+
}
|
|
174023
|
+
|
|
174024
|
+
LogicalOperator::LogicalOperator(LogicalOperatorType type, vector<unique_ptr<Expression>> expressions)
|
|
174025
|
+
: type(type), expressions(move(expressions)) {
|
|
174026
|
+
}
|
|
174027
|
+
|
|
174028
|
+
LogicalOperator::~LogicalOperator() {
|
|
174029
|
+
}
|
|
174030
|
+
|
|
174031
|
+
vector<ColumnBinding> LogicalOperator::GetColumnBindings() {
|
|
174032
|
+
return {ColumnBinding(0, 0)};
|
|
174033
|
+
}
|
|
174034
|
+
|
|
173899
174035
|
string LogicalOperator::GetName() const {
|
|
173900
174036
|
return LogicalOperatorToString(type);
|
|
173901
174037
|
}
|
|
@@ -173999,6 +174135,20 @@ void LogicalOperator::Verify() {
|
|
|
173999
174135
|
#endif
|
|
174000
174136
|
}
|
|
174001
174137
|
|
|
174138
|
+
void LogicalOperator::AddChild(unique_ptr<LogicalOperator> child) {
|
|
174139
|
+
D_ASSERT(child);
|
|
174140
|
+
children.push_back(move(child));
|
|
174141
|
+
}
|
|
174142
|
+
|
|
174143
|
+
idx_t LogicalOperator::EstimateCardinality(ClientContext &context) {
|
|
174144
|
+
// simple estimator, just take the max of the children
|
|
174145
|
+
idx_t max_cardinality = 0;
|
|
174146
|
+
for (auto &child : children) {
|
|
174147
|
+
max_cardinality = MaxValue(child->EstimateCardinality(context), max_cardinality);
|
|
174148
|
+
}
|
|
174149
|
+
return max_cardinality;
|
|
174150
|
+
}
|
|
174151
|
+
|
|
174002
174152
|
void LogicalOperator::Print() {
|
|
174003
174153
|
Printer::Print(ToString());
|
|
174004
174154
|
}
|
|
@@ -175100,8 +175250,8 @@ public:
|
|
|
175100
175250
|
namespace duckdb {
|
|
175101
175251
|
|
|
175102
175252
|
FlattenDependentJoins::FlattenDependentJoins(Binder &binder, const vector<CorrelatedColumnInfo> &correlated,
|
|
175103
|
-
bool any_join)
|
|
175104
|
-
: binder(binder), correlated_columns(correlated), any_join(any_join) {
|
|
175253
|
+
bool perform_delim, bool any_join)
|
|
175254
|
+
: binder(binder), correlated_columns(correlated), perform_delim(perform_delim), any_join(any_join) {
|
|
175105
175255
|
for (idx_t i = 0; i < correlated_columns.size(); i++) {
|
|
175106
175256
|
auto &col = correlated_columns[i];
|
|
175107
175257
|
correlated_map[col.binding] = i;
|
|
@@ -175201,8 +175351,9 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
|
|
|
175201
175351
|
// now we add all the columns of the delim_scan to the projection list
|
|
175202
175352
|
auto proj = (LogicalProjection *)plan.get();
|
|
175203
175353
|
for (idx_t i = 0; i < correlated_columns.size(); i++) {
|
|
175354
|
+
auto &col = correlated_columns[i];
|
|
175204
175355
|
auto colref = make_unique<BoundColumnRefExpression>(
|
|
175205
|
-
|
|
175356
|
+
col.name, col.type, ColumnBinding(base_binding.table_index, base_binding.column_index + i));
|
|
175206
175357
|
plan->expressions.push_back(move(colref));
|
|
175207
175358
|
}
|
|
175208
175359
|
|
|
@@ -175223,15 +175374,42 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
|
|
|
175223
175374
|
RewriteCorrelatedExpressions rewriter(base_binding, correlated_map);
|
|
175224
175375
|
rewriter.VisitOperator(*plan);
|
|
175225
175376
|
// now we add all the columns of the delim_scan to the grouping operators AND the projection list
|
|
175226
|
-
|
|
175377
|
+
idx_t delim_table_index;
|
|
175378
|
+
idx_t delim_column_offset;
|
|
175379
|
+
idx_t delim_data_offset;
|
|
175380
|
+
auto new_group_count = perform_delim ? correlated_columns.size() : 1;
|
|
175381
|
+
for (idx_t i = 0; i < new_group_count; i++) {
|
|
175382
|
+
auto &col = correlated_columns[i];
|
|
175227
175383
|
auto colref = make_unique<BoundColumnRefExpression>(
|
|
175228
|
-
|
|
175384
|
+
col.name, col.type, ColumnBinding(base_binding.table_index, base_binding.column_index + i));
|
|
175229
175385
|
for (auto &set : aggr.grouping_sets) {
|
|
175230
175386
|
set.insert(aggr.groups.size());
|
|
175231
175387
|
}
|
|
175232
175388
|
aggr.groups.push_back(move(colref));
|
|
175233
175389
|
}
|
|
175234
|
-
if (
|
|
175390
|
+
if (!perform_delim) {
|
|
175391
|
+
// if we are not performing the duplicate elimination, we have only added the row_id column to the grouping
|
|
175392
|
+
// operators in this case, we push a FIRST aggregate for each of the remaining expressions
|
|
175393
|
+
delim_table_index = aggr.aggregate_index;
|
|
175394
|
+
delim_column_offset = aggr.expressions.size();
|
|
175395
|
+
delim_data_offset = aggr.groups.size();
|
|
175396
|
+
for (idx_t i = 0; i < correlated_columns.size(); i++) {
|
|
175397
|
+
auto &col = correlated_columns[i];
|
|
175398
|
+
auto first_aggregate = FirstFun::GetFunction(col.type);
|
|
175399
|
+
auto colref = make_unique<BoundColumnRefExpression>(
|
|
175400
|
+
col.name, col.type, ColumnBinding(base_binding.table_index, base_binding.column_index + i));
|
|
175401
|
+
vector<unique_ptr<Expression>> aggr_children;
|
|
175402
|
+
aggr_children.push_back(move(colref));
|
|
175403
|
+
auto first_fun = make_unique<BoundAggregateExpression>(move(first_aggregate), move(aggr_children),
|
|
175404
|
+
nullptr, nullptr, false);
|
|
175405
|
+
aggr.expressions.push_back(move(first_fun));
|
|
175406
|
+
}
|
|
175407
|
+
} else {
|
|
175408
|
+
delim_table_index = aggr.group_index;
|
|
175409
|
+
delim_column_offset = aggr.groups.size() - correlated_columns.size();
|
|
175410
|
+
delim_data_offset = aggr.groups.size();
|
|
175411
|
+
}
|
|
175412
|
+
if (aggr.groups.size() == new_group_count) {
|
|
175235
175413
|
// we have to perform a LEFT OUTER JOIN between the result of this aggregate and the delim scan
|
|
175236
175414
|
// FIXME: this does not always have to be a LEFT OUTER JOIN, depending on whether aggr.expressions return
|
|
175237
175415
|
// NULL or a value
|
|
@@ -175247,13 +175425,12 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
|
|
|
175247
175425
|
auto delim_scan = make_unique<LogicalDelimGet>(left_index, delim_types);
|
|
175248
175426
|
join->children.push_back(move(delim_scan));
|
|
175249
175427
|
join->children.push_back(move(plan));
|
|
175250
|
-
for (idx_t i = 0; i <
|
|
175428
|
+
for (idx_t i = 0; i < new_group_count; i++) {
|
|
175429
|
+
auto &col = correlated_columns[i];
|
|
175251
175430
|
JoinCondition cond;
|
|
175252
|
-
cond.left =
|
|
175253
|
-
make_unique<BoundColumnRefExpression>(correlated_columns[i].type, ColumnBinding(left_index, i));
|
|
175431
|
+
cond.left = make_unique<BoundColumnRefExpression>(col.name, col.type, ColumnBinding(left_index, i));
|
|
175254
175432
|
cond.right = make_unique<BoundColumnRefExpression>(
|
|
175255
|
-
correlated_columns[i].type,
|
|
175256
|
-
ColumnBinding(aggr.group_index, (aggr.groups.size() - correlated_columns.size()) + i));
|
|
175433
|
+
correlated_columns[i].type, ColumnBinding(delim_table_index, delim_column_offset + i));
|
|
175257
175434
|
cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM;
|
|
175258
175435
|
join->conditions.push_back(move(cond));
|
|
175259
175436
|
}
|
|
@@ -175269,16 +175446,15 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
|
|
|
175269
175446
|
}
|
|
175270
175447
|
}
|
|
175271
175448
|
// now we update the delim_index
|
|
175272
|
-
|
|
175273
175449
|
base_binding.table_index = left_index;
|
|
175274
175450
|
this->delim_offset = base_binding.column_index = 0;
|
|
175275
175451
|
this->data_offset = 0;
|
|
175276
175452
|
return move(join);
|
|
175277
175453
|
} else {
|
|
175278
175454
|
// update the delim_index
|
|
175279
|
-
base_binding.table_index =
|
|
175280
|
-
this->delim_offset = base_binding.column_index =
|
|
175281
|
-
this->data_offset =
|
|
175455
|
+
base_binding.table_index = delim_table_index;
|
|
175456
|
+
this->delim_offset = base_binding.column_index = delim_column_offset;
|
|
175457
|
+
this->data_offset = delim_data_offset;
|
|
175282
175458
|
return plan;
|
|
175283
175459
|
}
|
|
175284
175460
|
}
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.4.1-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "f5d15448b"
|
|
15
|
+
#define DUCKDB_VERSION "v0.4.1-dev318"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -8879,13 +8879,9 @@ namespace duckdb {
|
|
|
8879
8879
|
//! logical query tree
|
|
8880
8880
|
class LogicalOperator {
|
|
8881
8881
|
public:
|
|
8882
|
-
explicit LogicalOperator(LogicalOperatorType type)
|
|
8883
|
-
|
|
8884
|
-
LogicalOperator(
|
|
8885
|
-
: type(type), expressions(move(expressions)) {
|
|
8886
|
-
}
|
|
8887
|
-
virtual ~LogicalOperator() {
|
|
8888
|
-
}
|
|
8882
|
+
explicit LogicalOperator(LogicalOperatorType type);
|
|
8883
|
+
LogicalOperator(LogicalOperatorType type, vector<unique_ptr<Expression>> expressions);
|
|
8884
|
+
virtual ~LogicalOperator();
|
|
8889
8885
|
|
|
8890
8886
|
//! The type of the logical operator
|
|
8891
8887
|
LogicalOperatorType type;
|
|
@@ -8899,9 +8895,7 @@ public:
|
|
|
8899
8895
|
idx_t estimated_cardinality = 0;
|
|
8900
8896
|
|
|
8901
8897
|
public:
|
|
8902
|
-
virtual vector<ColumnBinding> GetColumnBindings()
|
|
8903
|
-
return {ColumnBinding(0, 0)};
|
|
8904
|
-
}
|
|
8898
|
+
virtual vector<ColumnBinding> GetColumnBindings();
|
|
8905
8899
|
static vector<ColumnBinding> GenerateColumnBindings(idx_t table_idx, idx_t column_count);
|
|
8906
8900
|
static vector<LogicalType> MapTypes(const vector<LogicalType> &types, const vector<idx_t> &projection_map);
|
|
8907
8901
|
static vector<ColumnBinding> MapBindings(const vector<ColumnBinding> &types, const vector<idx_t> &projection_map);
|
|
@@ -8916,18 +8910,9 @@ public:
|
|
|
8916
8910
|
//! Debug method: verify that the integrity of expressions & child nodes are maintained
|
|
8917
8911
|
virtual void Verify();
|
|
8918
8912
|
|
|
8919
|
-
void AddChild(unique_ptr<LogicalOperator> child)
|
|
8920
|
-
children.push_back(move(child));
|
|
8921
|
-
}
|
|
8913
|
+
void AddChild(unique_ptr<LogicalOperator> child);
|
|
8922
8914
|
|
|
8923
|
-
virtual idx_t EstimateCardinality(ClientContext &context)
|
|
8924
|
-
// simple estimator, just take the max of the children
|
|
8925
|
-
idx_t max_cardinality = 0;
|
|
8926
|
-
for (auto &child : children) {
|
|
8927
|
-
max_cardinality = MaxValue(child->EstimateCardinality(context), max_cardinality);
|
|
8928
|
-
}
|
|
8929
|
-
return max_cardinality;
|
|
8930
|
-
}
|
|
8915
|
+
virtual idx_t EstimateCardinality(ClientContext &context);
|
|
8931
8916
|
|
|
8932
8917
|
protected:
|
|
8933
8918
|
//! Resolve types for this specific operator
|
|
@@ -14373,8 +14358,11 @@ struct CorrelatedColumnInfo {
|
|
|
14373
14358
|
string name;
|
|
14374
14359
|
idx_t depth;
|
|
14375
14360
|
|
|
14361
|
+
CorrelatedColumnInfo(ColumnBinding binding, LogicalType type_p, string name_p, idx_t depth)
|
|
14362
|
+
: binding(binding), type(move(type_p)), name(move(name_p)), depth(depth) {
|
|
14363
|
+
}
|
|
14376
14364
|
explicit CorrelatedColumnInfo(BoundColumnRefExpression &expr)
|
|
14377
|
-
:
|
|
14365
|
+
: CorrelatedColumnInfo(expr.binding, expr.return_type, expr.GetName(), expr.depth) {
|
|
14378
14366
|
}
|
|
14379
14367
|
|
|
14380
14368
|
bool operator==(const CorrelatedColumnInfo &rhs) const {
|