duckdb 0.4.1-dev314.0 → 0.4.1-dev318.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.4.1-dev314.0",
4
+ "version": "0.4.1-dev318.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -39446,6 +39446,12 @@ bool ChunkCollection::Equals(ChunkCollection &other) {
39446
39446
  if (compare_equals) {
39447
39447
  return true;
39448
39448
  }
39449
+ for (auto &type : types) {
39450
+ // sort not supported
39451
+ if (type.InternalType() == PhysicalType::LIST || type.InternalType() == PhysicalType::STRUCT) {
39452
+ return false;
39453
+ }
39454
+ }
39449
39455
  // if the results are not equal,
39450
39456
  // sort both chunk collections to ensure the comparison is not order insensitive
39451
39457
  vector<OrderType> desc(ColumnCount(), OrderType::DESCENDING);
@@ -54373,6 +54379,7 @@ public:
54373
54379
  //! Turns the JoinCondition into an expression; note that this destroys the JoinCondition as the expression inherits
54374
54380
  //! the left/right expressions
54375
54381
  static unique_ptr<Expression> CreateExpression(JoinCondition cond);
54382
+ static unique_ptr<Expression> CreateExpression(vector<JoinCondition> conditions);
54376
54383
 
54377
54384
  public:
54378
54385
  unique_ptr<Expression> left;
@@ -67309,6 +67316,8 @@ public:
67309
67316
  return true;
67310
67317
  }
67311
67318
 
67319
+ static bool IsSupported(const vector<JoinCondition> &conditions);
67320
+
67312
67321
  private:
67313
67322
  // resolve joins that output max N elements (SEMI, ANTI, MARK)
67314
67323
  void ResolveSimpleJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk, OperatorState &state) const;
@@ -67425,6 +67434,16 @@ void PhysicalJoin::ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &left
67425
67434
  }
67426
67435
  }
67427
67436
 
67437
+ bool PhysicalNestedLoopJoin::IsSupported(const vector<JoinCondition> &conditions) {
67438
+ for (auto &cond : conditions) {
67439
+ if (cond.left->return_type.InternalType() == PhysicalType::STRUCT ||
67440
+ cond.left->return_type.InternalType() == PhysicalType::LIST) {
67441
+ return false;
67442
+ }
67443
+ }
67444
+ return true;
67445
+ }
67446
+
67428
67447
  //===--------------------------------------------------------------------===//
67429
67448
  // Sink
67430
67449
  //===--------------------------------------------------------------------===//
@@ -76036,6 +76055,44 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalChunkGet &
76036
76055
 
76037
76056
 
76038
76057
 
76058
+ //===----------------------------------------------------------------------===//
76059
+ // DuckDB
76060
+ //
76061
+ // duckdb/planner/expression_iterator.hpp
76062
+ //
76063
+ //
76064
+ //===----------------------------------------------------------------------===//
76065
+
76066
+
76067
+
76068
+
76069
+
76070
+
76071
+ #include <functional>
76072
+
76073
+ namespace duckdb {
76074
+ class BoundQueryNode;
76075
+ class BoundTableRef;
76076
+
76077
+ class ExpressionIterator {
76078
+ public:
76079
+ static void EnumerateChildren(const Expression &expression,
76080
+ const std::function<void(const Expression &child)> &callback);
76081
+ static void EnumerateChildren(Expression &expression, const std::function<void(Expression &child)> &callback);
76082
+ static void EnumerateChildren(Expression &expression,
76083
+ const std::function<void(unique_ptr<Expression> &child)> &callback);
76084
+
76085
+ static void EnumerateExpression(unique_ptr<Expression> &expr,
76086
+ const std::function<void(Expression &child)> &callback);
76087
+
76088
+ static void EnumerateTableRefChildren(BoundTableRef &ref, const std::function<void(Expression &child)> &callback);
76089
+ static void EnumerateQueryNodeChildren(BoundQueryNode &node,
76090
+ const std::function<void(Expression &child)> &callback);
76091
+ };
76092
+
76093
+ } // namespace duckdb
76094
+
76095
+
76039
76096
  namespace duckdb {
76040
76097
 
76041
76098
  static bool CanPlanIndexJoin(Transaction &transaction, TableScanBindData *bind_data, PhysicalTableScan &scan) {
@@ -76178,6 +76235,14 @@ void TransformIndexJoin(ClientContext &context, LogicalComparisonJoin &op, Index
76178
76235
  }
76179
76236
  }
76180
76237
 
76238
+ static void RewriteJoinCondition(Expression &expr, idx_t offset) {
76239
+ if (expr.type == ExpressionType::BOUND_REF) {
76240
+ auto &ref = (BoundReferenceExpression &)expr;
76241
+ ref.index += offset;
76242
+ }
76243
+ ExpressionIterator::EnumerateChildren(expr, [&](Expression &child) { RewriteJoinCondition(child, offset); });
76244
+ }
76245
+
76181
76246
  unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparisonJoin &op) {
76182
76247
  // now visit the children
76183
76248
  D_ASSERT(op.children.size() == 2);
@@ -76263,10 +76328,17 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparison
76263
76328
  // range join: use piecewise merge join
76264
76329
  plan = make_unique<PhysicalPiecewiseMergeJoin>(op, move(left), move(right), move(op.conditions),
76265
76330
  op.join_type, op.estimated_cardinality);
76266
- } else {
76331
+ } else if (PhysicalNestedLoopJoin::IsSupported(op.conditions)) {
76267
76332
  // inequality join: use nested loop
76268
76333
  plan = make_unique<PhysicalNestedLoopJoin>(op, move(left), move(right), move(op.conditions), op.join_type,
76269
76334
  op.estimated_cardinality);
76335
+ } else {
76336
+ for (auto &cond : op.conditions) {
76337
+ RewriteJoinCondition(*cond.right, left->types.size());
76338
+ }
76339
+ auto condition = JoinCondition::CreateExpression(move(op.conditions));
76340
+ plan = make_unique<PhysicalBlockwiseNLJoin>(op, move(left), move(right), move(condition), op.join_type,
76341
+ op.estimated_cardinality);
76270
76342
  }
76271
76343
  }
76272
76344
  return plan;
@@ -76415,42 +76487,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
76415
76487
 
76416
76488
 
76417
76489
 
76418
- //===----------------------------------------------------------------------===//
76419
- // DuckDB
76420
- //
76421
- // duckdb/planner/expression_iterator.hpp
76422
- //
76423
- //
76424
- //===----------------------------------------------------------------------===//
76425
-
76426
-
76427
-
76428
-
76429
-
76430
-
76431
- #include <functional>
76432
-
76433
- namespace duckdb {
76434
- class BoundQueryNode;
76435
- class BoundTableRef;
76436
-
76437
- class ExpressionIterator {
76438
- public:
76439
- static void EnumerateChildren(const Expression &expression,
76440
- const std::function<void(const Expression &child)> &callback);
76441
- static void EnumerateChildren(Expression &expression, const std::function<void(Expression &child)> &callback);
76442
- static void EnumerateChildren(Expression &expression,
76443
- const std::function<void(unique_ptr<Expression> &child)> &callback);
76444
-
76445
- static void EnumerateExpression(unique_ptr<Expression> &expr,
76446
- const std::function<void(Expression &child)> &callback);
76447
-
76448
- static void EnumerateTableRefChildren(BoundTableRef &ref, const std::function<void(Expression &child)> &callback);
76449
- static void EnumerateQueryNodeChildren(BoundQueryNode &node,
76450
- const std::function<void(Expression &child)> &callback);
76451
- };
76452
-
76453
- } // namespace duckdb
76454
76490
 
76455
76491
  //===----------------------------------------------------------------------===//
76456
76492
  // DuckDB
@@ -155632,6 +155668,7 @@ private:
155632
155668
  unique_ptr<ConstantExpression> TransformValue(duckdb_libpgquery::PGValue val);
155633
155669
  //! Transform a Postgres operator into an Expression
155634
155670
  unique_ptr<ParsedExpression> TransformAExpr(duckdb_libpgquery::PGAExpr *root);
155671
+ unique_ptr<ParsedExpression> TransformAExprInternal(duckdb_libpgquery::PGAExpr *root);
155635
155672
  //! Transform a Postgres abstract expression into an Expression
155636
155673
  unique_ptr<ParsedExpression> TransformExpression(duckdb_libpgquery::PGNode *node);
155637
155674
  //! Transform a Postgres function call into an Expression
@@ -159478,7 +159515,7 @@ unique_ptr<ParsedExpression> Transformer::TransformBinaryOperator(const string &
159478
159515
  }
159479
159516
  }
159480
159517
 
159481
- unique_ptr<ParsedExpression> Transformer::TransformAExpr(duckdb_libpgquery::PGAExpr *root) {
159518
+ unique_ptr<ParsedExpression> Transformer::TransformAExprInternal(duckdb_libpgquery::PGAExpr *root) {
159482
159519
  D_ASSERT(root);
159483
159520
  auto name = string((reinterpret_cast<duckdb_libpgquery::PGValue *>(root->name->head->data.ptr_value))->val.str);
159484
159521
 
@@ -159626,6 +159663,14 @@ unique_ptr<ParsedExpression> Transformer::TransformAExpr(duckdb_libpgquery::PGAE
159626
159663
  }
159627
159664
  }
159628
159665
 
159666
+ unique_ptr<ParsedExpression> Transformer::TransformAExpr(duckdb_libpgquery::PGAExpr *root) {
159667
+ auto result = TransformAExprInternal(root);
159668
+ if (result) {
159669
+ result->query_location = root->location;
159670
+ }
159671
+ return result;
159672
+ }
159673
+
159629
159674
  } // namespace duckdb
159630
159675
 
159631
159676
 
@@ -166900,7 +166945,8 @@ namespace duckdb {
166900
166945
  //! The FlattenDependentJoins class is responsible for pushing the dependent join down into the plan to create a
166901
166946
  //! flattened subquery
166902
166947
  struct FlattenDependentJoins {
166903
- FlattenDependentJoins(Binder &binder, const vector<CorrelatedColumnInfo> &correlated, bool any_join = false);
166948
+ FlattenDependentJoins(Binder &binder, const vector<CorrelatedColumnInfo> &correlated, bool perform_delim = true,
166949
+ bool any_join = false);
166904
166950
 
166905
166951
  //! Detects which Logical Operators have correlated expressions that they are dependent upon, filling the
166906
166952
  //! has_correlated_expressions map.
@@ -166919,6 +166965,7 @@ struct FlattenDependentJoins {
166919
166965
  const vector<CorrelatedColumnInfo> &correlated_columns;
166920
166966
  vector<LogicalType> delim_types;
166921
166967
 
166968
+ bool perform_delim;
166922
166969
  bool any_join;
166923
166970
 
166924
166971
  private:
@@ -166930,6 +166977,9 @@ private:
166930
166977
 
166931
166978
 
166932
166979
 
166980
+
166981
+
166982
+
166933
166983
  namespace duckdb {
166934
166984
 
166935
166985
  static unique_ptr<Expression> PlanUncorrelatedSubquery(Binder &binder, BoundSubqueryExpression &expr,
@@ -167051,8 +167101,25 @@ static unique_ptr<Expression> PlanUncorrelatedSubquery(Binder &binder, BoundSubq
167051
167101
  }
167052
167102
 
167053
167103
  static unique_ptr<LogicalDelimJoin> CreateDuplicateEliminatedJoin(vector<CorrelatedColumnInfo> &correlated_columns,
167054
- JoinType join_type) {
167104
+ JoinType join_type,
167105
+ unique_ptr<LogicalOperator> original_plan,
167106
+ bool perform_delim) {
167055
167107
  auto delim_join = make_unique<LogicalDelimJoin>(join_type);
167108
+ if (!perform_delim) {
167109
+ // if we are not performing a delim join, we push a row_number() OVER() window operator on the LHS
167110
+ // and perform all duplicate elimination on that row number instead
167111
+ D_ASSERT(correlated_columns[0].type.id() == LogicalTypeId::BIGINT);
167112
+ auto window = make_unique<LogicalWindow>(correlated_columns[0].binding.table_index);
167113
+ auto row_number = make_unique<BoundWindowExpression>(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT,
167114
+ nullptr, nullptr);
167115
+ row_number->start = WindowBoundary::UNBOUNDED_PRECEDING;
167116
+ row_number->end = WindowBoundary::CURRENT_ROW_ROWS;
167117
+ row_number->alias = "delim_index";
167118
+ window->expressions.push_back(move(row_number));
167119
+ window->AddChild(move(original_plan));
167120
+ original_plan = move(window);
167121
+ }
167122
+ delim_join->AddChild(move(original_plan));
167056
167123
  for (idx_t i = 0; i < correlated_columns.size(); i++) {
167057
167124
  auto &col = correlated_columns[i];
167058
167125
  delim_join->duplicate_eliminated_columns.push_back(
@@ -167063,8 +167130,9 @@ static unique_ptr<LogicalDelimJoin> CreateDuplicateEliminatedJoin(vector<Correla
167063
167130
  }
167064
167131
 
167065
167132
  static void CreateDelimJoinConditions(LogicalDelimJoin &delim_join, vector<CorrelatedColumnInfo> &correlated_columns,
167066
- vector<ColumnBinding> bindings, idx_t base_offset) {
167067
- for (idx_t i = 0; i < correlated_columns.size(); i++) {
167133
+ vector<ColumnBinding> bindings, idx_t base_offset, bool perform_delim) {
167134
+ auto col_count = perform_delim ? correlated_columns.size() : 1;
167135
+ for (idx_t i = 0; i < col_count; i++) {
167068
167136
  auto &col = correlated_columns[i];
167069
167137
  JoinCondition cond;
167070
167138
  cond.left = make_unique<BoundColumnRefExpression>(col.name, col.type, col.binding);
@@ -167074,10 +167142,50 @@ static void CreateDelimJoinConditions(LogicalDelimJoin &delim_join, vector<Corre
167074
167142
  }
167075
167143
  }
167076
167144
 
167145
+ static bool PerformDelimOnType(const LogicalType &type) {
167146
+ if (type.InternalType() == PhysicalType::LIST) {
167147
+ return false;
167148
+ }
167149
+ if (type.InternalType() == PhysicalType::STRUCT) {
167150
+ for (auto &entry : StructType::GetChildTypes(type)) {
167151
+ if (!PerformDelimOnType(entry.second)) {
167152
+ return false;
167153
+ }
167154
+ }
167155
+ }
167156
+ return true;
167157
+ }
167158
+
167159
+ static bool PerformDuplicateElimination(Binder &binder, vector<CorrelatedColumnInfo> &correlated_columns) {
167160
+ if (!ClientConfig::GetConfig(binder.context).enable_optimizer) {
167161
+ // if optimizations are disabled we always do a delim join
167162
+ return true;
167163
+ }
167164
+ bool perform_delim = true;
167165
+ for (auto &col : correlated_columns) {
167166
+ if (!PerformDelimOnType(col.type)) {
167167
+ perform_delim = false;
167168
+ break;
167169
+ }
167170
+ }
167171
+ if (perform_delim) {
167172
+ return true;
167173
+ }
167174
+ auto binding = ColumnBinding(binder.GenerateTableIndex(), 0);
167175
+ auto type = LogicalType::BIGINT;
167176
+ auto name = "delim_index";
167177
+ CorrelatedColumnInfo info(binding, type, name, 0);
167178
+ correlated_columns.insert(correlated_columns.begin(), move(info));
167179
+ return false;
167180
+ }
167181
+
167077
167182
  static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubqueryExpression &expr,
167078
167183
  unique_ptr<LogicalOperator> &root,
167079
167184
  unique_ptr<LogicalOperator> plan) {
167080
167185
  auto &correlated_columns = expr.binder->correlated_columns;
167186
+ // FIXME: there should be a way of disabling decorrelation for ANY queries as well, but not for now...
167187
+ bool perform_delim =
167188
+ expr.subquery_type == SubqueryType::ANY ? true : PerformDuplicateElimination(binder, correlated_columns);
167081
167189
  D_ASSERT(expr.IsCorrelated());
167082
167190
  // correlated subquery
167083
167191
  // for a more in-depth explanation of this code, read the paper "Unnesting Arbitrary Subqueries"
@@ -167094,15 +167202,15 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
167094
167202
  // NULL values are equal in this join because we join on the correlated columns ONLY
167095
167203
  // and e.g. in the query: SELECT (SELECT 42 FROM integers WHERE i1.i IS NULL LIMIT 1) FROM integers i1;
167096
167204
  // the input value NULL will generate the value 42, and we need to join NULL on the LHS with NULL on the RHS
167097
- auto delim_join = CreateDuplicateEliminatedJoin(correlated_columns, JoinType::SINGLE);
167098
-
167099
167205
  // the left side is the original plan
167100
167206
  // this is the side that will be duplicate eliminated and pushed into the RHS
167101
- delim_join->AddChild(move(root));
167207
+ auto delim_join =
167208
+ CreateDuplicateEliminatedJoin(correlated_columns, JoinType::SINGLE, move(root), perform_delim);
167209
+
167102
167210
  // the right side initially is a DEPENDENT join between the duplicate eliminated scan and the subquery
167103
167211
  // HOWEVER: we do not explicitly create the dependent join
167104
167212
  // instead, we eliminate the dependent join by pushing it down into the right side of the plan
167105
- FlattenDependentJoins flatten(binder, correlated_columns);
167213
+ FlattenDependentJoins flatten(binder, correlated_columns, perform_delim);
167106
167214
 
167107
167215
  // first we check which logical operators have correlated expressions in the first place
167108
167216
  flatten.DetectCorrelatedExpressions(plan.get());
@@ -167115,7 +167223,7 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
167115
167223
  auto plan_columns = dependent_join->GetColumnBindings();
167116
167224
 
167117
167225
  // now create the join conditions
167118
- CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset);
167226
+ CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset, perform_delim);
167119
167227
  delim_join->AddChild(move(dependent_join));
167120
167228
  root = move(delim_join);
167121
167229
  // finally push the BoundColumnRefExpression referring to the data element returned by the join
@@ -167126,12 +167234,10 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
167126
167234
  // correlated EXISTS query
167127
167235
  // this query is similar to the correlated SCALAR query, except we use a MARK join here
167128
167236
  idx_t mark_index = binder.GenerateTableIndex();
167129
- auto delim_join = CreateDuplicateEliminatedJoin(correlated_columns, JoinType::MARK);
167237
+ auto delim_join = CreateDuplicateEliminatedJoin(correlated_columns, JoinType::MARK, move(root), perform_delim);
167130
167238
  delim_join->mark_index = mark_index;
167131
- // LHS
167132
- delim_join->AddChild(move(root));
167133
167239
  // RHS
167134
- FlattenDependentJoins flatten(binder, correlated_columns);
167240
+ FlattenDependentJoins flatten(binder, correlated_columns, perform_delim);
167135
167241
  flatten.DetectCorrelatedExpressions(plan.get());
167136
167242
  auto dependent_join = flatten.PushDownDependentJoin(move(plan));
167137
167243
 
@@ -167139,7 +167245,7 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
167139
167245
  auto plan_columns = dependent_join->GetColumnBindings();
167140
167246
 
167141
167247
  // now we create the join conditions between the dependent join and the original table
167142
- CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset);
167248
+ CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset, perform_delim);
167143
167249
  delim_join->AddChild(move(dependent_join));
167144
167250
  root = move(delim_join);
167145
167251
  // finally push the BoundColumnRefExpression referring to the marker
@@ -167155,10 +167261,8 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
167155
167261
  // as the MARK join has one extra join condition (the original condition, of the ANY expression, e.g.
167156
167262
  // [i=ANY(...)])
167157
167263
  idx_t mark_index = binder.GenerateTableIndex();
167158
- auto delim_join = CreateDuplicateEliminatedJoin(correlated_columns, JoinType::MARK);
167264
+ auto delim_join = CreateDuplicateEliminatedJoin(correlated_columns, JoinType::MARK, move(root), perform_delim);
167159
167265
  delim_join->mark_index = mark_index;
167160
- // LHS
167161
- delim_join->AddChild(move(root));
167162
167266
  // RHS
167163
167267
  FlattenDependentJoins flatten(binder, correlated_columns, true);
167164
167268
  flatten.DetectCorrelatedExpressions(plan.get());
@@ -167168,7 +167272,7 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
167168
167272
  auto plan_columns = dependent_join->GetColumnBindings();
167169
167273
 
167170
167274
  // now we create the join conditions between the dependent join and the original table
167171
- CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset);
167275
+ CreateDelimJoinConditions(*delim_join, correlated_columns, plan_columns, flatten.delim_offset, perform_delim);
167172
167276
  // add the actual condition based on the ANY/ALL predicate
167173
167277
  JoinCondition compare_cond;
167174
167278
  compare_cond.left = move(expr.child);
@@ -167192,9 +167296,11 @@ public:
167192
167296
  void VisitOperator(LogicalOperator &op) override {
167193
167297
  if (!op.children.empty()) {
167194
167298
  root = move(op.children[0]);
167299
+ D_ASSERT(root);
167195
167300
  VisitOperatorExpressions(op);
167196
167301
  op.children[0] = move(root);
167197
167302
  for (idx_t i = 0; i < op.children.size(); i++) {
167303
+ D_ASSERT(op.children[i]);
167198
167304
  VisitOperator(*op.children[i]);
167199
167305
  }
167200
167306
  }
@@ -173807,6 +173913,7 @@ string IsNotNullFilter::ToString(const string &column_name) {
173807
173913
 
173808
173914
 
173809
173915
 
173916
+
173810
173917
  namespace duckdb {
173811
173918
 
173812
173919
  unique_ptr<Expression> JoinCondition::CreateExpression(JoinCondition cond) {
@@ -173814,6 +173921,21 @@ unique_ptr<Expression> JoinCondition::CreateExpression(JoinCondition cond) {
173814
173921
  return move(bound_comparison);
173815
173922
  }
173816
173923
 
173924
+ unique_ptr<Expression> JoinCondition::CreateExpression(vector<JoinCondition> conditions) {
173925
+ unique_ptr<Expression> result;
173926
+ for (auto &cond : conditions) {
173927
+ auto expr = CreateExpression(move(cond));
173928
+ if (!result) {
173929
+ result = move(expr);
173930
+ } else {
173931
+ auto conj =
173932
+ make_unique<BoundConjunctionExpression>(ExpressionType::CONJUNCTION_AND, move(expr), move(result));
173933
+ result = move(conj);
173934
+ }
173935
+ }
173936
+ return result;
173937
+ }
173938
+
173817
173939
  JoinSide JoinSide::CombineJoinSide(JoinSide left, JoinSide right) {
173818
173940
  if (left == JoinSide::NONE) {
173819
173941
  return right;
@@ -173896,6 +174018,20 @@ JoinSide JoinSide::GetJoinSide(const unordered_set<idx_t> &bindings, unordered_s
173896
174018
 
173897
174019
  namespace duckdb {
173898
174020
 
174021
+ LogicalOperator::LogicalOperator(LogicalOperatorType type) : type(type) {
174022
+ }
174023
+
174024
+ LogicalOperator::LogicalOperator(LogicalOperatorType type, vector<unique_ptr<Expression>> expressions)
174025
+ : type(type), expressions(move(expressions)) {
174026
+ }
174027
+
174028
+ LogicalOperator::~LogicalOperator() {
174029
+ }
174030
+
174031
+ vector<ColumnBinding> LogicalOperator::GetColumnBindings() {
174032
+ return {ColumnBinding(0, 0)};
174033
+ }
174034
+
173899
174035
  string LogicalOperator::GetName() const {
173900
174036
  return LogicalOperatorToString(type);
173901
174037
  }
@@ -173999,6 +174135,20 @@ void LogicalOperator::Verify() {
173999
174135
  #endif
174000
174136
  }
174001
174137
 
174138
+ void LogicalOperator::AddChild(unique_ptr<LogicalOperator> child) {
174139
+ D_ASSERT(child);
174140
+ children.push_back(move(child));
174141
+ }
174142
+
174143
+ idx_t LogicalOperator::EstimateCardinality(ClientContext &context) {
174144
+ // simple estimator, just take the max of the children
174145
+ idx_t max_cardinality = 0;
174146
+ for (auto &child : children) {
174147
+ max_cardinality = MaxValue(child->EstimateCardinality(context), max_cardinality);
174148
+ }
174149
+ return max_cardinality;
174150
+ }
174151
+
174002
174152
  void LogicalOperator::Print() {
174003
174153
  Printer::Print(ToString());
174004
174154
  }
@@ -175100,8 +175250,8 @@ public:
175100
175250
  namespace duckdb {
175101
175251
 
175102
175252
  FlattenDependentJoins::FlattenDependentJoins(Binder &binder, const vector<CorrelatedColumnInfo> &correlated,
175103
- bool any_join)
175104
- : binder(binder), correlated_columns(correlated), any_join(any_join) {
175253
+ bool perform_delim, bool any_join)
175254
+ : binder(binder), correlated_columns(correlated), perform_delim(perform_delim), any_join(any_join) {
175105
175255
  for (idx_t i = 0; i < correlated_columns.size(); i++) {
175106
175256
  auto &col = correlated_columns[i];
175107
175257
  correlated_map[col.binding] = i;
@@ -175201,8 +175351,9 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
175201
175351
  // now we add all the columns of the delim_scan to the projection list
175202
175352
  auto proj = (LogicalProjection *)plan.get();
175203
175353
  for (idx_t i = 0; i < correlated_columns.size(); i++) {
175354
+ auto &col = correlated_columns[i];
175204
175355
  auto colref = make_unique<BoundColumnRefExpression>(
175205
- correlated_columns[i].type, ColumnBinding(base_binding.table_index, base_binding.column_index + i));
175356
+ col.name, col.type, ColumnBinding(base_binding.table_index, base_binding.column_index + i));
175206
175357
  plan->expressions.push_back(move(colref));
175207
175358
  }
175208
175359
 
@@ -175223,15 +175374,42 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
175223
175374
  RewriteCorrelatedExpressions rewriter(base_binding, correlated_map);
175224
175375
  rewriter.VisitOperator(*plan);
175225
175376
  // now we add all the columns of the delim_scan to the grouping operators AND the projection list
175226
- for (idx_t i = 0; i < correlated_columns.size(); i++) {
175377
+ idx_t delim_table_index;
175378
+ idx_t delim_column_offset;
175379
+ idx_t delim_data_offset;
175380
+ auto new_group_count = perform_delim ? correlated_columns.size() : 1;
175381
+ for (idx_t i = 0; i < new_group_count; i++) {
175382
+ auto &col = correlated_columns[i];
175227
175383
  auto colref = make_unique<BoundColumnRefExpression>(
175228
- correlated_columns[i].type, ColumnBinding(base_binding.table_index, base_binding.column_index + i));
175384
+ col.name, col.type, ColumnBinding(base_binding.table_index, base_binding.column_index + i));
175229
175385
  for (auto &set : aggr.grouping_sets) {
175230
175386
  set.insert(aggr.groups.size());
175231
175387
  }
175232
175388
  aggr.groups.push_back(move(colref));
175233
175389
  }
175234
- if (aggr.groups.size() == correlated_columns.size()) {
175390
+ if (!perform_delim) {
175391
+ // if we are not performing the duplicate elimination, we have only added the row_id column to the grouping
175392
+ // operators in this case, we push a FIRST aggregate for each of the remaining expressions
175393
+ delim_table_index = aggr.aggregate_index;
175394
+ delim_column_offset = aggr.expressions.size();
175395
+ delim_data_offset = aggr.groups.size();
175396
+ for (idx_t i = 0; i < correlated_columns.size(); i++) {
175397
+ auto &col = correlated_columns[i];
175398
+ auto first_aggregate = FirstFun::GetFunction(col.type);
175399
+ auto colref = make_unique<BoundColumnRefExpression>(
175400
+ col.name, col.type, ColumnBinding(base_binding.table_index, base_binding.column_index + i));
175401
+ vector<unique_ptr<Expression>> aggr_children;
175402
+ aggr_children.push_back(move(colref));
175403
+ auto first_fun = make_unique<BoundAggregateExpression>(move(first_aggregate), move(aggr_children),
175404
+ nullptr, nullptr, false);
175405
+ aggr.expressions.push_back(move(first_fun));
175406
+ }
175407
+ } else {
175408
+ delim_table_index = aggr.group_index;
175409
+ delim_column_offset = aggr.groups.size() - correlated_columns.size();
175410
+ delim_data_offset = aggr.groups.size();
175411
+ }
175412
+ if (aggr.groups.size() == new_group_count) {
175235
175413
  // we have to perform a LEFT OUTER JOIN between the result of this aggregate and the delim scan
175236
175414
  // FIXME: this does not always have to be a LEFT OUTER JOIN, depending on whether aggr.expressions return
175237
175415
  // NULL or a value
@@ -175247,13 +175425,12 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
175247
175425
  auto delim_scan = make_unique<LogicalDelimGet>(left_index, delim_types);
175248
175426
  join->children.push_back(move(delim_scan));
175249
175427
  join->children.push_back(move(plan));
175250
- for (idx_t i = 0; i < correlated_columns.size(); i++) {
175428
+ for (idx_t i = 0; i < new_group_count; i++) {
175429
+ auto &col = correlated_columns[i];
175251
175430
  JoinCondition cond;
175252
- cond.left =
175253
- make_unique<BoundColumnRefExpression>(correlated_columns[i].type, ColumnBinding(left_index, i));
175431
+ cond.left = make_unique<BoundColumnRefExpression>(col.name, col.type, ColumnBinding(left_index, i));
175254
175432
  cond.right = make_unique<BoundColumnRefExpression>(
175255
- correlated_columns[i].type,
175256
- ColumnBinding(aggr.group_index, (aggr.groups.size() - correlated_columns.size()) + i));
175433
+ correlated_columns[i].type, ColumnBinding(delim_table_index, delim_column_offset + i));
175257
175434
  cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM;
175258
175435
  join->conditions.push_back(move(cond));
175259
175436
  }
@@ -175269,16 +175446,15 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
175269
175446
  }
175270
175447
  }
175271
175448
  // now we update the delim_index
175272
-
175273
175449
  base_binding.table_index = left_index;
175274
175450
  this->delim_offset = base_binding.column_index = 0;
175275
175451
  this->data_offset = 0;
175276
175452
  return move(join);
175277
175453
  } else {
175278
175454
  // update the delim_index
175279
- base_binding.table_index = aggr.group_index;
175280
- this->delim_offset = base_binding.column_index = aggr.groups.size() - correlated_columns.size();
175281
- this->data_offset = aggr.groups.size();
175455
+ base_binding.table_index = delim_table_index;
175456
+ this->delim_offset = base_binding.column_index = delim_column_offset;
175457
+ this->data_offset = delim_data_offset;
175282
175458
  return plan;
175283
175459
  }
175284
175460
  }
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "f523e0555"
15
- #define DUCKDB_VERSION "v0.4.1-dev314"
14
+ #define DUCKDB_SOURCE_ID "f5d15448b"
15
+ #define DUCKDB_VERSION "v0.4.1-dev318"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -8879,13 +8879,9 @@ namespace duckdb {
8879
8879
  //! logical query tree
8880
8880
  class LogicalOperator {
8881
8881
  public:
8882
- explicit LogicalOperator(LogicalOperatorType type) : type(type) {
8883
- }
8884
- LogicalOperator(LogicalOperatorType type, vector<unique_ptr<Expression>> expressions)
8885
- : type(type), expressions(move(expressions)) {
8886
- }
8887
- virtual ~LogicalOperator() {
8888
- }
8882
+ explicit LogicalOperator(LogicalOperatorType type);
8883
+ LogicalOperator(LogicalOperatorType type, vector<unique_ptr<Expression>> expressions);
8884
+ virtual ~LogicalOperator();
8889
8885
 
8890
8886
  //! The type of the logical operator
8891
8887
  LogicalOperatorType type;
@@ -8899,9 +8895,7 @@ public:
8899
8895
  idx_t estimated_cardinality = 0;
8900
8896
 
8901
8897
  public:
8902
- virtual vector<ColumnBinding> GetColumnBindings() {
8903
- return {ColumnBinding(0, 0)};
8904
- }
8898
+ virtual vector<ColumnBinding> GetColumnBindings();
8905
8899
  static vector<ColumnBinding> GenerateColumnBindings(idx_t table_idx, idx_t column_count);
8906
8900
  static vector<LogicalType> MapTypes(const vector<LogicalType> &types, const vector<idx_t> &projection_map);
8907
8901
  static vector<ColumnBinding> MapBindings(const vector<ColumnBinding> &types, const vector<idx_t> &projection_map);
@@ -8916,18 +8910,9 @@ public:
8916
8910
  //! Debug method: verify that the integrity of expressions & child nodes are maintained
8917
8911
  virtual void Verify();
8918
8912
 
8919
- void AddChild(unique_ptr<LogicalOperator> child) {
8920
- children.push_back(move(child));
8921
- }
8913
+ void AddChild(unique_ptr<LogicalOperator> child);
8922
8914
 
8923
- virtual idx_t EstimateCardinality(ClientContext &context) {
8924
- // simple estimator, just take the max of the children
8925
- idx_t max_cardinality = 0;
8926
- for (auto &child : children) {
8927
- max_cardinality = MaxValue(child->EstimateCardinality(context), max_cardinality);
8928
- }
8929
- return max_cardinality;
8930
- }
8915
+ virtual idx_t EstimateCardinality(ClientContext &context);
8931
8916
 
8932
8917
  protected:
8933
8918
  //! Resolve types for this specific operator
@@ -14373,8 +14358,11 @@ struct CorrelatedColumnInfo {
14373
14358
  string name;
14374
14359
  idx_t depth;
14375
14360
 
14361
+ CorrelatedColumnInfo(ColumnBinding binding, LogicalType type_p, string name_p, idx_t depth)
14362
+ : binding(binding), type(move(type_p)), name(move(name_p)), depth(depth) {
14363
+ }
14376
14364
  explicit CorrelatedColumnInfo(BoundColumnRefExpression &expr)
14377
- : binding(expr.binding), type(expr.return_type), name(expr.GetName()), depth(expr.depth) {
14365
+ : CorrelatedColumnInfo(expr.binding, expr.return_type, expr.GetName(), expr.depth) {
14378
14366
  }
14379
14367
 
14380
14368
  bool operator==(const CorrelatedColumnInfo &rhs) const {