duckdb 0.6.2-dev1184.0 → 0.6.2-dev1192.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.6.2-dev1184.0",
5
+ "version": "0.6.2-dev1192.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -53,7 +53,10 @@ OperatorResultType PhysicalFilter::ExecuteInternal(ExecutionContext &context, Da
53
53
  }
54
54
 
55
55
  string PhysicalFilter::ParamsToString() const {
56
- return expression->GetName();
56
+ auto result = expression->GetName();
57
+ result += "\n[INFOSEPARATOR]\n";
58
+ result += StringUtil::Format("EC: %llu", estimated_props->GetCardinality<idx_t>());
59
+ return result;
57
60
  }
58
61
 
59
62
  } // namespace duckdb
@@ -29,8 +29,9 @@ string PhysicalComparisonJoin::ParamsToString() const {
29
29
  string op = ExpressionTypeToOperator(it.comparison);
30
30
  extra_info += it.left->GetName() + " " + op + " " + it.right->GetName() + "\n";
31
31
  }
32
- extra_info += "\nEC = " + std::to_string(estimated_props->GetCardinality<double>()) + "\n";
33
- extra_info += "COST = " + std::to_string(estimated_props->GetCost()) + "\n";
32
+ extra_info += "\n[INFOSEPARATOR]\n";
33
+ extra_info += StringUtil::Format("EC: %llu\n", estimated_props->GetCardinality<idx_t>());
34
+ extra_info += StringUtil::Format("Cost: %llu", (idx_t)estimated_props->GetCost());
34
35
  return extra_info;
35
36
  }
36
37
 
@@ -135,7 +135,8 @@ string PhysicalTableScan::ParamsToString() const {
135
135
  }
136
136
  }
137
137
  }
138
- result += "\nEC=" + to_string(estimated_cardinality) + "\n";
138
+ result += "\n[INFOSEPARATOR]\n";
139
+ result += StringUtil::Format("EC: %llu", estimated_props->GetCardinality<idx_t>());
139
140
  return result;
140
141
  }
141
142
 
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.6.2-dev1184"
2
+ #define DUCKDB_VERSION "0.6.2-dev1192"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "b0ed39acb9"
5
+ #define DUCKDB_SOURCE_ID "4b2d12b676"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -21,7 +21,7 @@ class DeliminatorPlanUpdater;
21
21
  //! The Deliminator optimizer traverses the logical operator tree and removes any redundant DelimGets/DelimJoins
22
22
  class Deliminator {
23
23
  public:
24
- Deliminator() {
24
+ explicit Deliminator(ClientContext &context) : context(context) {
25
25
  }
26
26
  //! Perform DelimJoin elimination
27
27
  unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> op);
@@ -35,6 +35,9 @@ private:
35
35
  //! Try to remove an inequality Join with a DelimGet, returns true if it was successful
36
36
  bool RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
37
37
  DeliminatorPlanUpdater &updater);
38
+
39
+ private:
40
+ ClientContext &context;
38
41
  };
39
42
 
40
43
  } // namespace duckdb
@@ -69,13 +69,11 @@ private:
69
69
 
70
70
  vector<RelationsToTDom> relations_to_tdoms;
71
71
 
72
+ public:
72
73
  static constexpr double DEFAULT_SELECTIVITY = 0.2;
73
74
 
74
- public:
75
75
  static void VerifySymmetry(JoinNode *result, JoinNode *entry);
76
76
 
77
- void AssertEquivalentRelationSize();
78
-
79
77
  //! given a binding of (relation, column) used for DP, and a (table, column) in that catalog
80
78
  //! Add the key value entry into the relation_column_to_original_column
81
79
  void AddRelationToColumnMapping(ColumnBinding key, ColumnBinding value);
@@ -92,7 +90,7 @@ public:
92
90
  void UpdateTotalDomains(JoinNode *node, LogicalOperator *op);
93
91
  void InitEquivalentRelations(vector<unique_ptr<FilterInfo>> *filter_infos);
94
92
 
95
- void InitCardinalityEstimatorProps(vector<struct NodeOp> *node_ops, vector<unique_ptr<FilterInfo>> *filter_infos);
93
+ void InitCardinalityEstimatorProps(vector<NodeOp> *node_ops, vector<unique_ptr<FilterInfo>> *filter_infos);
96
94
  double EstimateCardinalityWithSet(JoinRelationSet *new_set);
97
95
  void EstimateBaseTableCardinality(JoinNode *node, LogicalOperator *op);
98
96
  double EstimateCrossProduct(const JoinNode *left, const JoinNode *right);
@@ -54,8 +54,7 @@ private:
54
54
  //! The set of filter infos created from the extracted filters
55
55
  vector<unique_ptr<FilterInfo>> filter_infos;
56
56
  //! A map of all expressions a given expression has to be equivalent to. This is used to add "implied join edges".
57
- //! i.e. in the join A=B AND B=C, the equivalence set of {B} is {A, C}, thus we can add an implied join edge {A <->
58
- //! C}
57
+ //! i.e. in the join A=B AND B=C, the equivalence set of {B} is {A, C}, thus we can add an implied join edge {A = C}
59
58
  expression_map_t<vector<FilterInfo *>> equivalence_sets;
60
59
 
61
60
  CardinalityEstimator cardinality_estimator;
@@ -1,5 +1,6 @@
1
1
  #include "duckdb/optimizer/deliminator.hpp"
2
2
 
3
+ #include "duckdb/optimizer/join_order/join_order_optimizer.hpp"
3
4
  #include "duckdb/planner/expression/bound_cast_expression.hpp"
4
5
  #include "duckdb/planner/expression/bound_columnref_expression.hpp"
5
6
  #include "duckdb/planner/expression/bound_conjunction_expression.hpp"
@@ -13,13 +14,15 @@ namespace duckdb {
13
14
 
14
15
  class DeliminatorPlanUpdater : LogicalOperatorVisitor {
15
16
  public:
16
- DeliminatorPlanUpdater() {
17
+ explicit DeliminatorPlanUpdater(ClientContext &context) : context(context) {
17
18
  }
18
19
  //! Update the plan after a DelimGet has been removed
19
20
  void VisitOperator(LogicalOperator &op) override;
20
21
  void VisitExpression(unique_ptr<Expression> *expression) override;
21
22
 
22
23
  public:
24
+ ClientContext &context;
25
+
23
26
  expression_map_t<Expression *> expr_map;
24
27
  column_binding_map_t<bool> projection_map;
25
28
  column_binding_map_t<Expression *> reverse_proj_or_agg_map;
@@ -94,6 +97,10 @@ void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
94
97
  // change type if there are no more duplicate-eliminated columns
95
98
  if (decs->empty()) {
96
99
  delim_join.type = LogicalOperatorType::LOGICAL_COMPARISON_JOIN;
100
+ // sub-plans with DelimGets are not re-orderable (yet), however, we removed all DelimGet of this DelimJoin
101
+ // the DelimGets are on the RHS of the DelimJoin, so we can call the JoinOrderOptimizer on the RHS now
102
+ JoinOrderOptimizer optimizer(context);
103
+ delim_join.children[1] = optimizer.Optimize(std::move(delim_join.children[1]));
97
104
  }
98
105
  }
99
106
  }
@@ -111,7 +118,7 @@ unique_ptr<LogicalOperator> Deliminator::Optimize(unique_ptr<LogicalOperator> op
111
118
  FindCandidates(&op, candidates);
112
119
 
113
120
  for (auto &candidate : candidates) {
114
- DeliminatorPlanUpdater updater;
121
+ DeliminatorPlanUpdater updater(context);
115
122
  if (RemoveCandidate(&op, candidate, updater)) {
116
123
  updater.VisitOperator(*op);
117
124
  }
@@ -358,7 +358,7 @@ bool SortTdoms(const RelationsToTDom &a, const RelationsToTDom &b) {
358
358
  return a.tdom_no_hll > b.tdom_no_hll;
359
359
  }
360
360
 
361
- void CardinalityEstimator::InitCardinalityEstimatorProps(vector<struct NodeOp> *node_ops,
361
+ void CardinalityEstimator::InitCardinalityEstimatorProps(vector<NodeOp> *node_ops,
362
362
  vector<unique_ptr<FilterInfo>> *filter_infos) {
363
363
  InitEquivalentRelations(filter_infos);
364
364
  InitTotalDomains();
@@ -564,13 +564,13 @@ void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode *node, LogicalO
564
564
  auto table_filters = GetTableFilters(op);
565
565
 
566
566
  auto card_after_filters = node->GetBaseTableCardinality();
567
- // Logical Filter on a seq scan
568
- if (has_logical_filter) {
569
- card_after_filters *= DEFAULT_SELECTIVITY;
570
- } else if (table_filters) {
567
+ if (table_filters) {
571
568
  double inspect_result = (double)InspectTableFilters(card_after_filters, op, table_filters);
572
569
  card_after_filters = MinValue(inspect_result, (double)card_after_filters);
573
570
  }
571
+ if (has_logical_filter) {
572
+ card_after_filters *= DEFAULT_SELECTIVITY;
573
+ }
574
574
  node->SetEstimatedCardinality(card_after_filters);
575
575
  }
576
576
 
@@ -182,13 +182,16 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
182
182
  relations.push_back(std::move(relation));
183
183
  return true;
184
184
  }
185
- if (op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
186
- op->type == LogicalOperatorType::LOGICAL_CROSS_PRODUCT) {
185
+
186
+ switch (op->type) {
187
+ case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
188
+ case LogicalOperatorType::LOGICAL_CROSS_PRODUCT: {
187
189
  // inner join or cross product
188
190
  bool can_reorder_left = ExtractJoinRelations(*op->children[0], filter_operators, op);
189
191
  bool can_reorder_right = ExtractJoinRelations(*op->children[1], filter_operators, op);
190
192
  return can_reorder_left && can_reorder_right;
191
- } else if (op->type == LogicalOperatorType::LOGICAL_GET) {
193
+ }
194
+ case LogicalOperatorType::LOGICAL_GET: {
192
195
  // base table scan, add to set of relations
193
196
  auto get = (LogicalGet *)op;
194
197
  auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
@@ -199,24 +202,25 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
199
202
  cardinality_estimator.AddRelationColumnMapping(get, relation_id);
200
203
  relations.push_back(std::move(relation));
201
204
  return true;
202
- } else if (op->type == LogicalOperatorType::LOGICAL_EXPRESSION_GET) {
205
+ }
206
+ case LogicalOperatorType::LOGICAL_EXPRESSION_GET: {
203
207
  // base table scan, add to set of relations
204
208
  auto get = (LogicalExpressionGet *)op;
205
209
  auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
206
- idx_t relation_id = relations.size();
207
210
  //! make sure the optimizer has knowledge of the exact column bindings as well.
208
- auto table_index = get->table_index;
209
- relation_mapping[table_index] = relation_id;
211
+ relation_mapping[get->table_index] = relations.size();
210
212
  relations.push_back(std::move(relation));
211
213
  return true;
212
- } else if (op->type == LogicalOperatorType::LOGICAL_DUMMY_SCAN) {
214
+ }
215
+ case LogicalOperatorType::LOGICAL_DUMMY_SCAN: {
213
216
  // table function call, add to set of relations
214
217
  auto dummy_scan = (LogicalDummyScan *)op;
215
218
  auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
216
219
  relation_mapping[dummy_scan->table_index] = relations.size();
217
220
  relations.push_back(std::move(relation));
218
221
  return true;
219
- } else if (op->type == LogicalOperatorType::LOGICAL_PROJECTION) {
222
+ }
223
+ case LogicalOperatorType::LOGICAL_PROJECTION: {
220
224
  auto proj = (LogicalProjection *)op;
221
225
  // we run the join order optimizer witin the subquery as well
222
226
  JoinOrderOptimizer optimizer(context);
@@ -227,7 +231,9 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
227
231
  relations.push_back(std::move(relation));
228
232
  return true;
229
233
  }
230
- return false;
234
+ default:
235
+ return false;
236
+ }
231
237
  }
232
238
 
233
239
  //! Update the exclusion set with all entries in the subgraph
@@ -768,9 +774,19 @@ JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted
768
774
  result_relation = node->set;
769
775
  result_operator = std::move(extracted_relations[node->set->relations[0]]);
770
776
  }
771
- result_operator->estimated_cardinality = node->GetCardinality<idx_t>();
772
- result_operator->has_estimated_cardinality = true;
773
777
  result_operator->estimated_props = node->estimated_props->Copy();
778
+ result_operator->estimated_cardinality = result_operator->estimated_props->GetCardinality<idx_t>();
779
+ result_operator->has_estimated_cardinality = true;
780
+ if (result_operator->type == LogicalOperatorType::LOGICAL_FILTER &&
781
+ result_operator->children[0]->type == LogicalOperatorType::LOGICAL_GET) {
782
+ // FILTER on top of GET, add estimated properties to both
783
+ auto &filter_props = *result_operator->estimated_props;
784
+ auto &child_operator = *result_operator->children[0];
785
+ child_operator.estimated_props = make_unique<EstimatedProperties>(
786
+ filter_props.GetCardinality<double>() / CardinalityEstimator::DEFAULT_SELECTIVITY, filter_props.GetCost());
787
+ child_operator.estimated_cardinality = child_operator.estimated_props->GetCardinality<idx_t>();
788
+ child_operator.has_estimated_cardinality = true;
789
+ }
774
790
  // check if we should do a pushdown on this node
775
791
  // basically, any remaining filter that is a subset of the current relation will no longer be used in joins
776
792
  // hence we should push it here
@@ -911,9 +927,9 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
911
927
  // now that we know we are going to perform join ordering we actually extract the filters, eliminating duplicate
912
928
  // filters in the process
913
929
  expression_set_t filter_set;
914
- for (auto &op : filter_operators) {
915
- if (op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
916
- auto &join = (LogicalComparisonJoin &)*op;
930
+ for (auto &f_op : filter_operators) {
931
+ if (f_op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
932
+ auto &join = (LogicalComparisonJoin &)*f_op;
917
933
  D_ASSERT(join.join_type == JoinType::INNER);
918
934
  D_ASSERT(join.expressions.empty());
919
935
  for (auto &cond : join.conditions) {
@@ -926,13 +942,13 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
926
942
  }
927
943
  join.conditions.clear();
928
944
  } else {
929
- for (auto &expression : op->expressions) {
945
+ for (auto &expression : f_op->expressions) {
930
946
  if (filter_set.find(expression.get()) == filter_set.end()) {
931
947
  filter_set.insert(expression.get());
932
948
  filters.push_back(std::move(expression));
933
949
  }
934
950
  }
935
- op->expressions.clear();
951
+ f_op->expressions.clear();
936
952
  }
937
953
  }
938
954
  // create potential edges from the comparisons
@@ -107,7 +107,7 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
107
107
 
108
108
  // removes any redundant DelimGets/DelimJoins
109
109
  RunOptimizer(OptimizerType::DELIMINATOR, [&]() {
110
- Deliminator deliminator;
110
+ Deliminator deliminator(context);
111
111
  plan = deliminator.Optimize(std::move(plan));
112
112
  });
113
113