duckdb 0.6.2-dev1184.0 → 0.6.2-dev1192.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +4 -1
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +3 -2
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +2 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +4 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -4
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -2
- package/src/duckdb/src/optimizer/deliminator.cpp +9 -2
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +5 -5
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +33 -17
- package/src/duckdb/src/optimizer/optimizer.cpp +1 -1
package/package.json
CHANGED
|
@@ -53,7 +53,10 @@ OperatorResultType PhysicalFilter::ExecuteInternal(ExecutionContext &context, Da
|
|
|
53
53
|
}
|
|
54
54
|
|
|
55
55
|
string PhysicalFilter::ParamsToString() const {
|
|
56
|
-
|
|
56
|
+
auto result = expression->GetName();
|
|
57
|
+
result += "\n[INFOSEPARATOR]\n";
|
|
58
|
+
result += StringUtil::Format("EC: %llu", estimated_props->GetCardinality<idx_t>());
|
|
59
|
+
return result;
|
|
57
60
|
}
|
|
58
61
|
|
|
59
62
|
} // namespace duckdb
|
|
@@ -29,8 +29,9 @@ string PhysicalComparisonJoin::ParamsToString() const {
|
|
|
29
29
|
string op = ExpressionTypeToOperator(it.comparison);
|
|
30
30
|
extra_info += it.left->GetName() + " " + op + " " + it.right->GetName() + "\n";
|
|
31
31
|
}
|
|
32
|
-
extra_info += "\
|
|
33
|
-
extra_info += "
|
|
32
|
+
extra_info += "\n[INFOSEPARATOR]\n";
|
|
33
|
+
extra_info += StringUtil::Format("EC: %llu\n", estimated_props->GetCardinality<idx_t>());
|
|
34
|
+
extra_info += StringUtil::Format("Cost: %llu", (idx_t)estimated_props->GetCost());
|
|
34
35
|
return extra_info;
|
|
35
36
|
}
|
|
36
37
|
|
|
@@ -135,7 +135,8 @@ string PhysicalTableScan::ParamsToString() const {
|
|
|
135
135
|
}
|
|
136
136
|
}
|
|
137
137
|
}
|
|
138
|
-
result += "\
|
|
138
|
+
result += "\n[INFOSEPARATOR]\n";
|
|
139
|
+
result += StringUtil::Format("EC: %llu", estimated_props->GetCardinality<idx_t>());
|
|
139
140
|
return result;
|
|
140
141
|
}
|
|
141
142
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
|
2
|
-
#define DUCKDB_VERSION "0.6.2-dev1184"
|
|
2
|
+
#define DUCKDB_VERSION "0.6.2-dev1192"
|
|
3
3
|
#endif
|
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
|
5
|
+
#define DUCKDB_SOURCE_ID "4b2d12b676"
|
|
6
6
|
#endif
|
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
|
8
8
|
#include "duckdb/main/database.hpp"
|
|
@@ -21,7 +21,7 @@ class DeliminatorPlanUpdater;
|
|
|
21
21
|
//! The Deliminator optimizer traverses the logical operator tree and removes any redundant DelimGets/DelimJoins
|
|
22
22
|
class Deliminator {
|
|
23
23
|
public:
|
|
24
|
-
Deliminator() {
|
|
24
|
+
explicit Deliminator(ClientContext &context) : context(context) {
|
|
25
25
|
}
|
|
26
26
|
//! Perform DelimJoin elimination
|
|
27
27
|
unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> op);
|
|
@@ -35,6 +35,9 @@ private:
|
|
|
35
35
|
//! Try to remove an inequality Join with a DelimGet, returns true if it was successful
|
|
36
36
|
bool RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, unique_ptr<LogicalOperator> *candidate,
|
|
37
37
|
DeliminatorPlanUpdater &updater);
|
|
38
|
+
|
|
39
|
+
private:
|
|
40
|
+
ClientContext &context;
|
|
38
41
|
};
|
|
39
42
|
|
|
40
43
|
} // namespace duckdb
|
|
@@ -69,13 +69,11 @@ private:
|
|
|
69
69
|
|
|
70
70
|
vector<RelationsToTDom> relations_to_tdoms;
|
|
71
71
|
|
|
72
|
+
public:
|
|
72
73
|
static constexpr double DEFAULT_SELECTIVITY = 0.2;
|
|
73
74
|
|
|
74
|
-
public:
|
|
75
75
|
static void VerifySymmetry(JoinNode *result, JoinNode *entry);
|
|
76
76
|
|
|
77
|
-
void AssertEquivalentRelationSize();
|
|
78
|
-
|
|
79
77
|
//! given a binding of (relation, column) used for DP, and a (table, column) in that catalog
|
|
80
78
|
//! Add the key value entry into the relation_column_to_original_column
|
|
81
79
|
void AddRelationToColumnMapping(ColumnBinding key, ColumnBinding value);
|
|
@@ -92,7 +90,7 @@ public:
|
|
|
92
90
|
void UpdateTotalDomains(JoinNode *node, LogicalOperator *op);
|
|
93
91
|
void InitEquivalentRelations(vector<unique_ptr<FilterInfo>> *filter_infos);
|
|
94
92
|
|
|
95
|
-
void InitCardinalityEstimatorProps(vector<
|
|
93
|
+
void InitCardinalityEstimatorProps(vector<NodeOp> *node_ops, vector<unique_ptr<FilterInfo>> *filter_infos);
|
|
96
94
|
double EstimateCardinalityWithSet(JoinRelationSet *new_set);
|
|
97
95
|
void EstimateBaseTableCardinality(JoinNode *node, LogicalOperator *op);
|
|
98
96
|
double EstimateCrossProduct(const JoinNode *left, const JoinNode *right);
|
|
@@ -54,8 +54,7 @@ private:
|
|
|
54
54
|
//! The set of filter infos created from the extracted filters
|
|
55
55
|
vector<unique_ptr<FilterInfo>> filter_infos;
|
|
56
56
|
//! A map of all expressions a given expression has to be equivalent to. This is used to add "implied join edges".
|
|
57
|
-
//! i.e. in the join A=B AND B=C, the equivalence set of {B} is {A, C}, thus we can add an implied join edge {A
|
|
58
|
-
//! C}
|
|
57
|
+
//! i.e. in the join A=B AND B=C, the equivalence set of {B} is {A, C}, thus we can add an implied join edge {A = C}
|
|
59
58
|
expression_map_t<vector<FilterInfo *>> equivalence_sets;
|
|
60
59
|
|
|
61
60
|
CardinalityEstimator cardinality_estimator;
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#include "duckdb/optimizer/deliminator.hpp"
|
|
2
2
|
|
|
3
|
+
#include "duckdb/optimizer/join_order/join_order_optimizer.hpp"
|
|
3
4
|
#include "duckdb/planner/expression/bound_cast_expression.hpp"
|
|
4
5
|
#include "duckdb/planner/expression/bound_columnref_expression.hpp"
|
|
5
6
|
#include "duckdb/planner/expression/bound_conjunction_expression.hpp"
|
|
@@ -13,13 +14,15 @@ namespace duckdb {
|
|
|
13
14
|
|
|
14
15
|
class DeliminatorPlanUpdater : LogicalOperatorVisitor {
|
|
15
16
|
public:
|
|
16
|
-
DeliminatorPlanUpdater() {
|
|
17
|
+
explicit DeliminatorPlanUpdater(ClientContext &context) : context(context) {
|
|
17
18
|
}
|
|
18
19
|
//! Update the plan after a DelimGet has been removed
|
|
19
20
|
void VisitOperator(LogicalOperator &op) override;
|
|
20
21
|
void VisitExpression(unique_ptr<Expression> *expression) override;
|
|
21
22
|
|
|
22
23
|
public:
|
|
24
|
+
ClientContext &context;
|
|
25
|
+
|
|
23
26
|
expression_map_t<Expression *> expr_map;
|
|
24
27
|
column_binding_map_t<bool> projection_map;
|
|
25
28
|
column_binding_map_t<Expression *> reverse_proj_or_agg_map;
|
|
@@ -94,6 +97,10 @@ void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
|
|
|
94
97
|
// change type if there are no more duplicate-eliminated columns
|
|
95
98
|
if (decs->empty()) {
|
|
96
99
|
delim_join.type = LogicalOperatorType::LOGICAL_COMPARISON_JOIN;
|
|
100
|
+
// sub-plans with DelimGets are not re-orderable (yet), however, we removed all DelimGet of this DelimJoin
|
|
101
|
+
// the DelimGets are on the RHS of the DelimJoin, so we can call the JoinOrderOptimizer on the RHS now
|
|
102
|
+
JoinOrderOptimizer optimizer(context);
|
|
103
|
+
delim_join.children[1] = optimizer.Optimize(std::move(delim_join.children[1]));
|
|
97
104
|
}
|
|
98
105
|
}
|
|
99
106
|
}
|
|
@@ -111,7 +118,7 @@ unique_ptr<LogicalOperator> Deliminator::Optimize(unique_ptr<LogicalOperator> op
|
|
|
111
118
|
FindCandidates(&op, candidates);
|
|
112
119
|
|
|
113
120
|
for (auto &candidate : candidates) {
|
|
114
|
-
DeliminatorPlanUpdater updater;
|
|
121
|
+
DeliminatorPlanUpdater updater(context);
|
|
115
122
|
if (RemoveCandidate(&op, candidate, updater)) {
|
|
116
123
|
updater.VisitOperator(*op);
|
|
117
124
|
}
|
|
@@ -358,7 +358,7 @@ bool SortTdoms(const RelationsToTDom &a, const RelationsToTDom &b) {
|
|
|
358
358
|
return a.tdom_no_hll > b.tdom_no_hll;
|
|
359
359
|
}
|
|
360
360
|
|
|
361
|
-
void CardinalityEstimator::InitCardinalityEstimatorProps(vector<
|
|
361
|
+
void CardinalityEstimator::InitCardinalityEstimatorProps(vector<NodeOp> *node_ops,
|
|
362
362
|
vector<unique_ptr<FilterInfo>> *filter_infos) {
|
|
363
363
|
InitEquivalentRelations(filter_infos);
|
|
364
364
|
InitTotalDomains();
|
|
@@ -564,13 +564,13 @@ void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode *node, LogicalO
|
|
|
564
564
|
auto table_filters = GetTableFilters(op);
|
|
565
565
|
|
|
566
566
|
auto card_after_filters = node->GetBaseTableCardinality();
|
|
567
|
-
|
|
568
|
-
if (has_logical_filter) {
|
|
569
|
-
card_after_filters *= DEFAULT_SELECTIVITY;
|
|
570
|
-
} else if (table_filters) {
|
|
567
|
+
if (table_filters) {
|
|
571
568
|
double inspect_result = (double)InspectTableFilters(card_after_filters, op, table_filters);
|
|
572
569
|
card_after_filters = MinValue(inspect_result, (double)card_after_filters);
|
|
573
570
|
}
|
|
571
|
+
if (has_logical_filter) {
|
|
572
|
+
card_after_filters *= DEFAULT_SELECTIVITY;
|
|
573
|
+
}
|
|
574
574
|
node->SetEstimatedCardinality(card_after_filters);
|
|
575
575
|
}
|
|
576
576
|
|
|
@@ -182,13 +182,16 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
|
|
|
182
182
|
relations.push_back(std::move(relation));
|
|
183
183
|
return true;
|
|
184
184
|
}
|
|
185
|
-
|
|
186
|
-
|
|
185
|
+
|
|
186
|
+
switch (op->type) {
|
|
187
|
+
case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
|
|
188
|
+
case LogicalOperatorType::LOGICAL_CROSS_PRODUCT: {
|
|
187
189
|
// inner join or cross product
|
|
188
190
|
bool can_reorder_left = ExtractJoinRelations(*op->children[0], filter_operators, op);
|
|
189
191
|
bool can_reorder_right = ExtractJoinRelations(*op->children[1], filter_operators, op);
|
|
190
192
|
return can_reorder_left && can_reorder_right;
|
|
191
|
-
}
|
|
193
|
+
}
|
|
194
|
+
case LogicalOperatorType::LOGICAL_GET: {
|
|
192
195
|
// base table scan, add to set of relations
|
|
193
196
|
auto get = (LogicalGet *)op;
|
|
194
197
|
auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
|
|
@@ -199,24 +202,25 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
|
|
|
199
202
|
cardinality_estimator.AddRelationColumnMapping(get, relation_id);
|
|
200
203
|
relations.push_back(std::move(relation));
|
|
201
204
|
return true;
|
|
202
|
-
}
|
|
205
|
+
}
|
|
206
|
+
case LogicalOperatorType::LOGICAL_EXPRESSION_GET: {
|
|
203
207
|
// base table scan, add to set of relations
|
|
204
208
|
auto get = (LogicalExpressionGet *)op;
|
|
205
209
|
auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
|
|
206
|
-
idx_t relation_id = relations.size();
|
|
207
210
|
//! make sure the optimizer has knowledge of the exact column bindings as well.
|
|
208
|
-
|
|
209
|
-
relation_mapping[table_index] = relation_id;
|
|
211
|
+
relation_mapping[get->table_index] = relations.size();
|
|
210
212
|
relations.push_back(std::move(relation));
|
|
211
213
|
return true;
|
|
212
|
-
}
|
|
214
|
+
}
|
|
215
|
+
case LogicalOperatorType::LOGICAL_DUMMY_SCAN: {
|
|
213
216
|
// table function call, add to set of relations
|
|
214
217
|
auto dummy_scan = (LogicalDummyScan *)op;
|
|
215
218
|
auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
|
|
216
219
|
relation_mapping[dummy_scan->table_index] = relations.size();
|
|
217
220
|
relations.push_back(std::move(relation));
|
|
218
221
|
return true;
|
|
219
|
-
}
|
|
222
|
+
}
|
|
223
|
+
case LogicalOperatorType::LOGICAL_PROJECTION: {
|
|
220
224
|
auto proj = (LogicalProjection *)op;
|
|
221
225
|
// we run the join order optimizer within the subquery as well
|
|
222
226
|
JoinOrderOptimizer optimizer(context);
|
|
@@ -227,7 +231,9 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
|
|
|
227
231
|
relations.push_back(std::move(relation));
|
|
228
232
|
return true;
|
|
229
233
|
}
|
|
230
|
-
|
|
234
|
+
default:
|
|
235
|
+
return false;
|
|
236
|
+
}
|
|
231
237
|
}
|
|
232
238
|
|
|
233
239
|
//! Update the exclusion set with all entries in the subgraph
|
|
@@ -768,9 +774,19 @@ JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted
|
|
|
768
774
|
result_relation = node->set;
|
|
769
775
|
result_operator = std::move(extracted_relations[node->set->relations[0]]);
|
|
770
776
|
}
|
|
771
|
-
result_operator->estimated_cardinality = node->GetCardinality<idx_t>();
|
|
772
|
-
result_operator->has_estimated_cardinality = true;
|
|
773
777
|
result_operator->estimated_props = node->estimated_props->Copy();
|
|
778
|
+
result_operator->estimated_cardinality = result_operator->estimated_props->GetCardinality<idx_t>();
|
|
779
|
+
result_operator->has_estimated_cardinality = true;
|
|
780
|
+
if (result_operator->type == LogicalOperatorType::LOGICAL_FILTER &&
|
|
781
|
+
result_operator->children[0]->type == LogicalOperatorType::LOGICAL_GET) {
|
|
782
|
+
// FILTER on top of GET, add estimated properties to both
|
|
783
|
+
auto &filter_props = *result_operator->estimated_props;
|
|
784
|
+
auto &child_operator = *result_operator->children[0];
|
|
785
|
+
child_operator.estimated_props = make_unique<EstimatedProperties>(
|
|
786
|
+
filter_props.GetCardinality<double>() / CardinalityEstimator::DEFAULT_SELECTIVITY, filter_props.GetCost());
|
|
787
|
+
child_operator.estimated_cardinality = child_operator.estimated_props->GetCardinality<idx_t>();
|
|
788
|
+
child_operator.has_estimated_cardinality = true;
|
|
789
|
+
}
|
|
774
790
|
// check if we should do a pushdown on this node
|
|
775
791
|
// basically, any remaining filter that is a subset of the current relation will no longer be used in joins
|
|
776
792
|
// hence we should push it here
|
|
@@ -911,9 +927,9 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
|
|
|
911
927
|
// now that we know we are going to perform join ordering we actually extract the filters, eliminating duplicate
|
|
912
928
|
// filters in the process
|
|
913
929
|
expression_set_t filter_set;
|
|
914
|
-
for (auto &
|
|
915
|
-
if (
|
|
916
|
-
auto &join = (LogicalComparisonJoin &)*
|
|
930
|
+
for (auto &f_op : filter_operators) {
|
|
931
|
+
if (f_op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
|
|
932
|
+
auto &join = (LogicalComparisonJoin &)*f_op;
|
|
917
933
|
D_ASSERT(join.join_type == JoinType::INNER);
|
|
918
934
|
D_ASSERT(join.expressions.empty());
|
|
919
935
|
for (auto &cond : join.conditions) {
|
|
@@ -926,13 +942,13 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
|
|
|
926
942
|
}
|
|
927
943
|
join.conditions.clear();
|
|
928
944
|
} else {
|
|
929
|
-
for (auto &expression :
|
|
945
|
+
for (auto &expression : f_op->expressions) {
|
|
930
946
|
if (filter_set.find(expression.get()) == filter_set.end()) {
|
|
931
947
|
filter_set.insert(expression.get());
|
|
932
948
|
filters.push_back(std::move(expression));
|
|
933
949
|
}
|
|
934
950
|
}
|
|
935
|
-
|
|
951
|
+
f_op->expressions.clear();
|
|
936
952
|
}
|
|
937
953
|
}
|
|
938
954
|
// create potential edges from the comparisons
|
|
@@ -107,7 +107,7 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
|
|
|
107
107
|
|
|
108
108
|
// removes any redundant DelimGets/DelimJoins
|
|
109
109
|
RunOptimizer(OptimizerType::DELIMINATOR, [&]() {
|
|
110
|
-
Deliminator deliminator;
|
|
110
|
+
Deliminator deliminator(context);
|
|
111
111
|
plan = deliminator.Optimize(std::move(plan));
|
|
112
112
|
});
|
|
113
113
|
|