duckdb 0.8.2-dev2700.0 → 0.8.2-dev2809.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
- package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
- package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
- package/src/duckdb/src/common/enum_util.cpp +5 -0
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
- package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
- package/src/duckdb/src/execution/window_executor.cpp +10 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb.h +11 -1
- package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
- package/src/duckdb/src/main/pending_query_result.cpp +9 -1
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
- package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
- package/src/duckdb/src/parallel/executor.cpp +6 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_get.cpp +4 -0
- package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
@@ -3,11 +3,10 @@
|
|
3
3
|
#include "duckdb/common/printer.hpp"
|
4
4
|
#include "duckdb/common/string_util.hpp"
|
5
5
|
#include "duckdb/common/assert.hpp"
|
6
|
-
#include "duckdb/common/to_string.hpp"
|
7
6
|
|
8
7
|
namespace duckdb {
|
9
8
|
|
10
|
-
using QueryEdge =
|
9
|
+
using QueryEdge = QueryGraphEdges::QueryEdge;
|
11
10
|
|
12
11
|
// LCOV_EXCL_START
|
13
12
|
static string QueryEdgeToString(const QueryEdge *info, vector<idx_t> prefix) {
|
@@ -18,7 +17,7 @@ static string QueryEdgeToString(const QueryEdge *info, vector<idx_t> prefix) {
|
|
18
17
|
}
|
19
18
|
source += "]";
|
20
19
|
for (auto &entry : info->neighbors) {
|
21
|
-
result += StringUtil::Format("%s -> %s\n", source.c_str(), entry->neighbor
|
20
|
+
result += StringUtil::Format("%s -> %s\n", source.c_str(), entry->neighbor->ToString().c_str());
|
22
21
|
}
|
23
22
|
for (auto &entry : info->children) {
|
24
23
|
vector<idx_t> new_prefix = prefix;
|
@@ -28,56 +27,58 @@ static string QueryEdgeToString(const QueryEdge *info, vector<idx_t> prefix) {
|
|
28
27
|
return result;
|
29
28
|
}
|
30
29
|
|
31
|
-
string
|
30
|
+
string QueryGraphEdges::ToString() const {
|
32
31
|
return QueryEdgeToString(&root, {});
|
33
32
|
}
|
34
33
|
|
35
|
-
void
|
34
|
+
void QueryGraphEdges::Print() {
|
36
35
|
Printer::Print(ToString());
|
37
36
|
}
|
38
37
|
// LCOV_EXCL_STOP
|
39
38
|
|
40
|
-
QueryEdge
|
39
|
+
optional_ptr<QueryEdge> QueryGraphEdges::GetQueryEdge(JoinRelationSet &left) {
|
41
40
|
D_ASSERT(left.count > 0);
|
42
41
|
// find the EdgeInfo corresponding to the left set
|
43
|
-
|
42
|
+
optional_ptr<QueryEdge> info(&root);
|
44
43
|
for (idx_t i = 0; i < left.count; i++) {
|
45
|
-
auto entry = info.get()
|
46
|
-
if (entry == info.get()
|
44
|
+
auto entry = info.get()->children.find(left.relations[i]);
|
45
|
+
if (entry == info.get()->children.end()) {
|
47
46
|
// node not found, create it
|
48
|
-
auto insert_it = info.get()
|
47
|
+
auto insert_it = info.get()->children.insert(make_pair(left.relations[i], make_uniq<QueryEdge>()));
|
49
48
|
entry = insert_it.first;
|
50
49
|
}
|
51
50
|
// move to the next node
|
52
|
-
info =
|
51
|
+
info = entry->second;
|
53
52
|
}
|
54
53
|
return info;
|
55
54
|
}
|
56
55
|
|
57
|
-
void
|
56
|
+
void QueryGraphEdges::CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr<FilterInfo> filter_info) {
|
58
57
|
D_ASSERT(left.count > 0 && right.count > 0);
|
59
58
|
// find the EdgeInfo corresponding to the left set
|
60
|
-
auto
|
59
|
+
auto info = GetQueryEdge(left);
|
61
60
|
// now insert the edge to the right relation, if it does not exist
|
62
|
-
for (idx_t i = 0; i < info
|
63
|
-
if (
|
61
|
+
for (idx_t i = 0; i < info->neighbors.size(); i++) {
|
62
|
+
if (info->neighbors[i]->neighbor == &right) {
|
64
63
|
if (filter_info) {
|
65
64
|
// neighbor already exists just add the filter, if we have any
|
66
|
-
info
|
65
|
+
info->neighbors[i]->filters.push_back(filter_info);
|
67
66
|
}
|
68
67
|
return;
|
69
68
|
}
|
70
69
|
}
|
71
70
|
// neighbor does not exist, create it
|
72
|
-
auto n = make_uniq<NeighborInfo>(right);
|
73
|
-
if
|
74
|
-
|
71
|
+
auto n = make_uniq<NeighborInfo>(&right);
|
72
|
+
// if the edge represents a cross product, filter_info is null. The easiest way then to determine
|
73
|
+
// if an edge is for a cross product is if the filters are empty
|
74
|
+
if (info && filter_info) {
|
75
|
+
n->filters.push_back(filter_info);
|
75
76
|
}
|
76
|
-
info
|
77
|
+
info->neighbors.push_back(std::move(n));
|
77
78
|
}
|
78
79
|
|
79
|
-
void
|
80
|
-
|
80
|
+
void QueryGraphEdges::EnumerateNeighborsDFS(JoinRelationSet &node, reference<QueryEdge> info, idx_t index,
|
81
|
+
const std::function<bool(NeighborInfo &)> &callback) const {
|
81
82
|
|
82
83
|
for (auto &neighbor : info.get().neighbors) {
|
83
84
|
if (callback(*neighbor)) {
|
@@ -94,7 +95,8 @@ void QueryGraph::EnumerateNeighborsDFS(JoinRelationSet &node, reference<QueryEdg
|
|
94
95
|
}
|
95
96
|
}
|
96
97
|
|
97
|
-
void
|
98
|
+
void QueryGraphEdges::EnumerateNeighbors(JoinRelationSet &node,
|
99
|
+
const std::function<bool(NeighborInfo &)> &callback) const {
|
98
100
|
for (idx_t j = 0; j < node.count; j++) {
|
99
101
|
auto iter = root.children.find(node.relations[j]);
|
100
102
|
if (iter != root.children.end()) {
|
@@ -105,16 +107,16 @@ void QueryGraph::EnumerateNeighbors(JoinRelationSet &node, const std::function<b
|
|
105
107
|
}
|
106
108
|
|
107
109
|
//! Returns true if a JoinRelationSet is banned by the list of exclusion_set, false otherwise
|
108
|
-
static bool JoinRelationSetIsExcluded(JoinRelationSet
|
109
|
-
return exclusion_set.find(node
|
110
|
+
static bool JoinRelationSetIsExcluded(optional_ptr<JoinRelationSet> node, unordered_set<idx_t> &exclusion_set) {
|
111
|
+
return exclusion_set.find(node->relations[0]) != exclusion_set.end();
|
110
112
|
}
|
111
113
|
|
112
|
-
vector<idx_t>
|
114
|
+
const vector<idx_t> QueryGraphEdges::GetNeighbors(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) const {
|
113
115
|
unordered_set<idx_t> result;
|
114
116
|
EnumerateNeighbors(node, [&](NeighborInfo &info) -> bool {
|
115
117
|
if (!JoinRelationSetIsExcluded(info.neighbor, exclusion_set)) {
|
116
118
|
// add the smallest node of the neighbor to the set
|
117
|
-
result.insert(info.neighbor
|
119
|
+
result.insert(info.neighbor->relations[0]);
|
118
120
|
}
|
119
121
|
return false;
|
120
122
|
});
|
@@ -123,10 +125,11 @@ vector<idx_t> QueryGraph::GetNeighbors(JoinRelationSet &node, unordered_set<idx_
|
|
123
125
|
return neighbors;
|
124
126
|
}
|
125
127
|
|
126
|
-
vector<reference<NeighborInfo>>
|
128
|
+
const vector<reference<NeighborInfo>> QueryGraphEdges::GetConnections(JoinRelationSet &node,
|
129
|
+
JoinRelationSet &other) const {
|
127
130
|
vector<reference<NeighborInfo>> connections;
|
128
131
|
EnumerateNeighbors(node, [&](NeighborInfo &info) -> bool {
|
129
|
-
if (JoinRelationSet::IsSubset(other, info.neighbor)) {
|
132
|
+
if (JoinRelationSet::IsSubset(other, *info.neighbor)) {
|
130
133
|
connections.push_back(info);
|
131
134
|
}
|
132
135
|
return false;
|
@@ -0,0 +1,409 @@
|
|
1
|
+
#include "duckdb/optimizer/join_order/query_graph_manager.hpp"
|
2
|
+
#include "duckdb/planner/logical_operator.hpp"
|
3
|
+
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
4
|
+
#include "duckdb/common/enums/join_type.hpp"
|
5
|
+
#include "duckdb/planner/operator/list.hpp"
|
6
|
+
#include "duckdb/planner/expression_iterator.hpp"
|
7
|
+
#include "duckdb/planner/expression/bound_comparison_expression.hpp"
|
8
|
+
#include "duckdb/common/printer.hpp"
|
9
|
+
#include "duckdb/common/string_util.hpp"
|
10
|
+
#include "duckdb/common/assert.hpp"
|
11
|
+
|
12
|
+
namespace duckdb {
|
13
|
+
|
14
|
+
//! Returns true if A and B are disjoint, false otherwise
|
15
|
+
template <class T>
|
16
|
+
static bool Disjoint(const unordered_set<T> &a, const unordered_set<T> &b) {
|
17
|
+
return std::all_of(a.begin(), a.end(), [&b](typename std::unordered_set<T>::const_reference entry) {
|
18
|
+
return b.find(entry) == b.end();
|
19
|
+
});
|
20
|
+
}
|
21
|
+
|
22
|
+
bool QueryGraphManager::Build(LogicalOperator &op) {
|
23
|
+
vector<reference<LogicalOperator>> filter_operators;
|
24
|
+
// have the relation manager extract the join relations and create a reference list of all the
|
25
|
+
// filter operators.
|
26
|
+
auto can_reorder = relation_manager.ExtractJoinRelations(op, filter_operators);
|
27
|
+
auto num_relations = relation_manager.NumRelations();
|
28
|
+
if (num_relations <= 1 || !can_reorder) {
|
29
|
+
// nothing to optimize/reorder
|
30
|
+
return false;
|
31
|
+
}
|
32
|
+
// extract the edges of the hypergraph, creating a list of filters and their associated bindings.
|
33
|
+
filters_and_bindings = relation_manager.ExtractEdges(op, filter_operators, set_manager);
|
34
|
+
// Create the query_graph hyper edges
|
35
|
+
CreateHyperGraphEdges();
|
36
|
+
return true;
|
37
|
+
}
|
38
|
+
|
39
|
+
void QueryGraphManager::GetColumnBinding(Expression &expression, ColumnBinding &binding) {
|
40
|
+
if (expression.type == ExpressionType::BOUND_COLUMN_REF) {
|
41
|
+
// Here you have a filter on a single column in a table. Return a binding for the column
|
42
|
+
// being filtered on so the filter estimator knows what HLL count to pull
|
43
|
+
auto &colref = expression.Cast<BoundColumnRefExpression>();
|
44
|
+
D_ASSERT(colref.depth == 0);
|
45
|
+
D_ASSERT(colref.binding.table_index != DConstants::INVALID_INDEX);
|
46
|
+
// map the base table index to the relation index used by the JoinOrderOptimizer
|
47
|
+
D_ASSERT(relation_manager.relation_mapping.find(colref.binding.table_index) !=
|
48
|
+
relation_manager.relation_mapping.end());
|
49
|
+
binding =
|
50
|
+
ColumnBinding(relation_manager.relation_mapping[colref.binding.table_index], colref.binding.column_index);
|
51
|
+
}
|
52
|
+
// TODO: handle inequality filters with functions.
|
53
|
+
ExpressionIterator::EnumerateChildren(expression, [&](Expression &expr) { GetColumnBinding(expr, binding); });
|
54
|
+
}
|
55
|
+
|
56
|
+
const vector<unique_ptr<FilterInfo>> &QueryGraphManager::GetFilterBindings() const {
|
57
|
+
return filters_and_bindings;
|
58
|
+
}
|
59
|
+
|
60
|
+
static unique_ptr<LogicalOperator> PushFilter(unique_ptr<LogicalOperator> node, unique_ptr<Expression> expr) {
|
61
|
+
// push an expression into a filter
|
62
|
+
// first check if we have any filter to push it into
|
63
|
+
if (node->type != LogicalOperatorType::LOGICAL_FILTER) {
|
64
|
+
// we don't, we need to create one
|
65
|
+
auto filter = make_uniq<LogicalFilter>();
|
66
|
+
filter->children.push_back(std::move(node));
|
67
|
+
node = std::move(filter);
|
68
|
+
}
|
69
|
+
// push the filter into the LogicalFilter
|
70
|
+
D_ASSERT(node->type == LogicalOperatorType::LOGICAL_FILTER);
|
71
|
+
auto &filter = node->Cast<LogicalFilter>();
|
72
|
+
filter.expressions.push_back(std::move(expr));
|
73
|
+
return node;
|
74
|
+
}
|
75
|
+
|
76
|
+
void QueryGraphManager::CreateHyperGraphEdges() {
|
77
|
+
// create potential edges from the comparisons
|
78
|
+
for (auto &filter_info : filters_and_bindings) {
|
79
|
+
auto &filter = filter_info->filter;
|
80
|
+
// now check if it can be used as a join predicate
|
81
|
+
if (filter->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON) {
|
82
|
+
auto &comparison = filter->Cast<BoundComparisonExpression>();
|
83
|
+
// extract the bindings that are required for the left and right side of the comparison
|
84
|
+
unordered_set<idx_t> left_bindings, right_bindings;
|
85
|
+
relation_manager.ExtractBindings(*comparison.left, left_bindings);
|
86
|
+
relation_manager.ExtractBindings(*comparison.right, right_bindings);
|
87
|
+
GetColumnBinding(*comparison.left, filter_info->left_binding);
|
88
|
+
GetColumnBinding(*comparison.right, filter_info->right_binding);
|
89
|
+
if (!left_bindings.empty() && !right_bindings.empty()) {
|
90
|
+
// both the left and the right side have bindings
|
91
|
+
// first create the relation sets, if they do not exist
|
92
|
+
filter_info->left_set = &set_manager.GetJoinRelation(left_bindings);
|
93
|
+
filter_info->right_set = &set_manager.GetJoinRelation(right_bindings);
|
94
|
+
// we can only create a meaningful edge if the sets are not exactly the same
|
95
|
+
if (filter_info->left_set != filter_info->right_set) {
|
96
|
+
// check if the sets are disjoint
|
97
|
+
if (Disjoint(left_bindings, right_bindings)) {
|
98
|
+
// they are disjoint, we only need to create one set of edges in the join graph
|
99
|
+
query_graph.CreateEdge(*filter_info->left_set, *filter_info->right_set, filter_info);
|
100
|
+
query_graph.CreateEdge(*filter_info->right_set, *filter_info->left_set, filter_info);
|
101
|
+
} else {
|
102
|
+
continue;
|
103
|
+
}
|
104
|
+
continue;
|
105
|
+
}
|
106
|
+
}
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
static unique_ptr<LogicalOperator> ExtractJoinRelation(unique_ptr<SingleJoinRelation> &rel) {
|
112
|
+
auto &children = rel->parent->children;
|
113
|
+
for (idx_t i = 0; i < children.size(); i++) {
|
114
|
+
if (children[i].get() == &rel->op) {
|
115
|
+
// found it! take ownership of it from the parent
|
116
|
+
auto result = std::move(children[i]);
|
117
|
+
children.erase(children.begin() + i);
|
118
|
+
return result;
|
119
|
+
}
|
120
|
+
}
|
121
|
+
throw Exception("Could not find relation in parent node (?)");
|
122
|
+
}
|
123
|
+
|
124
|
+
unique_ptr<LogicalOperator> QueryGraphManager::Reconstruct(unique_ptr<LogicalOperator> plan, JoinNode &node) {
|
125
|
+
return RewritePlan(std::move(plan), node);
|
126
|
+
}
|
127
|
+
|
128
|
+
GenerateJoinRelation QueryGraphManager::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations,
|
129
|
+
JoinNode &node) {
|
130
|
+
optional_ptr<JoinRelationSet> left_node;
|
131
|
+
optional_ptr<JoinRelationSet> right_node;
|
132
|
+
optional_ptr<JoinRelationSet> result_relation;
|
133
|
+
unique_ptr<LogicalOperator> result_operator;
|
134
|
+
if (node.left && node.right && node.info) {
|
135
|
+
// generate the left and right children
|
136
|
+
auto left = GenerateJoins(extracted_relations, *node.left);
|
137
|
+
auto right = GenerateJoins(extracted_relations, *node.right);
|
138
|
+
|
139
|
+
if (node.info->filters.empty()) {
|
140
|
+
// no filters, create a cross product
|
141
|
+
result_operator = LogicalCrossProduct::Create(std::move(left.op), std::move(right.op));
|
142
|
+
} else {
|
143
|
+
// we have filters, create a join node
|
144
|
+
auto join = make_uniq<LogicalComparisonJoin>(JoinType::INNER);
|
145
|
+
// Here we optimize build side probe side. Our build side is the right side
|
146
|
+
// So the right plans should have lower cardinalities.
|
147
|
+
join->children.push_back(std::move(left.op));
|
148
|
+
join->children.push_back(std::move(right.op));
|
149
|
+
|
150
|
+
// set the join conditions from the join node
|
151
|
+
for (auto &filter_ref : node.info->filters) {
|
152
|
+
auto f = filter_ref.get();
|
153
|
+
// extract the filter from the operator it originally belonged to
|
154
|
+
D_ASSERT(filters_and_bindings[f->filter_index]->filter);
|
155
|
+
auto &filter_and_binding = filters_and_bindings.at(f->filter_index);
|
156
|
+
auto condition = std::move(filter_and_binding->filter);
|
157
|
+
// now create the actual join condition
|
158
|
+
D_ASSERT((JoinRelationSet::IsSubset(*left.set, *f->left_set) &&
|
159
|
+
JoinRelationSet::IsSubset(*right.set, *f->right_set)) ||
|
160
|
+
(JoinRelationSet::IsSubset(*left.set, *f->right_set) &&
|
161
|
+
JoinRelationSet::IsSubset(*right.set, *f->left_set)));
|
162
|
+
JoinCondition cond;
|
163
|
+
D_ASSERT(condition->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON);
|
164
|
+
auto &comparison = condition->Cast<BoundComparisonExpression>();
|
165
|
+
|
166
|
+
// we need to figure out which side is which by looking at the relations available to us
|
167
|
+
bool invert = !JoinRelationSet::IsSubset(*left.set, *f->left_set);
|
168
|
+
cond.left = !invert ? std::move(comparison.left) : std::move(comparison.right);
|
169
|
+
cond.right = !invert ? std::move(comparison.right) : std::move(comparison.left);
|
170
|
+
cond.comparison = condition->type;
|
171
|
+
|
172
|
+
if (invert) {
|
173
|
+
// reverse comparison expression if we reverse the order of the children
|
174
|
+
cond.comparison = FlipComparisonExpression(cond.comparison);
|
175
|
+
}
|
176
|
+
join->conditions.push_back(std::move(cond));
|
177
|
+
}
|
178
|
+
D_ASSERT(!join->conditions.empty());
|
179
|
+
result_operator = std::move(join);
|
180
|
+
}
|
181
|
+
left_node = left.set;
|
182
|
+
right_node = right.set;
|
183
|
+
result_relation = &set_manager.Union(*left.set, *right.set);
|
184
|
+
} else {
|
185
|
+
// base node, get the entry from the list of extracted relations
|
186
|
+
D_ASSERT(node.set.count == 1);
|
187
|
+
D_ASSERT(extracted_relations[node.set.relations[0]]);
|
188
|
+
result_relation = &node.set;
|
189
|
+
result_operator = std::move(extracted_relations[node.set.relations[0]]);
|
190
|
+
}
|
191
|
+
// TODO: this is where estimated properties start coming into play.
|
192
|
+
// when creating the result operator, we should ask the cost model and cardinality estimator what
|
193
|
+
// the cost and cardinality are
|
194
|
+
// result_operator->estimated_props = node.estimated_props->Copy();
|
195
|
+
result_operator->estimated_cardinality = node.cardinality;
|
196
|
+
result_operator->has_estimated_cardinality = true;
|
197
|
+
if (result_operator->type == LogicalOperatorType::LOGICAL_FILTER &&
|
198
|
+
result_operator->children[0]->type == LogicalOperatorType::LOGICAL_GET) {
|
199
|
+
// FILTER on top of GET, add estimated properties to both
|
200
|
+
// auto &filter_props = *result_operator->estimated_props;
|
201
|
+
auto &child_operator = *result_operator->children[0];
|
202
|
+
child_operator.estimated_cardinality = node.cardinality;
|
203
|
+
child_operator.has_estimated_cardinality = true;
|
204
|
+
}
|
205
|
+
// check if we should do a pushdown on this node
|
206
|
+
// basically, any remaining filter that is a subset of the current relation will no longer be used in joins
|
207
|
+
// hence we should push it here
|
208
|
+
for (auto &filter_info : filters_and_bindings) {
|
209
|
+
// check if the filter has already been extracted
|
210
|
+
auto &info = *filter_info;
|
211
|
+
if (filters_and_bindings[info.filter_index]->filter) {
|
212
|
+
// now check if the filter is a subset of the current relation
|
213
|
+
// note that infos with an empty relation set are a special case and we do not push them down
|
214
|
+
if (info.set.count > 0 && JoinRelationSet::IsSubset(*result_relation, info.set)) {
|
215
|
+
auto &filter_and_binding = filters_and_bindings[info.filter_index];
|
216
|
+
auto filter = std::move(filter_and_binding->filter);
|
217
|
+
// if it is, we can push the filter
|
218
|
+
// we can push it either into a join or as a filter
|
219
|
+
// check if we are in a join or in a base table
|
220
|
+
if (!left_node || !info.left_set) {
|
221
|
+
// base table or non-comparison expression, push it as a filter
|
222
|
+
result_operator = PushFilter(std::move(result_operator), std::move(filter));
|
223
|
+
continue;
|
224
|
+
}
|
225
|
+
// the node below us is a join or cross product and the expression is a comparison
|
226
|
+
// check if the nodes can be split up into left/right
|
227
|
+
bool found_subset = false;
|
228
|
+
bool invert = false;
|
229
|
+
if (JoinRelationSet::IsSubset(*left_node, *info.left_set) &&
|
230
|
+
JoinRelationSet::IsSubset(*right_node, *info.right_set)) {
|
231
|
+
found_subset = true;
|
232
|
+
} else if (JoinRelationSet::IsSubset(*right_node, *info.left_set) &&
|
233
|
+
JoinRelationSet::IsSubset(*left_node, *info.right_set)) {
|
234
|
+
invert = true;
|
235
|
+
found_subset = true;
|
236
|
+
}
|
237
|
+
if (!found_subset) {
|
238
|
+
// could not be split up into left/right
|
239
|
+
result_operator = PushFilter(std::move(result_operator), std::move(filter));
|
240
|
+
continue;
|
241
|
+
}
|
242
|
+
// create the join condition
|
243
|
+
JoinCondition cond;
|
244
|
+
D_ASSERT(filter->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON);
|
245
|
+
auto &comparison = filter->Cast<BoundComparisonExpression>();
|
246
|
+
// we need to figure out which side is which by looking at the relations available to us
|
247
|
+
cond.left = !invert ? std::move(comparison.left) : std::move(comparison.right);
|
248
|
+
cond.right = !invert ? std::move(comparison.right) : std::move(comparison.left);
|
249
|
+
cond.comparison = comparison.type;
|
250
|
+
if (invert) {
|
251
|
+
// reverse comparison expression if we reverse the order of the children
|
252
|
+
cond.comparison = FlipComparisonExpression(comparison.type);
|
253
|
+
}
|
254
|
+
// now find the join to push it into
|
255
|
+
auto node = result_operator.get();
|
256
|
+
if (node->type == LogicalOperatorType::LOGICAL_FILTER) {
|
257
|
+
node = node->children[0].get();
|
258
|
+
}
|
259
|
+
if (node->type == LogicalOperatorType::LOGICAL_CROSS_PRODUCT) {
|
260
|
+
// turn into comparison join
|
261
|
+
auto comp_join = make_uniq<LogicalComparisonJoin>(JoinType::INNER);
|
262
|
+
comp_join->children.push_back(std::move(node->children[0]));
|
263
|
+
comp_join->children.push_back(std::move(node->children[1]));
|
264
|
+
comp_join->conditions.push_back(std::move(cond));
|
265
|
+
if (node == result_operator.get()) {
|
266
|
+
result_operator = std::move(comp_join);
|
267
|
+
} else {
|
268
|
+
D_ASSERT(result_operator->type == LogicalOperatorType::LOGICAL_FILTER);
|
269
|
+
result_operator->children[0] = std::move(comp_join);
|
270
|
+
}
|
271
|
+
} else {
|
272
|
+
D_ASSERT(node->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
|
273
|
+
node->type == LogicalOperatorType::LOGICAL_ASOF_JOIN);
|
274
|
+
auto &comp_join = node->Cast<LogicalComparisonJoin>();
|
275
|
+
comp_join.conditions.push_back(std::move(cond));
|
276
|
+
}
|
277
|
+
}
|
278
|
+
}
|
279
|
+
}
|
280
|
+
auto result = GenerateJoinRelation(result_relation, std::move(result_operator));
|
281
|
+
return result;
|
282
|
+
}
|
283
|
+
|
284
|
+
const QueryGraphEdges &QueryGraphManager::GetQueryGraphEdges() const {
|
285
|
+
return query_graph;
|
286
|
+
}
|
287
|
+
|
288
|
+
void QueryGraphManager::CreateQueryGraphCrossProduct(JoinRelationSet &left, JoinRelationSet &right) {
|
289
|
+
query_graph.CreateEdge(left, right, nullptr);
|
290
|
+
query_graph.CreateEdge(right, left, nullptr);
|
291
|
+
}
|
292
|
+
|
293
|
+
unique_ptr<LogicalOperator> QueryGraphManager::RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode &node) {
|
294
|
+
// now we have to rewrite the plan
|
295
|
+
bool root_is_join = plan->children.size() > 1;
|
296
|
+
|
297
|
+
// first we will extract all relations from the main plan
|
298
|
+
vector<unique_ptr<LogicalOperator>> extracted_relations;
|
299
|
+
extracted_relations.reserve(relation_manager.NumRelations());
|
300
|
+
for (auto &relation : relation_manager.GetRelations()) {
|
301
|
+
extracted_relations.push_back(ExtractJoinRelation(relation));
|
302
|
+
}
|
303
|
+
|
304
|
+
// now we generate the actual joins
|
305
|
+
auto join_tree = GenerateJoins(extracted_relations, node);
|
306
|
+
// perform the final pushdown of remaining filters
|
307
|
+
for (auto &filter : filters_and_bindings) {
|
308
|
+
// check if the filter has already been extracted
|
309
|
+
if (filter->filter) {
|
310
|
+
// if not we need to push it
|
311
|
+
join_tree.op = PushFilter(std::move(join_tree.op), std::move(filter->filter));
|
312
|
+
}
|
313
|
+
}
|
314
|
+
|
315
|
+
// find the first join in the relation to know where to place this node
|
316
|
+
if (root_is_join) {
|
317
|
+
// first node is the join, return it immediately
|
318
|
+
return std::move(join_tree.op);
|
319
|
+
}
|
320
|
+
D_ASSERT(plan->children.size() == 1);
|
321
|
+
// have to move up through the relations
|
322
|
+
auto op = plan.get();
|
323
|
+
auto parent = plan.get();
|
324
|
+
while (op->type != LogicalOperatorType::LOGICAL_CROSS_PRODUCT &&
|
325
|
+
op->type != LogicalOperatorType::LOGICAL_COMPARISON_JOIN &&
|
326
|
+
op->type != LogicalOperatorType::LOGICAL_ASOF_JOIN) {
|
327
|
+
D_ASSERT(op->children.size() == 1);
|
328
|
+
parent = op;
|
329
|
+
op = op->children[0].get();
|
330
|
+
}
|
331
|
+
// have to replace at this node
|
332
|
+
parent->children[0] = std::move(join_tree.op);
|
333
|
+
return plan;
|
334
|
+
}
|
335
|
+
|
336
|
+
bool QueryGraphManager::LeftCardLessThanRight(LogicalOperator &op) {
|
337
|
+
D_ASSERT(op.children.size() == 2);
|
338
|
+
if (op.children[0]->has_estimated_cardinality && op.children[1]->has_estimated_cardinality) {
|
339
|
+
return op.children[0]->estimated_cardinality < op.children[1]->estimated_cardinality;
|
340
|
+
}
|
341
|
+
return op.children[0]->EstimateCardinality(context) < op.children[1]->EstimateCardinality(context);
|
342
|
+
}
|
343
|
+
|
344
|
+
unique_ptr<LogicalOperator> QueryGraphManager::LeftRightOptimizations(unique_ptr<LogicalOperator> input_op) {
|
345
|
+
auto op = input_op.get();
|
346
|
+
// pass through single child operators
|
347
|
+
while (!op->children.empty()) {
|
348
|
+
if (op->children.size() == 2) {
|
349
|
+
switch (op->type) {
|
350
|
+
case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
|
351
|
+
auto &join = op->Cast<LogicalComparisonJoin>();
|
352
|
+
if (join.join_type == JoinType::INNER) {
|
353
|
+
if (LeftCardLessThanRight(*op)) {
|
354
|
+
std::swap(op->children[0], op->children[1]);
|
355
|
+
for (auto &cond : join.conditions) {
|
356
|
+
std::swap(cond.left, cond.right);
|
357
|
+
cond.comparison = FlipComparisonExpression(cond.comparison);
|
358
|
+
}
|
359
|
+
}
|
360
|
+
} else if (join.join_type == JoinType::LEFT && join.right_projection_map.empty()) {
|
361
|
+
auto lhs_cardinality = join.children[0]->EstimateCardinality(context);
|
362
|
+
auto rhs_cardinality = join.children[1]->EstimateCardinality(context);
|
363
|
+
if (rhs_cardinality > lhs_cardinality * 2) {
|
364
|
+
join.join_type = JoinType::RIGHT;
|
365
|
+
std::swap(join.children[0], join.children[1]);
|
366
|
+
for (auto &cond : join.conditions) {
|
367
|
+
std::swap(cond.left, cond.right);
|
368
|
+
cond.comparison = FlipComparisonExpression(cond.comparison);
|
369
|
+
}
|
370
|
+
}
|
371
|
+
}
|
372
|
+
break;
|
373
|
+
}
|
374
|
+
case LogicalOperatorType::LOGICAL_CROSS_PRODUCT: {
|
375
|
+
if (LeftCardLessThanRight(*op)) {
|
376
|
+
std::swap(op->children[0], op->children[1]);
|
377
|
+
}
|
378
|
+
break;
|
379
|
+
}
|
380
|
+
case LogicalOperatorType::LOGICAL_ANY_JOIN: {
|
381
|
+
auto &join = op->Cast<LogicalAnyJoin>();
|
382
|
+
if (join.join_type == JoinType::LEFT && join.right_projection_map.empty()) {
|
383
|
+
auto lhs_cardinality = join.children[0]->EstimateCardinality(context);
|
384
|
+
auto rhs_cardinality = join.children[1]->EstimateCardinality(context);
|
385
|
+
if (rhs_cardinality > lhs_cardinality * 2) {
|
386
|
+
join.join_type = JoinType::RIGHT;
|
387
|
+
std::swap(join.children[0], join.children[1]);
|
388
|
+
}
|
389
|
+
} else if (join.join_type == JoinType::INNER && LeftCardLessThanRight(*op)) {
|
390
|
+
std::swap(join.children[0], join.children[1]);
|
391
|
+
}
|
392
|
+
break;
|
393
|
+
}
|
394
|
+
default:
|
395
|
+
break;
|
396
|
+
}
|
397
|
+
op->children[0] = LeftRightOptimizations(std::move(op->children[0]));
|
398
|
+
op->children[1] = LeftRightOptimizations(std::move(op->children[1]));
|
399
|
+
// break from while loop
|
400
|
+
break;
|
401
|
+
}
|
402
|
+
if (op->children.size() == 1) {
|
403
|
+
op = op->children[0].get();
|
404
|
+
}
|
405
|
+
}
|
406
|
+
return input_op;
|
407
|
+
}
|
408
|
+
|
409
|
+
} // namespace duckdb
|