duckdb 0.8.2-dev2700.0 → 0.8.2-dev2842.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
  3. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -1
  4. package/src/duckdb/extension/json/include/json_serializer.hpp +1 -1
  5. package/src/duckdb/extension/json/json_deserializer.cpp +10 -10
  6. package/src/duckdb/extension/json/json_scan.cpp +2 -2
  7. package/src/duckdb/extension/json/json_serializer.cpp +11 -10
  8. package/src/duckdb/extension/json/serialize_json.cpp +44 -44
  9. package/src/duckdb/extension/parquet/parquet_extension.cpp +11 -10
  10. package/src/duckdb/extension/parquet/serialize_parquet.cpp +6 -6
  11. package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
  12. package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
  13. package/src/duckdb/src/common/enum_util.cpp +5 -0
  14. package/src/duckdb/src/common/extra_type_info.cpp +2 -2
  15. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +5 -3
  16. package/src/duckdb/src/common/serializer/binary_serializer.cpp +10 -5
  17. package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
  18. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  19. package/src/duckdb/src/common/types/value.cpp +33 -33
  20. package/src/duckdb/src/common/types/vector.cpp +20 -20
  21. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +2 -2
  22. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +6 -6
  23. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +4 -4
  24. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +4 -4
  25. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
  26. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  27. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  28. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  29. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
  30. package/src/duckdb/src/execution/window_executor.cpp +10 -1
  31. package/src/duckdb/src/function/table/read_csv.cpp +4 -4
  32. package/src/duckdb/src/function/table/table_scan.cpp +14 -14
  33. package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
  34. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  36. package/src/duckdb/src/include/duckdb/common/index_vector.hpp +2 -2
  37. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +7 -3
  38. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +2 -1
  39. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +18 -17
  40. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +10 -9
  41. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +4 -0
  42. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
  43. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
  44. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +10 -10
  45. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
  47. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
  48. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  49. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  50. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
  51. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
  52. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  53. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
  54. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  55. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  56. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  57. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
  58. package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
  59. package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
  60. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
  61. package/src/duckdb/src/include/duckdb.h +11 -1
  62. package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
  63. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  64. package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
  65. package/src/duckdb/src/main/relation.cpp +4 -4
  66. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  67. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  68. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  69. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
  70. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  71. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
  72. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  73. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  74. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  75. package/src/duckdb/src/parallel/executor.cpp +6 -0
  76. package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
  77. package/src/duckdb/src/parser/parser.cpp +18 -3
  78. package/src/duckdb/src/parser/tableref/pivotref.cpp +6 -6
  79. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  80. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -10
  81. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +6 -6
  82. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -24
  83. package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +2 -2
  84. package/src/duckdb/src/planner/operator/logical_get.cpp +26 -22
  85. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +26 -26
  86. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +66 -66
  87. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +78 -78
  88. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +250 -250
  89. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +10 -10
  90. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +206 -206
  91. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +116 -116
  92. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +110 -110
  93. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +48 -48
  94. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +16 -16
  95. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -2
  96. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +10 -10
  97. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +54 -54
  98. package/src/duckdb/src/storage/serialization/serialize_types.cpp +22 -22
  99. package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
  100. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
@@ -1,1116 +1,86 @@
1
1
  #include "duckdb/optimizer/join_order/join_order_optimizer.hpp"
2
+ #include "duckdb/optimizer/join_order/cost_model.hpp"
3
+ #include "duckdb/optimizer/join_order/plan_enumerator.hpp"
2
4
 
3
5
  #include "duckdb/common/limits.hpp"
4
6
  #include "duckdb/common/pair.hpp"
5
7
  #include "duckdb/planner/expression/list.hpp"
6
8
  #include "duckdb/planner/expression_iterator.hpp"
7
9
  #include "duckdb/planner/operator/list.hpp"
8
- #include "duckdb/common/queue.hpp"
9
10
 
10
11
  #include <algorithm>
11
12
  #include <cmath>
12
13
 
13
- namespace std {
14
-
15
- //! A JoinNode is defined by the relations it joins.
16
- template <>
17
- struct hash<duckdb::JoinNode> {
18
- inline string operator()(const duckdb::JoinNode &join_node) const {
19
- return join_node.set.ToString();
20
- }
21
- };
22
- } // namespace std
23
-
24
14
  namespace duckdb {
25
15
 
26
- //! Returns true if A and B are disjoint, false otherwise
27
- template <class T>
28
- static bool Disjoint(const unordered_set<T> &a, const unordered_set<T> &b) {
29
- return std::all_of(a.begin(), a.end(), [&b](typename std::unordered_set<T>::const_reference entry) {
30
- return b.find(entry) == b.end();
31
- });
32
- }
33
-
34
- //! Extract the set of relations referred to inside an expression
35
- bool JoinOrderOptimizer::ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings) {
36
- if (expression.type == ExpressionType::BOUND_COLUMN_REF) {
37
- auto &colref = expression.Cast<BoundColumnRefExpression>();
38
- D_ASSERT(colref.depth == 0);
39
- D_ASSERT(colref.binding.table_index != DConstants::INVALID_INDEX);
40
- // map the base table index to the relation index used by the JoinOrderOptimizer
41
- D_ASSERT(relation_mapping.find(colref.binding.table_index) != relation_mapping.end());
42
- auto catalog_table = relation_mapping[colref.binding.table_index];
43
- auto column_index = colref.binding.column_index;
44
- cardinality_estimator.AddColumnToRelationMap(catalog_table, column_index);
45
- bindings.insert(relation_mapping[colref.binding.table_index]);
46
- }
47
- if (expression.type == ExpressionType::BOUND_REF) {
48
- // bound expression
49
- bindings.clear();
50
- return false;
51
- }
52
- D_ASSERT(expression.type != ExpressionType::SUBQUERY);
53
- bool can_reorder = true;
54
- ExpressionIterator::EnumerateChildren(expression, [&](Expression &expr) {
55
- if (!ExtractBindings(expr, bindings)) {
56
- can_reorder = false;
57
- return;
58
- }
59
- });
60
- return can_reorder;
61
- }
62
-
63
- void JoinOrderOptimizer::GetColumnBinding(Expression &expression, ColumnBinding &binding) {
64
- if (expression.type == ExpressionType::BOUND_COLUMN_REF) {
65
- // Here you have a filter on a single column in a table. Return a binding for the column
66
- // being filtered on so the filter estimator knows what HLL count to pull
67
- auto &colref = expression.Cast<BoundColumnRefExpression>();
68
- D_ASSERT(colref.depth == 0);
69
- D_ASSERT(colref.binding.table_index != DConstants::INVALID_INDEX);
70
- // map the base table index to the relation index used by the JoinOrderOptimizer
71
- D_ASSERT(relation_mapping.find(colref.binding.table_index) != relation_mapping.end());
72
- binding = ColumnBinding(relation_mapping[colref.binding.table_index], colref.binding.column_index);
73
- }
74
- // TODO: handle inequality filters with functions.
75
- ExpressionIterator::EnumerateChildren(expression, [&](Expression &expr) { GetColumnBinding(expr, binding); });
76
- }
77
-
78
- static unique_ptr<LogicalOperator> PushFilter(unique_ptr<LogicalOperator> node, unique_ptr<Expression> expr) {
79
- // push an expression into a filter
80
- // first check if we have any filter to push it into
81
- if (node->type != LogicalOperatorType::LOGICAL_FILTER) {
82
- // we don't, we need to create one
83
- auto filter = make_uniq<LogicalFilter>();
84
- filter->children.push_back(std::move(node));
85
- node = std::move(filter);
86
- }
87
- // push the filter into the LogicalFilter
88
- D_ASSERT(node->type == LogicalOperatorType::LOGICAL_FILTER);
89
- auto &filter = node->Cast<LogicalFilter>();
90
- filter.expressions.push_back(std::move(expr));
91
- return node;
92
- }
93
-
94
- bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op,
95
- vector<reference<LogicalOperator>> &filter_operators,
96
- optional_ptr<LogicalOperator> parent) {
97
- LogicalOperator *op = &input_op;
98
- while (op->children.size() == 1 &&
99
- (op->type != LogicalOperatorType::LOGICAL_PROJECTION &&
100
- op->type != LogicalOperatorType::LOGICAL_EXPRESSION_GET && op->type != LogicalOperatorType::LOGICAL_GET)) {
101
- if (op->type == LogicalOperatorType::LOGICAL_FILTER) {
102
- // extract join conditions from filter
103
- filter_operators.push_back(*op);
104
- }
105
- if (op->type == LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY ||
106
- op->type == LogicalOperatorType::LOGICAL_WINDOW) {
107
- // don't push filters through projection or aggregate and group by
108
- JoinOrderOptimizer optimizer(context);
109
- op->children[0] = optimizer.Optimize(std::move(op->children[0]));
110
- return false;
111
- }
112
- op = op->children[0].get();
113
- }
114
- bool non_reorderable_operation = false;
115
- if (op->type == LogicalOperatorType::LOGICAL_UNION || op->type == LogicalOperatorType::LOGICAL_EXCEPT ||
116
- op->type == LogicalOperatorType::LOGICAL_INTERSECT || op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN ||
117
- op->type == LogicalOperatorType::LOGICAL_ANY_JOIN || op->type == LogicalOperatorType::LOGICAL_ASOF_JOIN) {
118
- // set operation, optimize separately in children
119
- non_reorderable_operation = true;
120
- }
121
-
122
- if (op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
123
- auto &join = op->Cast<LogicalComparisonJoin>();
124
- if (join.join_type == JoinType::INNER) {
125
- // extract join conditions from inner join
126
- filter_operators.push_back(*op);
127
- } else {
128
- // non-inner join, not reorderable yet
129
- non_reorderable_operation = true;
130
- if (join.join_type == JoinType::LEFT && join.right_projection_map.empty()) {
131
- // for left joins; if the RHS cardinality is significantly larger than the LHS (2x)
132
- // we convert to doing a RIGHT OUTER JOIN
133
- // FIXME: for now we don't swap if the right_projection_map is not empty
134
- // this can be fixed once we implement the left_projection_map properly...
135
- auto lhs_cardinality = join.children[0]->EstimateCardinality(context);
136
- auto rhs_cardinality = join.children[1]->EstimateCardinality(context);
137
- if (rhs_cardinality > lhs_cardinality * 2) {
138
- join.join_type = JoinType::RIGHT;
139
- std::swap(join.children[0], join.children[1]);
140
- for (auto &cond : join.conditions) {
141
- std::swap(cond.left, cond.right);
142
- cond.comparison = FlipComparisonExpression(cond.comparison);
143
- }
144
- }
145
- }
146
- }
147
- }
148
- if (op->type == LogicalOperatorType::LOGICAL_ANY_JOIN && non_reorderable_operation) {
149
- auto &join = op->Cast<LogicalAnyJoin>();
150
- if (join.join_type == JoinType::LEFT && join.right_projection_map.empty()) {
151
- auto lhs_cardinality = join.children[0]->EstimateCardinality(context);
152
- auto rhs_cardinality = join.children[1]->EstimateCardinality(context);
153
- if (rhs_cardinality > lhs_cardinality * 2) {
154
- join.join_type = JoinType::RIGHT;
155
- std::swap(join.children[0], join.children[1]);
156
- }
16
+ static bool HasJoin(LogicalOperator *op) {
17
+ while (!op->children.empty()) {
18
+ if (op->children.size() == 1) {
19
+ op = op->children[0].get();
157
20
  }
158
- }
159
-
160
- if (non_reorderable_operation) {
161
- // we encountered a non-reordable operation (setop or non-inner join)
162
- // we do not reorder non-inner joins yet, however we do want to expand the potential join graph around them
163
- // non-inner joins are also tricky because we can't freely make conditions through them
164
- // e.g. suppose we have (left LEFT OUTER JOIN right WHERE right IS NOT NULL), the join can generate
165
- // new NULL values in the right side, so pushing this condition through the join leads to incorrect results
166
- // for this reason, we just start a new JoinOptimizer pass in each of the children of the join
167
-
168
- // Keep track of all filter bindings the new join order optimizer makes
169
- vector<column_binding_map_t<ColumnBinding>> child_binding_maps;
170
- idx_t child_bindings_it = 0;
171
- for (auto &child : op->children) {
172
- child_binding_maps.emplace_back();
173
- JoinOrderOptimizer optimizer(context);
174
- child = optimizer.Optimize(std::move(child));
175
- // save the relation bindings from the optimized child. These later all get added to the
176
- // parent cardinality_estimator relation column binding map.
177
- optimizer.cardinality_estimator.CopyRelationMap(child_binding_maps.at(child_bindings_it));
178
- child_bindings_it += 1;
179
- }
180
- // after this we want to treat this node as one "end node" (like e.g. a base relation)
181
- // however the join refers to multiple base relations
182
- // enumerate all base relations obtained from this join and add them to the relation mapping
183
- // also, we have to resolve the join conditions for the joins here
184
- // get the left and right bindings
185
- unordered_set<idx_t> bindings;
186
- LogicalJoin::GetTableReferences(*op, bindings);
187
- // now create the relation that refers to all these bindings
188
- auto relation = make_uniq<SingleJoinRelation>(input_op, parent);
189
- auto relation_id = relations.size();
190
- // Add binding information from the nonreorderable join to this relation.
191
- for (idx_t it : bindings) {
192
- cardinality_estimator.MergeBindings(it, relation_id, child_binding_maps);
193
- relation_mapping[it] = relation_id;
194
- }
195
- relations.push_back(std::move(relation));
196
- return true;
197
- }
198
-
199
- switch (op->type) {
200
- case LogicalOperatorType::LOGICAL_ASOF_JOIN:
201
- case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
202
- case LogicalOperatorType::LOGICAL_CROSS_PRODUCT: {
203
- // inner join or cross product
204
- bool can_reorder_left = ExtractJoinRelations(*op->children[0], filter_operators, op);
205
- bool can_reorder_right = ExtractJoinRelations(*op->children[1], filter_operators, op);
206
- return can_reorder_left && can_reorder_right;
207
- }
208
- case LogicalOperatorType::LOGICAL_EXPRESSION_GET: {
209
- // base table scan, add to set of relations
210
- auto &get = op->Cast<LogicalExpressionGet>();
211
- auto relation = make_uniq<SingleJoinRelation>(input_op, parent);
212
- //! make sure the optimizer has knowledge of the exact column bindings as well.
213
- relation_mapping[get.table_index] = relations.size();
214
- relations.push_back(std::move(relation));
215
- return true;
216
- }
217
- case LogicalOperatorType::LOGICAL_DUMMY_SCAN: {
218
- // table function call, add to set of relations
219
- auto &dummy_scan = op->Cast<LogicalDummyScan>();
220
- auto relation = make_uniq<SingleJoinRelation>(input_op, parent);
221
- relation_mapping[dummy_scan.table_index] = relations.size();
222
- relations.push_back(std::move(relation));
223
- return true;
224
- }
225
- case LogicalOperatorType::LOGICAL_GET:
226
- case LogicalOperatorType::LOGICAL_PROJECTION: {
227
- auto table_index = op->GetTableIndex()[0];
228
- auto relation = make_uniq<SingleJoinRelation>(input_op, parent);
229
- auto relation_id = relations.size();
230
-
231
- // If the children are empty, operator can't ge a logical get.
232
- if (op->children.empty() && op->type == LogicalOperatorType::LOGICAL_GET) {
233
- auto &get = op->Cast<LogicalGet>();
234
- cardinality_estimator.AddRelationColumnMapping(get, relation_id);
235
- relation_mapping[table_index] = relation_id;
236
- relations.push_back(std::move(relation));
21
+ if (op->children.size() == 2) {
237
22
  return true;
238
23
  }
239
-
240
- // we run the join order optimizer within the subquery as well
241
- JoinOrderOptimizer optimizer(context);
242
- op->children[0] = optimizer.Optimize(std::move(op->children[0]));
243
- // push one child column binding map back.
244
- vector<column_binding_map_t<ColumnBinding>> child_binding_maps;
245
- child_binding_maps.emplace_back();
246
- optimizer.cardinality_estimator.CopyRelationMap(child_binding_maps.at(0));
247
- // This logical projection/get may sit on top of a logical comparison join that has been pushed down
248
- // we want to copy the binding info of both tables
249
- relation_mapping[table_index] = relation_id;
250
- for (auto &binding_info : child_binding_maps.at(0)) {
251
- cardinality_estimator.AddRelationToColumnMapping(
252
- ColumnBinding(table_index, binding_info.first.column_index), binding_info.second);
253
- cardinality_estimator.AddColumnToRelationMap(binding_info.second.table_index,
254
- binding_info.second.column_index);
255
- }
256
- relations.push_back(std::move(relation));
257
- return true;
258
- }
259
- default:
260
- return false;
261
24
  }
25
+ return false;
262
26
  }
263
27
 
264
- //! Update the exclusion set with all entries in the subgraph
265
- static void UpdateExclusionSet(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) {
266
- for (idx_t i = 0; i < node.count; i++) {
267
- exclusion_set.insert(node.relations[i]);
268
- }
269
- }
28
+ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOperator> plan,
29
+ optional_ptr<RelationStats> stats) {
270
30
 
271
- //! Create a new JoinTree node by joining together two previous JoinTree nodes
272
- unique_ptr<JoinNode> JoinOrderOptimizer::CreateJoinTree(JoinRelationSet &set,
273
- const vector<reference<NeighborInfo>> &possible_connections,
274
- JoinNode &left, JoinNode &right) {
275
- // for the hash join we want the right side (build side) to have the smallest cardinality
276
- // also just a heuristic but for now...
277
- // FIXME: we should probably actually benchmark that as well
278
- // FIXME: should consider different join algorithms, should we pick a join algorithm here as well? (probably)
279
- double expected_cardinality;
280
- optional_ptr<NeighborInfo> best_connection;
281
- auto plan = plans.find(&set);
282
- // if we have already calculated an expected cardinality for this set,
283
- // just re-use that cardinality
284
- if (left.GetCardinality<double>() < right.GetCardinality<double>()) {
285
- return CreateJoinTree(set, possible_connections, right, left);
286
- }
287
- if (plan != plans.end()) {
288
- if (!plan->second) {
289
- throw InternalException("No plan: internal error in join order optimizer");
290
- }
291
- expected_cardinality = plan->second->GetCardinality<double>();
292
- best_connection = &possible_connections.back().get();
293
- } else if (possible_connections.empty()) {
294
- // cross product
295
- expected_cardinality = cardinality_estimator.EstimateCrossProduct(left, right);
296
- } else {
297
- // normal join, expect foreign key join
298
- expected_cardinality = cardinality_estimator.EstimateCardinalityWithSet(set);
299
- best_connection = &possible_connections.back().get();
300
- }
301
-
302
- auto cost = CardinalityEstimator::ComputeCost(left, right, expected_cardinality);
303
- auto result = make_uniq<JoinNode>(set, best_connection, left, right, expected_cardinality, cost);
304
- D_ASSERT(cost >= expected_cardinality);
305
- return result;
306
- }
307
-
308
- bool JoinOrderOptimizer::NodeInFullPlan(JoinNode &node) {
309
- return join_nodes_in_full_plan.find(node.set.ToString()) != join_nodes_in_full_plan.end();
310
- }
311
-
312
- void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode &node) {
313
- if (node.set.count == relations.size()) {
314
- join_nodes_in_full_plan.clear();
315
- }
316
- if (node.set.count < relations.size()) {
317
- join_nodes_in_full_plan.insert(node.set.ToString());
318
- }
319
- if (node.left) {
320
- UpdateJoinNodesInFullPlan(*node.left);
321
- }
322
- if (node.right) {
323
- UpdateJoinNodesInFullPlan(*node.right);
324
- }
325
- }
326
-
327
- static vector<unordered_set<idx_t>> AddSuperSets(const vector<unordered_set<idx_t>> &current,
328
- const vector<idx_t> &all_neighbors) {
329
- vector<unordered_set<idx_t>> ret;
330
-
331
- for (const auto &neighbor_set : current) {
332
- auto max_val = std::max_element(neighbor_set.begin(), neighbor_set.end());
333
- for (const auto &neighbor : all_neighbors) {
334
- if (*max_val >= neighbor) {
335
- continue;
336
- }
337
- if (neighbor_set.count(neighbor) == 0) {
338
- unordered_set<idx_t> new_set;
339
- for (auto &n : neighbor_set) {
340
- new_set.insert(n);
341
- }
342
- new_set.insert(neighbor);
343
- ret.push_back(new_set);
344
- }
345
- }
346
- }
347
-
348
- return ret;
349
- }
350
-
351
- // works by first creating all sets with cardinality 1
352
- // then iterates over each previously created group of subsets and will only add a neighbor if the neighbor
353
- // is greater than all relations in the set.
354
- static vector<unordered_set<idx_t>> GetAllNeighborSets(vector<idx_t> neighbors) {
355
- vector<unordered_set<idx_t>> ret;
356
- sort(neighbors.begin(), neighbors.end());
357
- vector<unordered_set<idx_t>> added;
358
- for (auto &neighbor : neighbors) {
359
- added.push_back(unordered_set<idx_t>({neighbor}));
360
- ret.push_back(unordered_set<idx_t>({neighbor}));
361
- }
362
- do {
363
- added = AddSuperSets(added, neighbors);
364
- for (auto &d : added) {
365
- ret.push_back(d);
366
- }
367
- } while (!added.empty());
368
- #if DEBUG
369
- // drive by test to make sure we have an accurate amount of
370
- // subsets, and that each neighbor is in a correct amount
371
- // of those subsets.
372
- D_ASSERT(ret.size() == pow(2, neighbors.size()) - 1);
373
- for (auto &n : neighbors) {
374
- idx_t count = 0;
375
- for (auto &set : ret) {
376
- if (set.count(n) >= 1) {
377
- count += 1;
378
- }
379
- }
380
- D_ASSERT(count == pow(2, neighbors.size() - 1));
381
- }
382
- #endif
383
- return ret;
384
- }
385
-
386
- JoinNode &JoinOrderOptimizer::EmitPair(JoinRelationSet &left, JoinRelationSet &right,
387
- const vector<reference<NeighborInfo>> &info) {
388
- // get the left and right join plans
389
- auto &left_plan = plans[&left];
390
- auto &right_plan = plans[&right];
391
- if (!left_plan || !right_plan) {
392
- throw InternalException("No left or right plan: internal error in join order optimizer");
393
- }
394
- auto &new_set = set_manager.Union(left, right);
395
- // create the join tree based on combining the two plans
396
- auto new_plan = CreateJoinTree(new_set, info, *left_plan, *right_plan);
397
- // check if this plan is the optimal plan we found for this set of relations
398
- auto entry = plans.find(&new_set);
399
- if (entry == plans.end() || new_plan->GetCost() < entry->second->GetCost()) {
400
- // the plan is the optimal plan, move it into the dynamic programming tree
401
- auto &result = *new_plan;
402
-
403
- //! make sure plans are symmetric for cardinality estimation
404
- if (entry != plans.end()) {
405
- cardinality_estimator.VerifySymmetry(result, *entry->second);
406
- }
407
- if (full_plan_found &&
408
- join_nodes_in_full_plan.find(new_plan->set.ToString()) != join_nodes_in_full_plan.end()) {
409
- must_update_full_plan = true;
410
- }
411
- if (new_set.count == relations.size()) {
412
- full_plan_found = true;
413
- // If we find a full plan, we need to keep track of which nodes are in the full plan.
414
- // It's possible the DP algorithm updates one of these nodes, then goes on to solve
415
- // the order approximately. In the approximate algorithm, it's not guaranteed that the
416
- // node references are updated. If the original full plan is determined to still have
417
- // the lowest cost, it's possible to get use-after-free errors.
418
- // If we know a node in the full plan is updated, we can prevent ourselves from exiting the
419
- // DP algorithm until the last plan updated is a full plan
420
- UpdateJoinNodesInFullPlan(result);
421
- if (must_update_full_plan) {
422
- must_update_full_plan = false;
423
- }
424
- }
425
-
426
- D_ASSERT(new_plan);
427
- plans[&new_set] = std::move(new_plan);
428
- return result;
429
- }
430
- return *entry->second;
431
- }
432
-
433
- bool JoinOrderOptimizer::TryEmitPair(JoinRelationSet &left, JoinRelationSet &right,
434
- const vector<reference<NeighborInfo>> &info) {
435
- pairs++;
436
- // If a full plan is created, it's possible a node in the plan gets updated. When this happens, make sure you keep
437
- // emitting pairs until you emit another final plan. Another final plan is guaranteed to be produced because of
438
- // our symmetry guarantees.
439
- if (pairs >= 10000 && !must_update_full_plan) {
440
- // when the amount of pairs gets too large we exit the dynamic programming and resort to a greedy algorithm
441
- // FIXME: simple heuristic currently
442
- // at 10K pairs stop searching exactly and switch to heuristic
443
- return false;
444
- }
445
- EmitPair(left, right, info);
446
- return true;
447
- }
448
-
449
- bool JoinOrderOptimizer::EmitCSG(JoinRelationSet &node) {
450
- if (node.count == relations.size()) {
451
- return true;
452
- }
453
- // create the exclusion set as everything inside the subgraph AND anything with members BELOW it
454
- unordered_set<idx_t> exclusion_set;
455
- for (idx_t i = 0; i < node.relations[0]; i++) {
456
- exclusion_set.insert(i);
457
- }
458
- UpdateExclusionSet(node, exclusion_set);
459
- // find the neighbors given this exclusion set
460
- auto neighbors = query_graph.GetNeighbors(node, exclusion_set);
461
- if (neighbors.empty()) {
462
- return true;
463
- }
464
-
465
- //! Neighbors should be reversed when iterating over them.
466
- std::sort(neighbors.begin(), neighbors.end(), std::greater_equal<idx_t>());
467
- for (idx_t i = 0; i < neighbors.size() - 1; i++) {
468
- D_ASSERT(neighbors[i] > neighbors[i + 1]);
469
- }
470
-
471
- // Dphyp paper missiing this.
472
- // Because we are traversing in reverse order, we need to add neighbors whose number is smaller than the current
473
- // node to exclusion_set
474
- // This avoids duplicated enumeration
475
- unordered_set<idx_t> new_exclusion_set = exclusion_set;
476
- for (idx_t i = 0; i < neighbors.size(); ++i) {
477
- D_ASSERT(new_exclusion_set.find(neighbors[i]) == new_exclusion_set.end());
478
- new_exclusion_set.insert(neighbors[i]);
479
- }
480
-
481
- for (auto neighbor : neighbors) {
482
- // since the GetNeighbors only returns the smallest element in a list, the entry might not be connected to
483
- // (only!) this neighbor, hence we have to do a connectedness check before we can emit it
484
- auto &neighbor_relation = set_manager.GetJoinRelation(neighbor);
485
- auto connections = query_graph.GetConnections(node, neighbor_relation);
486
- if (!connections.empty()) {
487
- if (!TryEmitPair(node, neighbor_relation, connections)) {
488
- return false;
489
- }
490
- }
491
-
492
- if (!EnumerateCmpRecursive(node, neighbor_relation, new_exclusion_set)) {
493
- return false;
494
- }
495
-
496
- new_exclusion_set.erase(neighbor);
497
- }
498
- return true;
499
- }
500
-
501
- bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right,
502
- unordered_set<idx_t> &exclusion_set) {
503
- // get the neighbors of the second relation under the exclusion set
504
- auto neighbors = query_graph.GetNeighbors(right, exclusion_set);
505
- if (neighbors.empty()) {
506
- return true;
507
- }
508
-
509
- auto all_subset = GetAllNeighborSets(neighbors);
510
- vector<reference<JoinRelationSet>> union_sets;
511
- union_sets.reserve(all_subset.size());
512
- for (const auto &rel_set : all_subset) {
513
- auto &neighbor = set_manager.GetJoinRelation(rel_set);
514
- // emit the combinations of this node and its neighbors
515
- auto &combined_set = set_manager.Union(right, neighbor);
516
- // If combined_set.count == right.count, we found a neighbor that has been present before,
517
- // which means we didn't set exclusion_set correctly.
518
- D_ASSERT(combined_set.count > right.count);
519
- if (plans.find(&combined_set) != plans.end()) {
520
- auto connections = query_graph.GetConnections(left, combined_set);
521
- if (!connections.empty()) {
522
- if (!TryEmitPair(left, combined_set, connections)) {
523
- return false;
524
- }
525
- }
526
- }
527
- union_sets.push_back(combined_set);
528
- }
529
-
530
- unordered_set<idx_t> new_exclusion_set = exclusion_set;
531
- for (const auto &neighbor : neighbors) {
532
- new_exclusion_set.insert(neighbor);
533
- }
534
-
535
- // recursively enumerate the sets
536
- for (idx_t i = 0; i < union_sets.size(); i++) {
537
- // updated the set of excluded entries with this neighbor
538
- if (!EnumerateCmpRecursive(left, union_sets[i], new_exclusion_set)) {
539
- return false;
540
- }
541
- }
542
- return true;
543
- }
544
-
545
- bool JoinOrderOptimizer::EnumerateCSGRecursive(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) {
546
- // find neighbors of S under the exclusion set
547
- auto neighbors = query_graph.GetNeighbors(node, exclusion_set);
548
- if (neighbors.empty()) {
549
- return true;
550
- }
551
-
552
- auto all_subset = GetAllNeighborSets(neighbors);
553
- vector<reference<JoinRelationSet>> union_sets;
554
- union_sets.reserve(all_subset.size());
555
- for (const auto &rel_set : all_subset) {
556
- auto &neighbor = set_manager.GetJoinRelation(rel_set);
557
- // emit the combinations of this node and its neighbors
558
- auto &new_set = set_manager.Union(node, neighbor);
559
- D_ASSERT(new_set.count > node.count);
560
- if (plans.find(&new_set) != plans.end()) {
561
- if (!EmitCSG(new_set)) {
562
- return false;
563
- }
564
- }
565
- union_sets.push_back(new_set);
566
- }
567
-
568
- unordered_set<idx_t> new_exclusion_set = exclusion_set;
569
- for (const auto &neighbor : neighbors) {
570
- new_exclusion_set.insert(neighbor);
571
- }
572
-
573
- // recursively enumerate the sets
574
- for (idx_t i = 0; i < union_sets.size(); i++) {
575
- // updated the set of excluded entries with this neighbor
576
- if (!EnumerateCSGRecursive(union_sets[i], new_exclusion_set)) {
577
- return false;
578
- }
579
- }
580
- return true;
581
- }
582
-
583
- bool JoinOrderOptimizer::SolveJoinOrderExactly() {
584
- // now we perform the actual dynamic programming to compute the final result
585
- // we enumerate over all the possible pairs in the neighborhood
586
- for (idx_t i = relations.size(); i > 0; i--) {
587
- // for every node in the set, we consider it as the start node once
588
- auto &start_node = set_manager.GetJoinRelation(i - 1);
589
- // emit the start node
590
- if (!EmitCSG(start_node)) {
591
- return false;
592
- }
593
- // initialize the set of exclusion_set as all the nodes with a number below this
594
- unordered_set<idx_t> exclusion_set;
595
- for (idx_t j = 0; j < i; j++) {
596
- exclusion_set.insert(j);
597
- }
598
- // then we recursively search for neighbors that do not belong to the banned entries
599
- if (!EnumerateCSGRecursive(start_node, exclusion_set)) {
600
- return false;
601
- }
602
- }
603
- return true;
604
- }
605
-
606
- void JoinOrderOptimizer::UpdateDPTree(JoinNode &new_plan) {
607
- if (!NodeInFullPlan(new_plan)) {
608
- // if the new node is not in the full plan, feel free to return
609
- // because you won't be updating the full plan.
610
- return;
611
- }
612
- auto &new_set = new_plan.set;
613
- // now update every plan that uses this plan
614
- unordered_set<idx_t> exclusion_set;
615
- for (idx_t i = 0; i < new_set.count; i++) {
616
- exclusion_set.insert(new_set.relations[i]);
617
- }
618
- auto neighbors = query_graph.GetNeighbors(new_set, exclusion_set);
619
- auto all_neighbors = GetAllNeighborSets(neighbors);
620
- for (const auto &neighbor : all_neighbors) {
621
- auto &neighbor_relation = set_manager.GetJoinRelation(neighbor);
622
- auto &combined_set = set_manager.Union(new_set, neighbor_relation);
623
-
624
- auto combined_set_plan = plans.find(&combined_set);
625
- if (combined_set_plan == plans.end()) {
626
- continue;
627
- }
628
-
629
- double combined_set_plan_cost = combined_set_plan->second->GetCost();
630
- auto connections = query_graph.GetConnections(new_set, neighbor_relation);
631
- // recurse and update up the tree if the combined set produces a plan with a lower cost
632
- // only recurse on neighbor relations that have plans.
633
- auto right_plan = plans.find(&neighbor_relation);
634
- if (right_plan == plans.end()) {
635
- continue;
636
- }
637
- auto &updated_plan = EmitPair(new_set, neighbor_relation, connections);
638
- // <= because the child node has already been replaced. You need to
639
- // replace the parent node as well in this case
640
- if (updated_plan.GetCost() < combined_set_plan_cost) {
641
- UpdateDPTree(updated_plan);
642
- }
643
- }
644
- }
645
-
646
- void JoinOrderOptimizer::SolveJoinOrderApproximately() {
647
- // at this point, we exited the dynamic programming but did not compute the final join order because it took too
647
- // long. Instead, we use a greedy heuristic to obtain a join ordering: we use Greedy Operator Ordering to
648
- // construct the result tree. First we start out with all the base relations (the to-be-joined relations).
650
- vector<reference<JoinRelationSet>> join_relations; // T in the paper
651
- for (idx_t i = 0; i < relations.size(); i++) {
652
- join_relations.push_back(set_manager.GetJoinRelation(i));
653
- }
654
- while (join_relations.size() > 1) {
655
- // now in every step of the algorithm, we greedily pick the join between the to-be-joined relations that has the
656
- // smallest cost. This is O(r^2) per step, and every step will reduce the total amount of relations to-be-joined
657
- // by 1, so the total cost is O(r^3) in the amount of relations
658
- idx_t best_left = 0, best_right = 0;
659
- optional_ptr<JoinNode> best_connection;
660
- for (idx_t i = 0; i < join_relations.size(); i++) {
661
- auto left = join_relations[i];
662
- for (idx_t j = i + 1; j < join_relations.size(); j++) {
663
- auto right = join_relations[j];
664
- // check if we can connect these two relations
665
- auto connection = query_graph.GetConnections(left, right);
666
- if (!connection.empty()) {
667
- // we can check the cost of this connection
668
- auto &node = EmitPair(left, right, connection);
669
-
670
- // update the DP tree in case a plan created by the DP algorithm uses the node
671
- // that was potentially just updated by EmitPair. You will get a use-after-free
672
- // error if future plans rely on the old node that was just replaced.
673
- // if the node is in the full plan, then update the DP tree.
674
- UpdateDPTree(node);
675
-
676
- if (!best_connection || node.GetCost() < best_connection->GetCost()) {
677
- // best pair found so far
678
- best_connection = &node;
679
- best_left = i;
680
- best_right = j;
681
- }
682
- }
683
- }
684
- }
685
- if (!best_connection) {
686
- // could not find a connection, but we were not done with finding a completed plan
687
- // we have to add a cross product; we add it between the two smallest relations
688
- optional_ptr<JoinNode> smallest_plans[2];
689
- idx_t smallest_index[2];
690
- D_ASSERT(join_relations.size() >= 2);
691
-
692
- // first just add the first two join relations. The cost doesn't matter, as the JOO
693
- // will swap them based on estimated cardinality anyway.
694
- for (idx_t i = 0; i < 2; i++) {
695
- auto current_plan = plans[&join_relations[i].get()].get();
696
- smallest_plans[i] = current_plan;
697
- smallest_index[i] = i;
698
- }
699
-
700
- // if there are any other join relations that don't have connections
701
- // add them if they have lower estimated cardinality.
702
- for (idx_t i = 2; i < join_relations.size(); i++) {
703
- // get the plan for this relation
704
- auto current_plan = plans[&join_relations[i].get()].get();
705
- // check if the cardinality is smaller than the smallest two found so far
706
- for (idx_t j = 0; j < 2; j++) {
707
- if (!smallest_plans[j] ||
708
- smallest_plans[j]->GetCardinality<double>() > current_plan->GetCardinality<double>()) {
709
- smallest_plans[j] = current_plan;
710
- smallest_index[j] = i;
711
- break;
712
- }
713
- }
714
- }
715
- if (!smallest_plans[0] || !smallest_plans[1]) {
716
- throw InternalException("Internal error in join order optimizer");
717
- }
718
- D_ASSERT(smallest_plans[0] && smallest_plans[1]);
719
- D_ASSERT(smallest_index[0] != smallest_index[1]);
720
- auto &left = smallest_plans[0]->set;
721
- auto &right = smallest_plans[1]->set;
722
- // create a cross product edge (i.e. edge with empty filter) between these two sets in the query graph
723
- query_graph.CreateEdge(left, right, nullptr);
724
- // now emit the pair and continue with the algorithm
725
- auto connections = query_graph.GetConnections(left, right);
726
- D_ASSERT(!connections.empty());
727
-
728
- best_connection = &EmitPair(left, right, connections);
729
- best_left = smallest_index[0];
730
- best_right = smallest_index[1];
731
-
732
- UpdateDPTree(*best_connection);
733
- // the code below assumes best_right > best_left
734
- if (best_left > best_right) {
735
- std::swap(best_left, best_right);
736
- }
737
- }
738
- // now update the to-be-checked pairs
739
- // remove left and right, and add the combination
31
+ // make sure query graph manager has not extracted a relation graph already
32
+ LogicalOperator *op = plan.get();
740
33
 
741
- // important to erase the biggest element first
742
- // if we erase the smallest element first the index of the biggest element changes
743
- D_ASSERT(best_right > best_left);
744
- join_relations.erase(join_relations.begin() + best_right);
745
- join_relations.erase(join_relations.begin() + best_left);
746
- join_relations.push_back(best_connection->set);
747
- }
748
- }
34
+ // extract the relations that go into the hyper graph.
35
+ // We optimize the children of any non-reorderable operations we come across.
36
+ bool reorderable = query_graph_manager.Build(*op);
749
37
 
750
- void JoinOrderOptimizer::SolveJoinOrder() {
751
- // first try to solve the join order exactly
752
- if (!SolveJoinOrderExactly()) {
753
- // otherwise, if that times out we resort to a greedy algorithm
754
- SolveJoinOrderApproximately();
755
- }
756
- }
38
+ // get relation_stats here since the reconstruction process will move all of the relations.
39
+ auto relation_stats = query_graph_manager.relation_manager.GetRelationStats();
40
+ unique_ptr<LogicalOperator> new_logical_plan = nullptr;
757
41
 
758
- void JoinOrderOptimizer::GenerateCrossProducts() {
759
- // generate a set of cross products to combine the currently available plans into a full join plan
760
- // we create edges between every relation with a high cost
761
- for (idx_t i = 0; i < relations.size(); i++) {
762
- auto &left = set_manager.GetJoinRelation(i);
763
- for (idx_t j = 0; j < relations.size(); j++) {
764
- if (i != j) {
765
- auto &right = set_manager.GetJoinRelation(j);
766
- query_graph.CreateEdge(left, right, nullptr);
767
- query_graph.CreateEdge(right, left, nullptr);
768
- }
769
- }
770
- }
771
- }
42
+ if (reorderable) {
43
+ // query graph now has filters and relations
44
+ auto cost_model = CostModel(query_graph_manager);
772
45
 
773
- static unique_ptr<LogicalOperator> ExtractJoinRelation(SingleJoinRelation &rel) {
774
- auto &children = rel.parent->children;
775
- for (idx_t i = 0; i < children.size(); i++) {
776
- if (children[i].get() == &rel.op) {
777
- // found it! take ownership of it from the parent
778
- auto result = std::move(children[i]);
779
- children.erase(children.begin() + i);
780
- return result;
781
- }
782
- }
783
- throw Exception("Could not find relation in parent node (?)");
784
- }
46
+ // Initialize a plan enumerator.
47
+ auto plan_enumerator =
48
+ PlanEnumerator(query_graph_manager, cost_model, query_graph_manager.GetQueryGraphEdges());
785
49
 
786
- GenerateJoinRelation JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations,
787
- JoinNode &node) {
788
- optional_ptr<JoinRelationSet> left_node;
789
- optional_ptr<JoinRelationSet> right_node;
790
- optional_ptr<JoinRelationSet> result_relation;
791
- unique_ptr<LogicalOperator> result_operator;
792
- if (node.left && node.right && node.info) {
793
- // generate the left and right children
794
- auto left = GenerateJoins(extracted_relations, *node.left);
795
- auto right = GenerateJoins(extracted_relations, *node.right);
50
+ // Initialize the leaf/single node plans
51
+ plan_enumerator.InitLeafPlans();
796
52
 
797
- if (node.info->filters.empty()) {
798
- // no filters, create a cross product
799
- result_operator = LogicalCrossProduct::Create(std::move(left.op), std::move(right.op));
800
- } else {
801
- // we have filters, create a join node
802
- auto join = make_uniq<LogicalComparisonJoin>(JoinType::INNER);
803
- join->children.push_back(std::move(left.op));
804
- join->children.push_back(std::move(right.op));
805
- // set the join conditions from the join node
806
- for (auto &filter_ref : node.info->filters) {
807
- auto &f = filter_ref.get();
808
- // extract the filter from the operator it originally belonged to
809
- D_ASSERT(filters[f.filter_index]);
810
- auto condition = std::move(filters[f.filter_index]);
811
- // now create the actual join condition
812
- D_ASSERT((JoinRelationSet::IsSubset(left.set, *f.left_set) &&
813
- JoinRelationSet::IsSubset(right.set, *f.right_set)) ||
814
- (JoinRelationSet::IsSubset(left.set, *f.right_set) &&
815
- JoinRelationSet::IsSubset(right.set, *f.left_set)));
816
- JoinCondition cond;
817
- D_ASSERT(condition->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON);
818
- auto &comparison = condition->Cast<BoundComparisonExpression>();
819
- // we need to figure out which side is which by looking at the relations available to us
820
- bool invert = !JoinRelationSet::IsSubset(left.set, *f.left_set);
821
- cond.left = !invert ? std::move(comparison.left) : std::move(comparison.right);
822
- cond.right = !invert ? std::move(comparison.right) : std::move(comparison.left);
823
- cond.comparison = condition->type;
53
+ // Ask the plan enumerator to enumerate a number of join orders
54
+ auto final_plan = plan_enumerator.SolveJoinOrder();
55
+ // TODO: add in the check that if no plan exists, you have to add a cross product.
824
56
 
825
- if (invert) {
826
- // reverse comparison expression if we reverse the order of the children
827
- cond.comparison = FlipComparisonExpression(cond.comparison);
828
- }
829
- join->conditions.push_back(std::move(cond));
830
- }
831
- D_ASSERT(!join->conditions.empty());
832
- result_operator = std::move(join);
833
- }
834
- left_node = &left.set;
835
- right_node = &right.set;
836
- right_node = &right.set;
837
- result_relation = &set_manager.Union(*left_node, *right_node);
57
+ // now reconstruct a logical plan from the query graph plan
58
+ new_logical_plan = query_graph_manager.Reconstruct(std::move(plan), *final_plan);
838
59
  } else {
839
- // base node, get the entry from the list of extracted relations
840
- D_ASSERT(node.set.count == 1);
841
- D_ASSERT(extracted_relations[node.set.relations[0]]);
842
- result_relation = &node.set;
843
- result_operator = std::move(extracted_relations[node.set.relations[0]]);
844
- }
845
- result_operator->estimated_props = node.estimated_props->Copy();
846
- result_operator->estimated_cardinality = result_operator->estimated_props->GetCardinality<idx_t>();
847
- result_operator->has_estimated_cardinality = true;
848
- if (result_operator->type == LogicalOperatorType::LOGICAL_FILTER &&
849
- result_operator->children[0]->type == LogicalOperatorType::LOGICAL_GET) {
850
- // FILTER on top of GET, add estimated properties to both
851
- auto &filter_props = *result_operator->estimated_props;
852
- auto &child_operator = *result_operator->children[0];
853
- child_operator.estimated_props = make_uniq<EstimatedProperties>(filter_props.GetCardinality<double>() /
854
- CardinalityEstimator::DEFAULT_SELECTIVITY,
855
- filter_props.GetCost<double>());
856
- child_operator.estimated_cardinality = child_operator.estimated_props->GetCardinality<idx_t>();
857
- child_operator.has_estimated_cardinality = true;
858
- }
859
- // check if we should do a pushdown on this node
860
- // basically, any remaining filter that is a subset of the current relation will no longer be used in joins
861
- // hence we should push it here
862
- for (auto &filter_info : filter_infos) {
863
- // check if the filter has already been extracted
864
- auto &info = *filter_info;
865
- if (filters[info.filter_index]) {
866
- // now check if the filter is a subset of the current relation
867
- // note that infos with an empty relation set are a special case and we do not push them down
868
- if (info.set.count > 0 && JoinRelationSet::IsSubset(*result_relation, info.set)) {
869
- auto filter = std::move(filters[info.filter_index]);
870
- // if it is, we can push the filter
871
- // we can push it either into a join or as a filter
872
- // check if we are in a join or in a base table
873
- if (!left_node || !info.left_set) {
874
- // base table or non-comparison expression, push it as a filter
875
- result_operator = PushFilter(std::move(result_operator), std::move(filter));
876
- continue;
877
- }
878
- // the node below us is a join or cross product and the expression is a comparison
879
- // check if the nodes can be split up into left/right
880
- bool found_subset = false;
881
- bool invert = false;
882
- if (JoinRelationSet::IsSubset(*left_node, *info.left_set) &&
883
- JoinRelationSet::IsSubset(*right_node, *info.right_set)) {
884
- found_subset = true;
885
- } else if (JoinRelationSet::IsSubset(*right_node, *info.left_set) &&
886
- JoinRelationSet::IsSubset(*left_node, *info.right_set)) {
887
- invert = true;
888
- found_subset = true;
889
- }
890
- if (!found_subset) {
891
- // could not be split up into left/right
892
- result_operator = PushFilter(std::move(result_operator), std::move(filter));
893
- continue;
894
- }
895
- // create the join condition
896
- JoinCondition cond;
897
- D_ASSERT(filter->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON);
898
- auto &comparison = filter->Cast<BoundComparisonExpression>();
899
- // we need to figure out which side is which by looking at the relations available to us
900
- cond.left = !invert ? std::move(comparison.left) : std::move(comparison.right);
901
- cond.right = !invert ? std::move(comparison.right) : std::move(comparison.left);
902
- cond.comparison = comparison.type;
903
- if (invert) {
904
- // reverse comparison expression if we reverse the order of the children
905
- cond.comparison = FlipComparisonExpression(comparison.type);
906
- }
907
- // now find the join to push it into
908
- auto node = result_operator.get();
909
- if (node->type == LogicalOperatorType::LOGICAL_FILTER) {
910
- node = node->children[0].get();
911
- }
912
- if (node->type == LogicalOperatorType::LOGICAL_CROSS_PRODUCT) {
913
- // turn into comparison join
914
- auto comp_join = make_uniq<LogicalComparisonJoin>(JoinType::INNER);
915
- comp_join->children.push_back(std::move(node->children[0]));
916
- comp_join->children.push_back(std::move(node->children[1]));
917
- comp_join->conditions.push_back(std::move(cond));
918
- if (node == result_operator.get()) {
919
- result_operator = std::move(comp_join);
920
- } else {
921
- D_ASSERT(result_operator->type == LogicalOperatorType::LOGICAL_FILTER);
922
- result_operator->children[0] = std::move(comp_join);
923
- }
924
- } else {
925
- D_ASSERT(node->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
926
- node->type == LogicalOperatorType::LOGICAL_ASOF_JOIN);
927
- auto &comp_join = node->Cast<LogicalComparisonJoin>();
928
- comp_join.conditions.push_back(std::move(cond));
929
- }
930
- }
931
- }
932
- }
933
- return GenerateJoinRelation(*result_relation, std::move(result_operator));
934
- }
935
-
936
- unique_ptr<LogicalOperator> JoinOrderOptimizer::RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode &node) {
937
- // now we have to rewrite the plan
938
- bool root_is_join = plan->children.size() > 1;
939
-
940
- // first we will extract all relations from the main plan
941
- vector<unique_ptr<LogicalOperator>> extracted_relations;
942
- extracted_relations.reserve(relations.size());
943
- for (auto &relation : relations) {
944
- extracted_relations.push_back(ExtractJoinRelation(*relation));
945
- }
946
-
947
- // now we generate the actual joins
948
- auto join_tree = GenerateJoins(extracted_relations, node);
949
- // perform the final pushdown of remaining filters
950
- for (auto &filter : filters) {
951
- // check if the filter has already been extracted
952
- if (filter) {
953
- // if not we need to push it
954
- join_tree.op = PushFilter(std::move(join_tree.op), std::move(filter));
60
+ new_logical_plan = std::move(plan);
61
+ if (relation_stats.size() == 1) {
62
+ new_logical_plan->estimated_cardinality = relation_stats.at(0).cardinality;
955
63
  }
956
64
  }
957
65
 
958
- // find the first join in the relation to know where to place this node
959
- if (root_is_join) {
960
- // first node is the join, return it immediately
961
- return std::move(join_tree.op);
962
- }
963
- D_ASSERT(plan->children.size() == 1);
964
- // have to move up through the relations
965
- auto op = plan.get();
966
- auto parent = plan.get();
967
- while (op->type != LogicalOperatorType::LOGICAL_CROSS_PRODUCT &&
968
- op->type != LogicalOperatorType::LOGICAL_COMPARISON_JOIN &&
969
- op->type != LogicalOperatorType::LOGICAL_ASOF_JOIN) {
970
- D_ASSERT(op->children.size() == 1);
971
- parent = op;
972
- op = op->children[0].get();
973
- }
974
- // have to replace at this node
975
- parent->children[0] = std::move(join_tree.op);
976
- return plan;
977
- }
978
-
979
- // the join ordering is pretty much a straight implementation of the paper "Dynamic Programming Strikes Back" by Guido
980
- // Moerkotte and Thomas Neumann; see that paper for additional info/documentation. Bonus slides:
981
- // https://db.in.tum.de/teaching/ws1415/queryopt/chapter3.pdf?lang=de
982
- // FIXME: incorporate cardinality estimation into the plans, possibly by pushing samples?
983
- unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOperator> plan) {
984
- D_ASSERT(filters.empty() && relations.empty()); // assert that the JoinOrderOptimizer has not been used before
985
- LogicalOperator *op = plan.get();
986
- // now we optimize the current plan
987
- // we skip past until we find the first projection, we do this because the HAVING clause inserts a Filter AFTER the
988
- // group by and this filter cannot be reordered
989
- // extract a list of all relations that have to be joined together
990
- // and a list of all conditions that is applied to them
991
- vector<reference<LogicalOperator>> filter_operators;
992
- if (!ExtractJoinRelations(*op, filter_operators)) {
993
- // do not support reordering this type of plan
994
- return plan;
66
+ // only perform left right optimizations when stats is null (means we have the top level optimize call)
67
+ // Don't check reorderability because non-reorderable joins will result in 1 relation, but we can
68
+ // still switch the children.
69
+ // TODO: put this in a different optimizer maybe?
70
+ if (stats == nullptr && HasJoin(new_logical_plan.get())) {
71
+ new_logical_plan = query_graph_manager.LeftRightOptimizations(std::move(new_logical_plan));
995
72
  }
996
- if (relations.size() <= 1) {
997
- // at most one relation, nothing to reorder
998
- return plan;
999
- }
1000
- // now that we know we are going to perform join ordering we actually extract the filters, eliminating duplicate
1001
- // filters in the process
1002
- expression_set_t filter_set;
1003
- for (auto &filter_op : filter_operators) {
1004
- auto &f_op = filter_op.get();
1005
- if (f_op.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
1006
- f_op.type == LogicalOperatorType::LOGICAL_ASOF_JOIN) {
1007
- auto &join = f_op.Cast<LogicalComparisonJoin>();
1008
- D_ASSERT(join.join_type == JoinType::INNER);
1009
- D_ASSERT(join.expressions.empty());
1010
- for (auto &cond : join.conditions) {
1011
- auto comparison =
1012
- make_uniq<BoundComparisonExpression>(cond.comparison, std::move(cond.left), std::move(cond.right));
1013
- if (filter_set.find(*comparison) == filter_set.end()) {
1014
- filter_set.insert(*comparison);
1015
- filters.push_back(std::move(comparison));
1016
- }
1017
- }
1018
- join.conditions.clear();
1019
- } else {
1020
- for (auto &expression : f_op.expressions) {
1021
- if (filter_set.find(*expression) == filter_set.end()) {
1022
- filter_set.insert(*expression);
1023
- filters.push_back(std::move(expression));
1024
- }
1025
- }
1026
- f_op.expressions.clear();
1027
- }
1028
- }
1029
- // create potential edges from the comparisons
1030
- for (idx_t i = 0; i < filters.size(); i++) {
1031
- auto &filter = filters[i];
1032
- // first extract the relation set for the entire filter
1033
- unordered_set<idx_t> bindings;
1034
- ExtractBindings(*filter, bindings);
1035
- auto &set = set_manager.GetJoinRelation(bindings);
1036
73
 
1037
- auto info = make_uniq<FilterInfo>(set, i);
1038
- auto filter_info = info.get();
1039
- filter_infos.push_back(std::move(info));
1040
-
1041
- // now check if it can be used as a join predicate
1042
- if (filter->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON) {
1043
- auto &comparison = filter->Cast<BoundComparisonExpression>();
1044
- // extract the bindings that are required for the left and right side of the comparison
1045
- unordered_set<idx_t> left_bindings, right_bindings;
1046
- ExtractBindings(*comparison.left, left_bindings);
1047
- ExtractBindings(*comparison.right, right_bindings);
1048
- GetColumnBinding(*comparison.left, filter_info->left_binding);
1049
- GetColumnBinding(*comparison.right, filter_info->right_binding);
1050
- if (!left_bindings.empty() && !right_bindings.empty()) {
1051
- // both the left and the right side have bindings
1052
- // first create the relation sets, if they do not exist
1053
- filter_info->left_set = &set_manager.GetJoinRelation(left_bindings);
1054
- filter_info->right_set = &set_manager.GetJoinRelation(right_bindings);
1055
- // we can only create a meaningful edge if the sets are not exactly the same
1056
- if (filter_info->left_set != filter_info->right_set) {
1057
- // check if the sets are disjoint
1058
- if (Disjoint(left_bindings, right_bindings)) {
1059
- // they are disjoint, we only need to create one set of edges in the join graph
1060
- query_graph.CreateEdge(*filter_info->left_set, *filter_info->right_set, filter_info);
1061
- query_graph.CreateEdge(*filter_info->right_set, *filter_info->left_set, filter_info);
1062
- } else {
1063
- continue;
1064
- }
1065
- continue;
1066
- }
1067
- }
1068
- }
1069
- }
1070
- // now use dynamic programming to figure out the optimal join order
1071
- // First we initialize each of the single-node plans with themselves and with their cardinalities; these are the leaf
1072
- // nodes of the join tree. NOTE: we can just use pointers to JoinRelationSet* here because the GetJoinRelation
1073
- // function ensures that a unique combination of relations will have a unique JoinRelationSet object.
1074
- vector<NodeOp> nodes_ops;
1075
- for (idx_t i = 0; i < relations.size(); i++) {
1076
- auto &rel = *relations[i];
1077
- auto &node = set_manager.GetJoinRelation(i);
1078
- nodes_ops.emplace_back(make_uniq<JoinNode>(node, 0), rel.op);
74
+ // Propagate up a stats object from the top of the new_logical_plan if stats exist.
75
+ if (stats) {
76
+ auto cardinality = new_logical_plan->EstimateCardinality(context);
77
+ auto bindings = new_logical_plan->GetColumnBindings();
78
+ auto new_stats = RelationStatisticsHelper::CombineStatsOfReorderableOperator(bindings, relation_stats);
79
+ new_stats.cardinality = MaxValue(cardinality, new_stats.cardinality);
80
+ RelationStatisticsHelper::CopyRelationStats(*stats, new_stats);
1079
81
  }
1080
82
 
1081
- cardinality_estimator.InitCardinalityEstimatorProps(nodes_ops, filter_infos);
1082
-
1083
- for (auto &node_op : nodes_ops) {
1084
- D_ASSERT(node_op.node);
1085
- plans[&node_op.node->set] = std::move(node_op.node);
1086
- }
1087
- // now we perform the actual dynamic programming to compute the final result
1088
- SolveJoinOrder();
1089
- // now the optimal join path should have been found
1090
- // get it from the node
1091
- unordered_set<idx_t> bindings;
1092
- for (idx_t i = 0; i < relations.size(); i++) {
1093
- bindings.insert(i);
1094
- }
1095
- auto &total_relation = set_manager.GetJoinRelation(bindings);
1096
- auto final_plan = plans.find(&total_relation);
1097
- if (final_plan == plans.end()) {
1098
- // could not find the final plan
1099
- // this should only happen in case the sets are actually disjunct
1100
- // in this case we need to generate cross product to connect the disjoint sets
1101
- if (context.config.force_no_cross_product) {
1102
- throw InvalidInputException(
1103
- "Query requires a cross-product, but 'force_no_cross_product' PRAGMA is enabled");
1104
- }
1105
- GenerateCrossProducts();
1106
- //! solve the join order again
1107
- SolveJoinOrder();
1108
- // now we can obtain the final plan!
1109
- final_plan = plans.find(&total_relation);
1110
- D_ASSERT(final_plan != plans.end());
1111
- }
1112
- // now perform the actual reordering
1113
- return RewritePlan(std::move(plan), *final_plan->second);
83
+ return new_logical_plan;
1114
84
  }
1115
85
 
1116
86
  } // namespace duckdb