duckdb 0.8.2-dev2700.0 → 0.8.2-dev2842.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
  3. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -1
  4. package/src/duckdb/extension/json/include/json_serializer.hpp +1 -1
  5. package/src/duckdb/extension/json/json_deserializer.cpp +10 -10
  6. package/src/duckdb/extension/json/json_scan.cpp +2 -2
  7. package/src/duckdb/extension/json/json_serializer.cpp +11 -10
  8. package/src/duckdb/extension/json/serialize_json.cpp +44 -44
  9. package/src/duckdb/extension/parquet/parquet_extension.cpp +11 -10
  10. package/src/duckdb/extension/parquet/serialize_parquet.cpp +6 -6
  11. package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
  12. package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
  13. package/src/duckdb/src/common/enum_util.cpp +5 -0
  14. package/src/duckdb/src/common/extra_type_info.cpp +2 -2
  15. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +5 -3
  16. package/src/duckdb/src/common/serializer/binary_serializer.cpp +10 -5
  17. package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
  18. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  19. package/src/duckdb/src/common/types/value.cpp +33 -33
  20. package/src/duckdb/src/common/types/vector.cpp +20 -20
  21. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +2 -2
  22. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +6 -6
  23. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +4 -4
  24. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +4 -4
  25. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
  26. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  27. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  28. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  29. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
  30. package/src/duckdb/src/execution/window_executor.cpp +10 -1
  31. package/src/duckdb/src/function/table/read_csv.cpp +4 -4
  32. package/src/duckdb/src/function/table/table_scan.cpp +14 -14
  33. package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
  34. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  36. package/src/duckdb/src/include/duckdb/common/index_vector.hpp +2 -2
  37. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +7 -3
  38. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +2 -1
  39. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +18 -17
  40. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +10 -9
  41. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +4 -0
  42. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
  43. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
  44. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +10 -10
  45. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
  47. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
  48. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  49. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  50. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
  51. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
  52. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  53. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
  54. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  55. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  56. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  57. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
  58. package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
  59. package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
  60. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
  61. package/src/duckdb/src/include/duckdb.h +11 -1
  62. package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
  63. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  64. package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
  65. package/src/duckdb/src/main/relation.cpp +4 -4
  66. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  67. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  68. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  69. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
  70. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  71. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
  72. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  73. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  74. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  75. package/src/duckdb/src/parallel/executor.cpp +6 -0
  76. package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
  77. package/src/duckdb/src/parser/parser.cpp +18 -3
  78. package/src/duckdb/src/parser/tableref/pivotref.cpp +6 -6
  79. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  80. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -10
  81. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +6 -6
  82. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -24
  83. package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +2 -2
  84. package/src/duckdb/src/planner/operator/logical_get.cpp +26 -22
  85. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +26 -26
  86. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +66 -66
  87. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +78 -78
  88. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +250 -250
  89. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +10 -10
  90. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +206 -206
  91. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +116 -116
  92. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +110 -110
  93. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +48 -48
  94. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +16 -16
  95. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -2
  96. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +10 -10
  97. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +54 -54
  98. package/src/duckdb/src/storage/serialization/serialize_types.cpp +22 -22
  99. package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
  100. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
@@ -3,11 +3,10 @@
3
3
  #include "duckdb/common/printer.hpp"
4
4
  #include "duckdb/common/string_util.hpp"
5
5
  #include "duckdb/common/assert.hpp"
6
- #include "duckdb/common/to_string.hpp"
7
6
 
8
7
  namespace duckdb {
9
8
 
10
- using QueryEdge = QueryGraph::QueryEdge;
9
+ using QueryEdge = QueryGraphEdges::QueryEdge;
11
10
 
12
11
  // LCOV_EXCL_START
13
12
  static string QueryEdgeToString(const QueryEdge *info, vector<idx_t> prefix) {
@@ -18,7 +17,7 @@ static string QueryEdgeToString(const QueryEdge *info, vector<idx_t> prefix) {
18
17
  }
19
18
  source += "]";
20
19
  for (auto &entry : info->neighbors) {
21
- result += StringUtil::Format("%s -> %s\n", source.c_str(), entry->neighbor.ToString().c_str());
20
+ result += StringUtil::Format("%s -> %s\n", source.c_str(), entry->neighbor->ToString().c_str());
22
21
  }
23
22
  for (auto &entry : info->children) {
24
23
  vector<idx_t> new_prefix = prefix;
@@ -28,56 +27,58 @@ static string QueryEdgeToString(const QueryEdge *info, vector<idx_t> prefix) {
28
27
  return result;
29
28
  }
30
29
 
31
- string QueryGraph::ToString() const {
30
+ string QueryGraphEdges::ToString() const {
32
31
  return QueryEdgeToString(&root, {});
33
32
  }
34
33
 
35
- void QueryGraph::Print() {
34
+ void QueryGraphEdges::Print() {
36
35
  Printer::Print(ToString());
37
36
  }
38
37
  // LCOV_EXCL_STOP
39
38
 
40
- QueryEdge &QueryGraph::GetQueryEdge(JoinRelationSet &left) {
39
+ optional_ptr<QueryEdge> QueryGraphEdges::GetQueryEdge(JoinRelationSet &left) {
41
40
  D_ASSERT(left.count > 0);
42
41
  // find the EdgeInfo corresponding to the left set
43
- reference<QueryEdge> info(root);
42
+ optional_ptr<QueryEdge> info(&root);
44
43
  for (idx_t i = 0; i < left.count; i++) {
45
- auto entry = info.get().children.find(left.relations[i]);
46
- if (entry == info.get().children.end()) {
44
+ auto entry = info.get()->children.find(left.relations[i]);
45
+ if (entry == info.get()->children.end()) {
47
46
  // node not found, create it
48
- auto insert_it = info.get().children.insert(make_pair(left.relations[i], make_uniq<QueryEdge>()));
47
+ auto insert_it = info.get()->children.insert(make_pair(left.relations[i], make_uniq<QueryEdge>()));
49
48
  entry = insert_it.first;
50
49
  }
51
50
  // move to the next node
52
- info = *entry->second;
51
+ info = entry->second;
53
52
  }
54
53
  return info;
55
54
  }
56
55
 
57
- void QueryGraph::CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr<FilterInfo> filter_info) {
56
+ void QueryGraphEdges::CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr<FilterInfo> filter_info) {
58
57
  D_ASSERT(left.count > 0 && right.count > 0);
59
58
  // find the EdgeInfo corresponding to the left set
60
- auto &info = GetQueryEdge(left);
59
+ auto info = GetQueryEdge(left);
61
60
  // now insert the edge to the right relation, if it does not exist
62
- for (idx_t i = 0; i < info.neighbors.size(); i++) {
63
- if (&info.neighbors[i]->neighbor == &right) {
61
+ for (idx_t i = 0; i < info->neighbors.size(); i++) {
62
+ if (info->neighbors[i]->neighbor == &right) {
64
63
  if (filter_info) {
65
64
  // neighbor already exists just add the filter, if we have any
66
- info.neighbors[i]->filters.push_back(*filter_info);
65
+ info->neighbors[i]->filters.push_back(filter_info);
67
66
  }
68
67
  return;
69
68
  }
70
69
  }
71
70
  // neighbor does not exist, create it
72
- auto n = make_uniq<NeighborInfo>(right);
73
- if (filter_info) {
74
- n->filters.push_back(*filter_info);
71
+ auto n = make_uniq<NeighborInfo>(&right);
72
+ // if the edge represents a cross product, filter_info is null. The easiest way then to determine
73
+ // if an edge is for a cross product is if the filters are empty
74
+ if (info && filter_info) {
75
+ n->filters.push_back(filter_info);
75
76
  }
76
- info.neighbors.push_back(std::move(n));
77
+ info->neighbors.push_back(std::move(n));
77
78
  }
78
79
 
79
- void QueryGraph::EnumerateNeighborsDFS(JoinRelationSet &node, reference<QueryEdge> info, idx_t index,
80
- const std::function<bool(NeighborInfo &)> &callback) {
80
+ void QueryGraphEdges::EnumerateNeighborsDFS(JoinRelationSet &node, reference<QueryEdge> info, idx_t index,
81
+ const std::function<bool(NeighborInfo &)> &callback) const {
81
82
 
82
83
  for (auto &neighbor : info.get().neighbors) {
83
84
  if (callback(*neighbor)) {
@@ -94,7 +95,8 @@ void QueryGraph::EnumerateNeighborsDFS(JoinRelationSet &node, reference<QueryEdg
94
95
  }
95
96
  }
96
97
 
97
- void QueryGraph::EnumerateNeighbors(JoinRelationSet &node, const std::function<bool(NeighborInfo &)> &callback) {
98
+ void QueryGraphEdges::EnumerateNeighbors(JoinRelationSet &node,
99
+ const std::function<bool(NeighborInfo &)> &callback) const {
98
100
  for (idx_t j = 0; j < node.count; j++) {
99
101
  auto iter = root.children.find(node.relations[j]);
100
102
  if (iter != root.children.end()) {
@@ -105,16 +107,16 @@ void QueryGraph::EnumerateNeighbors(JoinRelationSet &node, const std::function<b
105
107
  }
106
108
 
107
109
  //! Returns true if a JoinRelationSet is banned by the list of exclusion_set, false otherwise
108
- static bool JoinRelationSetIsExcluded(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) {
109
- return exclusion_set.find(node.relations[0]) != exclusion_set.end();
110
+ static bool JoinRelationSetIsExcluded(optional_ptr<JoinRelationSet> node, unordered_set<idx_t> &exclusion_set) {
111
+ return exclusion_set.find(node->relations[0]) != exclusion_set.end();
110
112
  }
111
113
 
112
- vector<idx_t> QueryGraph::GetNeighbors(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) {
114
+ const vector<idx_t> QueryGraphEdges::GetNeighbors(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) const {
113
115
  unordered_set<idx_t> result;
114
116
  EnumerateNeighbors(node, [&](NeighborInfo &info) -> bool {
115
117
  if (!JoinRelationSetIsExcluded(info.neighbor, exclusion_set)) {
116
118
  // add the smallest node of the neighbor to the set
117
- result.insert(info.neighbor.relations[0]);
119
+ result.insert(info.neighbor->relations[0]);
118
120
  }
119
121
  return false;
120
122
  });
@@ -123,10 +125,11 @@ vector<idx_t> QueryGraph::GetNeighbors(JoinRelationSet &node, unordered_set<idx_
123
125
  return neighbors;
124
126
  }
125
127
 
126
- vector<reference<NeighborInfo>> QueryGraph::GetConnections(JoinRelationSet &node, JoinRelationSet &other) {
128
+ const vector<reference<NeighborInfo>> QueryGraphEdges::GetConnections(JoinRelationSet &node,
129
+ JoinRelationSet &other) const {
127
130
  vector<reference<NeighborInfo>> connections;
128
131
  EnumerateNeighbors(node, [&](NeighborInfo &info) -> bool {
129
- if (JoinRelationSet::IsSubset(other, info.neighbor)) {
132
+ if (JoinRelationSet::IsSubset(other, *info.neighbor)) {
130
133
  connections.push_back(info);
131
134
  }
132
135
  return false;
@@ -0,0 +1,409 @@
1
+ #include "duckdb/optimizer/join_order/query_graph_manager.hpp"
2
+ #include "duckdb/planner/logical_operator.hpp"
3
+ #include "duckdb/optimizer/join_order/join_relation.hpp"
4
+ #include "duckdb/common/enums/join_type.hpp"
5
+ #include "duckdb/planner/operator/list.hpp"
6
+ #include "duckdb/planner/expression_iterator.hpp"
7
+ #include "duckdb/planner/expression/bound_comparison_expression.hpp"
8
+ #include "duckdb/common/printer.hpp"
9
+ #include "duckdb/common/string_util.hpp"
10
+ #include "duckdb/common/assert.hpp"
11
+
12
+ namespace duckdb {
13
+
14
+ //! Returns true if A and B are disjoint, false otherwise
15
+ template <class T>
16
+ static bool Disjoint(const unordered_set<T> &a, const unordered_set<T> &b) {
17
+ return std::all_of(a.begin(), a.end(), [&b](typename std::unordered_set<T>::const_reference entry) {
18
+ return b.find(entry) == b.end();
19
+ });
20
+ }
21
+
22
+ bool QueryGraphManager::Build(LogicalOperator &op) {
23
+ vector<reference<LogicalOperator>> filter_operators;
24
+ // have the relation manager extract the join relations and create a reference list of all the
25
+ // filter operators.
26
+ auto can_reorder = relation_manager.ExtractJoinRelations(op, filter_operators);
27
+ auto num_relations = relation_manager.NumRelations();
28
+ if (num_relations <= 1 || !can_reorder) {
29
+ // nothing to optimize/reorder
30
+ return false;
31
+ }
32
+ // extract the edges of the hypergraph, creating a list of filters and their associated bindings.
33
+ filters_and_bindings = relation_manager.ExtractEdges(op, filter_operators, set_manager);
34
+ // Create the query_graph hyper edges
35
+ CreateHyperGraphEdges();
36
+ return true;
37
+ }
38
+
39
+ void QueryGraphManager::GetColumnBinding(Expression &expression, ColumnBinding &binding) {
40
+ if (expression.type == ExpressionType::BOUND_COLUMN_REF) {
41
+ // Here you have a filter on a single column in a table. Return a binding for the column
42
+ // being filtered on so the filter estimator knows what HLL count to pull
43
+ auto &colref = expression.Cast<BoundColumnRefExpression>();
44
+ D_ASSERT(colref.depth == 0);
45
+ D_ASSERT(colref.binding.table_index != DConstants::INVALID_INDEX);
46
+ // map the base table index to the relation index used by the JoinOrderOptimizer
47
+ D_ASSERT(relation_manager.relation_mapping.find(colref.binding.table_index) !=
48
+ relation_manager.relation_mapping.end());
49
+ binding =
50
+ ColumnBinding(relation_manager.relation_mapping[colref.binding.table_index], colref.binding.column_index);
51
+ }
52
+ // TODO: handle inequality filters with functions.
53
+ ExpressionIterator::EnumerateChildren(expression, [&](Expression &expr) { GetColumnBinding(expr, binding); });
54
+ }
55
+
56
+ const vector<unique_ptr<FilterInfo>> &QueryGraphManager::GetFilterBindings() const {
57
+ return filters_and_bindings;
58
+ }
59
+
60
+ static unique_ptr<LogicalOperator> PushFilter(unique_ptr<LogicalOperator> node, unique_ptr<Expression> expr) {
61
+ // push an expression into a filter
62
+ // first check if we have any filter to push it into
63
+ if (node->type != LogicalOperatorType::LOGICAL_FILTER) {
64
+ // we don't, we need to create one
65
+ auto filter = make_uniq<LogicalFilter>();
66
+ filter->children.push_back(std::move(node));
67
+ node = std::move(filter);
68
+ }
69
+ // push the filter into the LogicalFilter
70
+ D_ASSERT(node->type == LogicalOperatorType::LOGICAL_FILTER);
71
+ auto &filter = node->Cast<LogicalFilter>();
72
+ filter.expressions.push_back(std::move(expr));
73
+ return node;
74
+ }
75
+
76
+ void QueryGraphManager::CreateHyperGraphEdges() {
77
+ // create potential edges from the comparisons
78
+ for (auto &filter_info : filters_and_bindings) {
79
+ auto &filter = filter_info->filter;
80
+ // now check if it can be used as a join predicate
81
+ if (filter->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON) {
82
+ auto &comparison = filter->Cast<BoundComparisonExpression>();
83
+ // extract the bindings that are required for the left and right side of the comparison
84
+ unordered_set<idx_t> left_bindings, right_bindings;
85
+ relation_manager.ExtractBindings(*comparison.left, left_bindings);
86
+ relation_manager.ExtractBindings(*comparison.right, right_bindings);
87
+ GetColumnBinding(*comparison.left, filter_info->left_binding);
88
+ GetColumnBinding(*comparison.right, filter_info->right_binding);
89
+ if (!left_bindings.empty() && !right_bindings.empty()) {
90
+ // both the left and the right side have bindings
91
+ // first create the relation sets, if they do not exist
92
+ filter_info->left_set = &set_manager.GetJoinRelation(left_bindings);
93
+ filter_info->right_set = &set_manager.GetJoinRelation(right_bindings);
94
+ // we can only create a meaningful edge if the sets are not exactly the same
95
+ if (filter_info->left_set != filter_info->right_set) {
96
+ // check if the sets are disjoint
97
+ if (Disjoint(left_bindings, right_bindings)) {
98
+ // they are disjoint, we only need to create one set of edges in the join graph
99
+ query_graph.CreateEdge(*filter_info->left_set, *filter_info->right_set, filter_info);
100
+ query_graph.CreateEdge(*filter_info->right_set, *filter_info->left_set, filter_info);
101
+ } else {
102
+ continue;
103
+ }
104
+ continue;
105
+ }
106
+ }
107
+ }
108
+ }
109
+ }
110
+
111
+ static unique_ptr<LogicalOperator> ExtractJoinRelation(unique_ptr<SingleJoinRelation> &rel) {
112
+ auto &children = rel->parent->children;
113
+ for (idx_t i = 0; i < children.size(); i++) {
114
+ if (children[i].get() == &rel->op) {
115
+ // found it! take ownership of it from the parent
116
+ auto result = std::move(children[i]);
117
+ children.erase(children.begin() + i);
118
+ return result;
119
+ }
120
+ }
121
+ throw Exception("Could not find relation in parent node (?)");
122
+ }
123
+
124
+ unique_ptr<LogicalOperator> QueryGraphManager::Reconstruct(unique_ptr<LogicalOperator> plan, JoinNode &node) {
125
+ return RewritePlan(std::move(plan), node);
126
+ }
127
+
128
+ GenerateJoinRelation QueryGraphManager::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations,
129
+ JoinNode &node) {
130
+ optional_ptr<JoinRelationSet> left_node;
131
+ optional_ptr<JoinRelationSet> right_node;
132
+ optional_ptr<JoinRelationSet> result_relation;
133
+ unique_ptr<LogicalOperator> result_operator;
134
+ if (node.left && node.right && node.info) {
135
+ // generate the left and right children
136
+ auto left = GenerateJoins(extracted_relations, *node.left);
137
+ auto right = GenerateJoins(extracted_relations, *node.right);
138
+
139
+ if (node.info->filters.empty()) {
140
+ // no filters, create a cross product
141
+ result_operator = LogicalCrossProduct::Create(std::move(left.op), std::move(right.op));
142
+ } else {
143
+ // we have filters, create a join node
144
+ auto join = make_uniq<LogicalComparisonJoin>(JoinType::INNER);
145
+ // Here we optimize build side probe side. Our build side is the right side
146
+ // So the right plans should have lower cardinalities.
147
+ join->children.push_back(std::move(left.op));
148
+ join->children.push_back(std::move(right.op));
149
+
150
+ // set the join conditions from the join node
151
+ for (auto &filter_ref : node.info->filters) {
152
+ auto f = filter_ref.get();
153
+ // extract the filter from the operator it originally belonged to
154
+ D_ASSERT(filters_and_bindings[f->filter_index]->filter);
155
+ auto &filter_and_binding = filters_and_bindings.at(f->filter_index);
156
+ auto condition = std::move(filter_and_binding->filter);
157
+ // now create the actual join condition
158
+ D_ASSERT((JoinRelationSet::IsSubset(*left.set, *f->left_set) &&
159
+ JoinRelationSet::IsSubset(*right.set, *f->right_set)) ||
160
+ (JoinRelationSet::IsSubset(*left.set, *f->right_set) &&
161
+ JoinRelationSet::IsSubset(*right.set, *f->left_set)));
162
+ JoinCondition cond;
163
+ D_ASSERT(condition->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON);
164
+ auto &comparison = condition->Cast<BoundComparisonExpression>();
165
+
166
+ // we need to figure out which side is which by looking at the relations available to us
167
+ bool invert = !JoinRelationSet::IsSubset(*left.set, *f->left_set);
168
+ cond.left = !invert ? std::move(comparison.left) : std::move(comparison.right);
169
+ cond.right = !invert ? std::move(comparison.right) : std::move(comparison.left);
170
+ cond.comparison = condition->type;
171
+
172
+ if (invert) {
173
+ // reverse comparison expression if we reverse the order of the children
174
+ cond.comparison = FlipComparisonExpression(cond.comparison);
175
+ }
176
+ join->conditions.push_back(std::move(cond));
177
+ }
178
+ D_ASSERT(!join->conditions.empty());
179
+ result_operator = std::move(join);
180
+ }
181
+ left_node = left.set;
182
+ right_node = right.set;
183
+ result_relation = &set_manager.Union(*left.set, *right.set);
184
+ } else {
185
+ // base node, get the entry from the list of extracted relations
186
+ D_ASSERT(node.set.count == 1);
187
+ D_ASSERT(extracted_relations[node.set.relations[0]]);
188
+ result_relation = &node.set;
189
+ result_operator = std::move(extracted_relations[node.set.relations[0]]);
190
+ }
191
+ // TODO: this is where estimated properties start coming into play.
192
+ // when creating the result operator, we should ask the cost model and cardinality estimator what
193
+ // the cost and cardinality are
194
+ // result_operator->estimated_props = node.estimated_props->Copy();
195
+ result_operator->estimated_cardinality = node.cardinality;
196
+ result_operator->has_estimated_cardinality = true;
197
+ if (result_operator->type == LogicalOperatorType::LOGICAL_FILTER &&
198
+ result_operator->children[0]->type == LogicalOperatorType::LOGICAL_GET) {
199
+ // FILTER on top of GET, add estimated properties to both
200
+ // auto &filter_props = *result_operator->estimated_props;
201
+ auto &child_operator = *result_operator->children[0];
202
+ child_operator.estimated_cardinality = node.cardinality;
203
+ child_operator.has_estimated_cardinality = true;
204
+ }
205
+ // check if we should do a pushdown on this node
206
+ // basically, any remaining filter that is a subset of the current relation will no longer be used in joins
207
+ // hence we should push it here
208
+ for (auto &filter_info : filters_and_bindings) {
209
+ // check if the filter has already been extracted
210
+ auto &info = *filter_info;
211
+ if (filters_and_bindings[info.filter_index]->filter) {
212
+ // now check if the filter is a subset of the current relation
213
+ // note that infos with an empty relation set are a special case and we do not push them down
214
+ if (info.set.count > 0 && JoinRelationSet::IsSubset(*result_relation, info.set)) {
215
+ auto &filter_and_binding = filters_and_bindings[info.filter_index];
216
+ auto filter = std::move(filter_and_binding->filter);
217
+ // if it is, we can push the filter
218
+ // we can push it either into a join or as a filter
219
+ // check if we are in a join or in a base table
220
+ if (!left_node || !info.left_set) {
221
+ // base table or non-comparison expression, push it as a filter
222
+ result_operator = PushFilter(std::move(result_operator), std::move(filter));
223
+ continue;
224
+ }
225
+ // the node below us is a join or cross product and the expression is a comparison
226
+ // check if the nodes can be split up into left/right
227
+ bool found_subset = false;
228
+ bool invert = false;
229
+ if (JoinRelationSet::IsSubset(*left_node, *info.left_set) &&
230
+ JoinRelationSet::IsSubset(*right_node, *info.right_set)) {
231
+ found_subset = true;
232
+ } else if (JoinRelationSet::IsSubset(*right_node, *info.left_set) &&
233
+ JoinRelationSet::IsSubset(*left_node, *info.right_set)) {
234
+ invert = true;
235
+ found_subset = true;
236
+ }
237
+ if (!found_subset) {
238
+ // could not be split up into left/right
239
+ result_operator = PushFilter(std::move(result_operator), std::move(filter));
240
+ continue;
241
+ }
242
+ // create the join condition
243
+ JoinCondition cond;
244
+ D_ASSERT(filter->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON);
245
+ auto &comparison = filter->Cast<BoundComparisonExpression>();
246
+ // we need to figure out which side is which by looking at the relations available to us
247
+ cond.left = !invert ? std::move(comparison.left) : std::move(comparison.right);
248
+ cond.right = !invert ? std::move(comparison.right) : std::move(comparison.left);
249
+ cond.comparison = comparison.type;
250
+ if (invert) {
251
+ // reverse comparison expression if we reverse the order of the children
252
+ cond.comparison = FlipComparisonExpression(comparison.type);
253
+ }
254
+ // now find the join to push it into
255
+ auto node = result_operator.get();
256
+ if (node->type == LogicalOperatorType::LOGICAL_FILTER) {
257
+ node = node->children[0].get();
258
+ }
259
+ if (node->type == LogicalOperatorType::LOGICAL_CROSS_PRODUCT) {
260
+ // turn into comparison join
261
+ auto comp_join = make_uniq<LogicalComparisonJoin>(JoinType::INNER);
262
+ comp_join->children.push_back(std::move(node->children[0]));
263
+ comp_join->children.push_back(std::move(node->children[1]));
264
+ comp_join->conditions.push_back(std::move(cond));
265
+ if (node == result_operator.get()) {
266
+ result_operator = std::move(comp_join);
267
+ } else {
268
+ D_ASSERT(result_operator->type == LogicalOperatorType::LOGICAL_FILTER);
269
+ result_operator->children[0] = std::move(comp_join);
270
+ }
271
+ } else {
272
+ D_ASSERT(node->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
273
+ node->type == LogicalOperatorType::LOGICAL_ASOF_JOIN);
274
+ auto &comp_join = node->Cast<LogicalComparisonJoin>();
275
+ comp_join.conditions.push_back(std::move(cond));
276
+ }
277
+ }
278
+ }
279
+ }
280
+ auto result = GenerateJoinRelation(result_relation, std::move(result_operator));
281
+ return result;
282
+ }
283
+
284
+ const QueryGraphEdges &QueryGraphManager::GetQueryGraphEdges() const {
285
+ return query_graph;
286
+ }
287
+
288
+ void QueryGraphManager::CreateQueryGraphCrossProduct(JoinRelationSet &left, JoinRelationSet &right) {
289
+ query_graph.CreateEdge(left, right, nullptr);
290
+ query_graph.CreateEdge(right, left, nullptr);
291
+ }
292
+
293
+ unique_ptr<LogicalOperator> QueryGraphManager::RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode &node) {
294
+ // now we have to rewrite the plan
295
+ bool root_is_join = plan->children.size() > 1;
296
+
297
+ // first we will extract all relations from the main plan
298
+ vector<unique_ptr<LogicalOperator>> extracted_relations;
299
+ extracted_relations.reserve(relation_manager.NumRelations());
300
+ for (auto &relation : relation_manager.GetRelations()) {
301
+ extracted_relations.push_back(ExtractJoinRelation(relation));
302
+ }
303
+
304
+ // now we generate the actual joins
305
+ auto join_tree = GenerateJoins(extracted_relations, node);
306
+ // perform the final pushdown of remaining filters
307
+ for (auto &filter : filters_and_bindings) {
308
+ // check if the filter has already been extracted
309
+ if (filter->filter) {
310
+ // if not we need to push it
311
+ join_tree.op = PushFilter(std::move(join_tree.op), std::move(filter->filter));
312
+ }
313
+ }
314
+
315
+ // find the first join in the relation to know where to place this node
316
+ if (root_is_join) {
317
+ // first node is the join, return it immediately
318
+ return std::move(join_tree.op);
319
+ }
320
+ D_ASSERT(plan->children.size() == 1);
321
+ // have to move up through the relations
322
+ auto op = plan.get();
323
+ auto parent = plan.get();
324
+ while (op->type != LogicalOperatorType::LOGICAL_CROSS_PRODUCT &&
325
+ op->type != LogicalOperatorType::LOGICAL_COMPARISON_JOIN &&
326
+ op->type != LogicalOperatorType::LOGICAL_ASOF_JOIN) {
327
+ D_ASSERT(op->children.size() == 1);
328
+ parent = op;
329
+ op = op->children[0].get();
330
+ }
331
+ // have to replace at this node
332
+ parent->children[0] = std::move(join_tree.op);
333
+ return plan;
334
+ }
335
+
336
+ bool QueryGraphManager::LeftCardLessThanRight(LogicalOperator &op) {
337
+ D_ASSERT(op.children.size() == 2);
338
+ if (op.children[0]->has_estimated_cardinality && op.children[1]->has_estimated_cardinality) {
339
+ return op.children[0]->estimated_cardinality < op.children[1]->estimated_cardinality;
340
+ }
341
+ return op.children[0]->EstimateCardinality(context) < op.children[1]->EstimateCardinality(context);
342
+ }
343
+
344
+ unique_ptr<LogicalOperator> QueryGraphManager::LeftRightOptimizations(unique_ptr<LogicalOperator> input_op) {
345
+ auto op = input_op.get();
346
+ // pass through single child operators
347
+ while (!op->children.empty()) {
348
+ if (op->children.size() == 2) {
349
+ switch (op->type) {
350
+ case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
351
+ auto &join = op->Cast<LogicalComparisonJoin>();
352
+ if (join.join_type == JoinType::INNER) {
353
+ if (LeftCardLessThanRight(*op)) {
354
+ std::swap(op->children[0], op->children[1]);
355
+ for (auto &cond : join.conditions) {
356
+ std::swap(cond.left, cond.right);
357
+ cond.comparison = FlipComparisonExpression(cond.comparison);
358
+ }
359
+ }
360
+ } else if (join.join_type == JoinType::LEFT && join.right_projection_map.empty()) {
361
+ auto lhs_cardinality = join.children[0]->EstimateCardinality(context);
362
+ auto rhs_cardinality = join.children[1]->EstimateCardinality(context);
363
+ if (rhs_cardinality > lhs_cardinality * 2) {
364
+ join.join_type = JoinType::RIGHT;
365
+ std::swap(join.children[0], join.children[1]);
366
+ for (auto &cond : join.conditions) {
367
+ std::swap(cond.left, cond.right);
368
+ cond.comparison = FlipComparisonExpression(cond.comparison);
369
+ }
370
+ }
371
+ }
372
+ break;
373
+ }
374
+ case LogicalOperatorType::LOGICAL_CROSS_PRODUCT: {
375
+ if (LeftCardLessThanRight(*op)) {
376
+ std::swap(op->children[0], op->children[1]);
377
+ }
378
+ break;
379
+ }
380
+ case LogicalOperatorType::LOGICAL_ANY_JOIN: {
381
+ auto &join = op->Cast<LogicalAnyJoin>();
382
+ if (join.join_type == JoinType::LEFT && join.right_projection_map.empty()) {
383
+ auto lhs_cardinality = join.children[0]->EstimateCardinality(context);
384
+ auto rhs_cardinality = join.children[1]->EstimateCardinality(context);
385
+ if (rhs_cardinality > lhs_cardinality * 2) {
386
+ join.join_type = JoinType::RIGHT;
387
+ std::swap(join.children[0], join.children[1]);
388
+ }
389
+ } else if (join.join_type == JoinType::INNER && LeftCardLessThanRight(*op)) {
390
+ std::swap(join.children[0], join.children[1]);
391
+ }
392
+ break;
393
+ }
394
+ default:
395
+ break;
396
+ }
397
+ op->children[0] = LeftRightOptimizations(std::move(op->children[0]));
398
+ op->children[1] = LeftRightOptimizations(std::move(op->children[1]));
399
+ // break from while loop
400
+ break;
401
+ }
402
+ if (op->children.size() == 1) {
403
+ op = op->children[0].get();
404
+ }
405
+ }
406
+ return input_op;
407
+ }
408
+
409
+ } // namespace duckdb