duckdb 0.8.2-dev2700.0 → 0.8.2-dev2842.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
  3. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -1
  4. package/src/duckdb/extension/json/include/json_serializer.hpp +1 -1
  5. package/src/duckdb/extension/json/json_deserializer.cpp +10 -10
  6. package/src/duckdb/extension/json/json_scan.cpp +2 -2
  7. package/src/duckdb/extension/json/json_serializer.cpp +11 -10
  8. package/src/duckdb/extension/json/serialize_json.cpp +44 -44
  9. package/src/duckdb/extension/parquet/parquet_extension.cpp +11 -10
  10. package/src/duckdb/extension/parquet/serialize_parquet.cpp +6 -6
  11. package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
  12. package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
  13. package/src/duckdb/src/common/enum_util.cpp +5 -0
  14. package/src/duckdb/src/common/extra_type_info.cpp +2 -2
  15. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +5 -3
  16. package/src/duckdb/src/common/serializer/binary_serializer.cpp +10 -5
  17. package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
  18. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  19. package/src/duckdb/src/common/types/value.cpp +33 -33
  20. package/src/duckdb/src/common/types/vector.cpp +20 -20
  21. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +2 -2
  22. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +6 -6
  23. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +4 -4
  24. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +4 -4
  25. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
  26. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  27. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  28. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  29. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
  30. package/src/duckdb/src/execution/window_executor.cpp +10 -1
  31. package/src/duckdb/src/function/table/read_csv.cpp +4 -4
  32. package/src/duckdb/src/function/table/table_scan.cpp +14 -14
  33. package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
  34. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  36. package/src/duckdb/src/include/duckdb/common/index_vector.hpp +2 -2
  37. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +7 -3
  38. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +2 -1
  39. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +18 -17
  40. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +10 -9
  41. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +4 -0
  42. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
  43. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
  44. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +10 -10
  45. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
  47. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
  48. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  49. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  50. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
  51. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
  52. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  53. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
  54. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  55. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  56. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  57. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
  58. package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
  59. package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
  60. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
  61. package/src/duckdb/src/include/duckdb.h +11 -1
  62. package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
  63. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  64. package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
  65. package/src/duckdb/src/main/relation.cpp +4 -4
  66. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  67. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  68. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  69. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
  70. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  71. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
  72. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  73. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  74. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  75. package/src/duckdb/src/parallel/executor.cpp +6 -0
  76. package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
  77. package/src/duckdb/src/parser/parser.cpp +18 -3
  78. package/src/duckdb/src/parser/tableref/pivotref.cpp +6 -6
  79. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  80. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -10
  81. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +6 -6
  82. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -24
  83. package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +2 -2
  84. package/src/duckdb/src/planner/operator/logical_get.cpp +26 -22
  85. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +26 -26
  86. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +66 -66
  87. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +78 -78
  88. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +250 -250
  89. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +10 -10
  90. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +206 -206
  91. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +116 -116
  92. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +110 -110
  93. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +48 -48
  94. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +16 -16
  95. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -2
  96. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +10 -10
  97. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +54 -54
  98. package/src/duckdb/src/storage/serialization/serialize_types.cpp +22 -22
  99. package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
  100. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
@@ -13,11 +13,28 @@ AggregateRelation::AggregateRelation(shared_ptr<Relation> child_p,
13
13
  context.GetContext()->TryBindRelation(*this, this->columns);
14
14
  }
15
15
 
16
+ AggregateRelation::AggregateRelation(shared_ptr<Relation> child_p,
17
+ vector<unique_ptr<ParsedExpression>> parsed_expressions, GroupByNode groups_p)
18
+ : Relation(child_p->context, RelationType::AGGREGATE_RELATION), expressions(std::move(parsed_expressions)),
19
+ groups(std::move(groups_p)), child(std::move(child_p)) {
20
+ // bind the expressions
21
+ context.GetContext()->TryBindRelation(*this, this->columns);
22
+ }
23
+
16
24
  AggregateRelation::AggregateRelation(shared_ptr<Relation> child_p,
17
25
  vector<unique_ptr<ParsedExpression>> parsed_expressions,
18
26
  vector<unique_ptr<ParsedExpression>> groups_p)
19
27
  : Relation(child_p->context, RelationType::AGGREGATE_RELATION), expressions(std::move(parsed_expressions)),
20
- groups(std::move(groups_p)), child(std::move(child_p)) {
28
+ child(std::move(child_p)) {
29
+ if (!groups_p.empty()) {
30
+ // explicit groups provided: use standard handling
31
+ GroupingSet grouping_set;
32
+ for (idx_t i = 0; i < groups_p.size(); i++) {
33
+ groups.group_expressions.push_back(std::move(groups_p[i]));
34
+ grouping_set.insert(i);
35
+ }
36
+ groups.grouping_sets.push_back(std::move(grouping_set));
37
+ }
21
38
  // bind the expressions
22
39
  context.GetContext()->TryBindRelation(*this, this->columns);
23
40
  }
@@ -39,16 +56,9 @@ unique_ptr<QueryNode> AggregateRelation::GetQueryNode() {
39
56
  }
40
57
  D_ASSERT(result->type == QueryNodeType::SELECT_NODE);
41
58
  auto &select_node = result->Cast<SelectNode>();
42
- if (!groups.empty()) {
43
- // explicit groups provided: use standard handling
59
+ if (!groups.group_expressions.empty()) {
44
60
  select_node.aggregate_handling = AggregateHandling::STANDARD_HANDLING;
45
- select_node.groups.group_expressions.clear();
46
- GroupingSet grouping_set;
47
- for (idx_t i = 0; i < groups.size(); i++) {
48
- select_node.groups.group_expressions.push_back(groups[i]->Copy());
49
- grouping_set.insert(i);
50
- }
51
- select_node.groups.grouping_sets.push_back(std::move(grouping_set));
61
+ select_node.groups = groups.Copy();
52
62
  } else {
53
63
  // no groups provided: automatically figure out groups (if any)
54
64
  select_node.aggregate_handling = AggregateHandling::FORCE_AGGREGATES;
@@ -169,7 +169,7 @@ shared_ptr<Relation> Relation::Aggregate(const string &aggregate_list) {
169
169
 
170
170
  shared_ptr<Relation> Relation::Aggregate(const string &aggregate_list, const string &group_list) {
171
171
  auto expression_list = Parser::ParseExpressionList(aggregate_list, context.GetContext()->GetParserOptions());
172
- auto groups = Parser::ParseExpressionList(group_list, context.GetContext()->GetParserOptions());
172
+ auto groups = Parser::ParseGroupByList(group_list, context.GetContext()->GetParserOptions());
173
173
  return make_shared<AggregateRelation>(shared_from_this(), std::move(expression_list), std::move(groups));
174
174
  }
175
175
 
@@ -179,9 +179,9 @@ shared_ptr<Relation> Relation::Aggregate(const vector<string> &aggregates) {
179
179
  }
180
180
 
181
181
  shared_ptr<Relation> Relation::Aggregate(const vector<string> &aggregates, const vector<string> &groups) {
182
- auto aggregate_list = StringListToExpressionList(*context.GetContext(), aggregates);
183
- auto group_list = StringListToExpressionList(*context.GetContext(), groups);
184
- return make_shared<AggregateRelation>(shared_from_this(), std::move(aggregate_list), std::move(group_list));
182
+ auto aggregate_list = StringUtil::Join(aggregates, ", ");
183
+ auto group_list = StringUtil::Join(groups, ", ");
184
+ return this->Aggregate(aggregate_list, group_list);
185
185
  }
186
186
 
187
187
  string Relation::GetAlias() {
@@ -2,22 +2,15 @@
2
2
  #include "duckdb/optimizer/join_order/join_node.hpp"
3
3
  #include "duckdb/optimizer/join_order/join_order_optimizer.hpp"
4
4
  #include "duckdb/planner/filter/conjunction_filter.hpp"
5
- #include "duckdb/planner/filter/constant_filter.hpp"
6
5
  #include "duckdb/planner/operator/logical_comparison_join.hpp"
7
6
  #include "duckdb/planner/operator/logical_get.hpp"
8
7
  #include "duckdb/storage/data_table.hpp"
9
8
  #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
10
-
11
- #include <cmath>
9
+ #include "duckdb/common/printer.hpp"
10
+ #include "duckdb/common/limits.hpp"
12
11
 
13
12
  namespace duckdb {
14
13
 
15
- static optional_ptr<TableCatalogEntry> GetCatalogTableEntry(LogicalOperator &op) {
16
- D_ASSERT(op.type == LogicalOperatorType::LOGICAL_GET);
17
- auto &get = op.Cast<LogicalGet>();
18
- return get.GetTable();
19
- }
20
-
21
14
  // The filter was made on top of a logical sample or other projection,
22
15
  // but no specific columns are referenced. See issue 4978 number 4.
23
16
  bool CardinalityEstimator::EmptyFilter(FilterInfo &filter_info) {
@@ -36,8 +29,11 @@ void CardinalityEstimator::AddRelationTdom(FilterInfo &filter_info) {
36
29
  return;
37
30
  }
38
31
  }
32
+
39
33
  auto key = ColumnBinding(filter_info.left_binding.table_index, filter_info.left_binding.column_index);
40
- relations_to_tdoms.emplace_back(column_binding_set_t({key}));
34
+ RelationsToTDom new_r2tdom(column_binding_set_t({key}));
35
+
36
+ relations_to_tdoms.emplace_back(new_r2tdom);
41
37
  }
42
38
 
43
39
  bool CardinalityEstimator::SingleColumnFilter(FilterInfo &filter_info) {
@@ -72,13 +68,17 @@ vector<idx_t> CardinalityEstimator::DetermineMatchingEquivalentSets(FilterInfo *
72
68
  void CardinalityEstimator::AddToEquivalenceSets(FilterInfo *filter_info, vector<idx_t> matching_equivalent_sets) {
73
69
  D_ASSERT(matching_equivalent_sets.size() <= 2);
74
70
  if (matching_equivalent_sets.size() > 1) {
75
- // an equivalence relation is connecting to sets of equivalence relations
71
+ // an equivalence relation is connecting two sets of equivalence relations
76
72
  // so push all relations from the second set into the first. Later we will delete
77
73
  // the second set.
78
74
  for (ColumnBinding i : relations_to_tdoms.at(matching_equivalent_sets[1]).equivalent_relations) {
79
75
  relations_to_tdoms.at(matching_equivalent_sets[0]).equivalent_relations.insert(i);
80
76
  }
77
+ for (auto &column_name : relations_to_tdoms.at(matching_equivalent_sets[1]).column_names) {
78
+ relations_to_tdoms.at(matching_equivalent_sets[0]).column_names.push_back(column_name);
79
+ }
81
80
  relations_to_tdoms.at(matching_equivalent_sets[1]).equivalent_relations.clear();
81
+ relations_to_tdoms.at(matching_equivalent_sets[1]).column_names.clear();
82
82
  relations_to_tdoms.at(matching_equivalent_sets[0]).filters.push_back(filter_info);
83
83
  // add all values of one set to the other, delete the empty one
84
84
  } else if (matching_equivalent_sets.size() == 1) {
@@ -95,22 +95,7 @@ void CardinalityEstimator::AddToEquivalenceSets(FilterInfo *filter_info, vector<
95
95
  }
96
96
  }
97
97
 
98
- void CardinalityEstimator::AddRelationToColumnMapping(ColumnBinding key, ColumnBinding value) {
99
- relation_column_to_original_column[key] = value;
100
- }
101
-
102
- void CardinalityEstimator::CopyRelationMap(column_binding_map_t<ColumnBinding> &child_binding_map) {
103
- for (auto &binding_map : relation_column_to_original_column) {
104
- D_ASSERT(child_binding_map.find(binding_map.first) == child_binding_map.end());
105
- child_binding_map[binding_map.first] = binding_map.second;
106
- }
107
- }
108
-
109
- void CardinalityEstimator::AddColumnToRelationMap(idx_t table_index, idx_t column_index) {
110
- relation_attributes[table_index].columns.insert(column_index);
111
- }
112
-
113
- void CardinalityEstimator::InitEquivalentRelations(vector<unique_ptr<FilterInfo>> &filter_infos) {
98
+ void CardinalityEstimator::InitEquivalentRelations(const vector<unique_ptr<FilterInfo>> &filter_infos) {
114
99
  // For each filter, we fill keep track of the index of the equivalent relation set
115
100
  // the left and right relation needs to be added to.
116
101
  for (auto &filter : filter_infos) {
@@ -128,46 +113,15 @@ void CardinalityEstimator::InitEquivalentRelations(vector<unique_ptr<FilterInfo>
128
113
  auto matching_equivalent_sets = DetermineMatchingEquivalentSets(filter.get());
129
114
  AddToEquivalenceSets(filter.get(), matching_equivalent_sets);
130
115
  }
116
+ RemoveEmptyTotalDomains();
131
117
  }
132
118
 
133
- void CardinalityEstimator::VerifySymmetry(JoinNode &result, JoinNode &entry) {
134
- if (result.GetCardinality<double>() != entry.GetCardinality<double>()) {
135
- // Currently it's possible that some entries are cartesian joins.
136
- // When this is the case, you don't always have symmetry, but
137
- // if the cost of the result is less, then just assure the cardinality
138
- // is also less, then you have the same effect of symmetry.
139
- D_ASSERT(ceil(result.GetCardinality<double>()) <= ceil(entry.GetCardinality<double>()) ||
140
- floor(result.GetCardinality<double>()) <= floor(entry.GetCardinality<double>()));
141
- }
142
- }
143
-
144
- void CardinalityEstimator::InitTotalDomains() {
119
+ void CardinalityEstimator::RemoveEmptyTotalDomains() {
145
120
  auto remove_start = std::remove_if(relations_to_tdoms.begin(), relations_to_tdoms.end(),
146
121
  [](RelationsToTDom &r_2_tdom) { return r_2_tdom.equivalent_relations.empty(); });
147
122
  relations_to_tdoms.erase(remove_start, relations_to_tdoms.end());
148
123
  }
149
124
 
150
- double CardinalityEstimator::ComputeCost(JoinNode &left, JoinNode &right, double expected_cardinality) {
151
- return expected_cardinality + left.GetCost() + right.GetCost();
152
- }
153
-
154
- double CardinalityEstimator::EstimateCrossProduct(const JoinNode &left, const JoinNode &right) {
155
- // need to explicity use double here, otherwise auto converts it to an int, then
156
- // there is an autocast in the return.
157
- if (left.GetCardinality<double>() >= (NumericLimits<double>::Maximum() / right.GetCardinality<double>())) {
158
- return NumericLimits<double>::Maximum();
159
- }
160
- return left.GetCardinality<double>() * right.GetCardinality<double>();
161
- }
162
-
163
- void CardinalityEstimator::AddRelationColumnMapping(LogicalGet &get, idx_t relation_id) {
164
- for (idx_t it = 0; it < get.column_ids.size(); it++) {
165
- auto key = ColumnBinding(relation_id, it);
166
- auto value = ColumnBinding(get.table_index, get.column_ids[it]);
167
- AddRelationToColumnMapping(key, value);
168
- }
169
- }
170
-
171
125
  void UpdateDenom(Subgraph2Denominator &relation_2_denom, RelationsToTDom &relation_to_tdom) {
172
126
  relation_2_denom.denom *= relation_to_tdom.has_tdom_hll ? relation_to_tdom.tdom_hll : relation_to_tdom.tdom_no_hll;
173
127
  }
@@ -187,13 +141,22 @@ void FindSubgraphMatchAndMerge(Subgraph2Denominator &merge_to, idx_t find_me,
187
141
  }
188
142
  }
189
143
 
144
+ template <>
190
145
  double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set) {
146
+
147
+ if (relation_set_2_cardinality.find(new_set.ToString()) != relation_set_2_cardinality.end()) {
148
+ return relation_set_2_cardinality[new_set.ToString()].cardinality_before_filters;
149
+ }
191
150
  double numerator = 1;
192
151
  unordered_set<idx_t> actual_set;
152
+
193
153
  for (idx_t i = 0; i < new_set.count; i++) {
194
- numerator *= relation_attributes[new_set.relations[i]].cardinality;
154
+ auto &single_node_set = set_manager.GetJoinRelation(new_set.relations[i]);
155
+ auto card_helper = relation_set_2_cardinality[single_node_set.ToString()];
156
+ numerator *= card_helper.cardinality_before_filters;
195
157
  actual_set.insert(new_set.relations[i]);
196
158
  }
159
+
197
160
  vector<Subgraph2Denominator> subgraphs;
198
161
  bool done = false;
199
162
  bool found_match = false;
@@ -279,77 +242,26 @@ double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set
279
242
  // TODO: It's possible cross-products were added and are not present in the filters in the relation_2_tdom
280
243
  // structures. When that's the case, multiply the denom structures that have no intersection
281
244
  for (auto &match : subgraphs) {
282
- // It's possible that in production, one of the D_ASSERTS above will fail and not all subgraphs
283
- // were connected. When this happens, just use the largest denominator of all the subgraphs.
284
- if (match.denom > denom) {
285
- denom = match.denom;
286
- }
245
+ denom *= match.denom;
287
246
  }
288
247
  // can happen if a table has cardinality 0, or a tdom is set to 0
289
248
  if (denom == 0) {
290
249
  denom = 1;
291
250
  }
292
- return numerator / denom;
251
+ auto result = numerator / denom;
252
+ auto new_entry = CardinalityHelper((double)result, 1);
253
+ relation_set_2_cardinality[new_set.ToString()] = new_entry;
254
+ return result;
293
255
  }
294
256
 
295
- static bool IsLogicalFilter(LogicalOperator &op) {
296
- return op.type == LogicalOperatorType::LOGICAL_FILTER;
297
- }
298
-
299
- static optional_ptr<LogicalGet> GetLogicalGet(LogicalOperator &op, idx_t table_index = DConstants::INVALID_INDEX) {
300
- optional_ptr<LogicalGet> get;
301
- switch (op.type) {
302
- case LogicalOperatorType::LOGICAL_GET:
303
- get = &op.Cast<LogicalGet>();
304
- break;
305
- case LogicalOperatorType::LOGICAL_FILTER:
306
- get = GetLogicalGet(*op.children.at(0), table_index);
307
- break;
308
- case LogicalOperatorType::LOGICAL_PROJECTION:
309
- get = GetLogicalGet(*op.children.at(0), table_index);
310
- break;
311
- case LogicalOperatorType::LOGICAL_ASOF_JOIN:
312
- case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
313
- auto &join = op.Cast<LogicalComparisonJoin>();
314
- // We should never be calling GetLogicalGet without a valid table_index.
315
- // We are attempting to get the catalog table for a relation (for statistics/cardinality estimation)
316
- // A logical join means there is a non-reorderable relation in the join plan. This means we need
317
- // to know the exact table index to return.
318
- D_ASSERT(table_index != DConstants::INVALID_INDEX);
319
- if (join.join_type == JoinType::MARK || join.join_type == JoinType::LEFT) {
320
- auto &left_child = *join.children.at(0);
321
- get = GetLogicalGet(left_child, table_index);
322
- if (get && get->table_index == table_index) {
323
- return get;
324
- }
325
- auto &right_child = *join.children.at(1);
326
- get = GetLogicalGet(right_child, table_index);
327
- if (get && get->table_index == table_index) {
328
- return get;
329
- }
330
- }
331
- break;
332
- }
333
- default:
334
- // return null pointer, maybe there is no logical get under this child
335
- break;
336
- }
337
- return get;
338
- }
339
-
340
- void CardinalityEstimator::MergeBindings(idx_t binding_index, idx_t relation_id,
341
- vector<column_binding_map_t<ColumnBinding>> &child_binding_maps) {
342
- for (auto &map_set : child_binding_maps) {
343
- for (auto &mapping : map_set) {
344
- ColumnBinding relation_bindings = mapping.first;
345
- ColumnBinding actual_bindings = mapping.second;
346
-
347
- if (actual_bindings.table_index == binding_index) {
348
- auto key = ColumnBinding(relation_id, relation_bindings.column_index);
349
- AddRelationToColumnMapping(key, actual_bindings);
350
- }
351
- }
257
+ template <>
258
+ idx_t CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set) {
259
+ auto cardinality_as_double = EstimateCardinalityWithSet<double>(new_set);
260
+ auto max = NumericLimits<idx_t>::Maximum();
261
+ if (cardinality_as_double > max) {
262
+ return max;
352
263
  }
264
+ return (idx_t)cardinality_as_double;
353
265
  }
354
266
 
355
267
  bool SortTdoms(const RelationsToTDom &a, const RelationsToTDom &b) {
@@ -365,236 +277,78 @@ bool SortTdoms(const RelationsToTDom &a, const RelationsToTDom &b) {
365
277
  return a.tdom_no_hll > b.tdom_no_hll;
366
278
  }
367
279
 
368
- void CardinalityEstimator::InitCardinalityEstimatorProps(vector<NodeOp> &node_ops,
369
- vector<unique_ptr<FilterInfo>> &filter_infos) {
370
- InitEquivalentRelations(filter_infos);
371
- InitTotalDomains();
372
- for (idx_t i = 0; i < node_ops.size(); i++) {
373
- auto &join_node = *node_ops[i].node;
374
- auto &op = node_ops[i].op;
375
- join_node.SetBaseTableCardinality(op.EstimateCardinality(context));
376
- if (op.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
377
- auto &join = op.Cast<LogicalComparisonJoin>();
378
- if (join.join_type == JoinType::LEFT) {
379
- // If a base op is a Logical Comparison join it is probably a left join,
380
- // so the cost of the larger table is a fine estimate.
381
- // TODO: provide better estimates for cost of mark joins
382
- // MARK joins are used for anti and semi joins, so the cost can conceivably be
383
- // less than the base table cardinality.
384
- join_node.SetCost(join_node.GetBaseTableCardinality());
385
- }
386
- } else if (op.type == LogicalOperatorType::LOGICAL_ASOF_JOIN) {
387
- // AsOf joins have the cardinality of the LHS
388
- join_node.SetCost(join_node.GetBaseTableCardinality());
389
- }
390
- // Total domains can be affected by filters. So we update base table cardinality first
391
- EstimateBaseTableCardinality(join_node, op);
392
- // Then update total domains.
393
- UpdateTotalDomains(join_node, op);
394
- }
280
+ void CardinalityEstimator::InitCardinalityEstimatorProps(optional_ptr<JoinRelationSet> set, RelationStats &stats) {
281
+ // Get the join relation set
282
+ D_ASSERT(stats.stats_initialized);
283
+ auto relation_cardinality = stats.cardinality;
284
+ auto relation_filter = stats.filter_strength;
285
+
286
+ auto card_helper = CardinalityHelper(relation_cardinality, relation_filter);
287
+ relation_set_2_cardinality[set->ToString()] = card_helper;
288
+
289
+ UpdateTotalDomains(set, stats);
395
290
 
396
291
  // sort relations from greatest tdom to lowest tdom.
397
292
  std::sort(relations_to_tdoms.begin(), relations_to_tdoms.end(), SortTdoms);
398
293
  }
399
294
 
400
- void CardinalityEstimator::UpdateTotalDomains(JoinNode &node, LogicalOperator &op) {
401
- auto relation_id = node.set.relations[0];
402
- relation_attributes[relation_id].cardinality = node.GetCardinality<double>();
295
+ void CardinalityEstimator::UpdateTotalDomains(optional_ptr<JoinRelationSet> set, RelationStats &stats) {
296
+ D_ASSERT(set->count == 1);
297
+ auto relation_id = set->relations[0];
403
298
  //! Initialize the distinct count for all columns used in joins with the current relation.
404
- idx_t distinct_count = node.GetBaseTableCardinality();
405
- optional_ptr<TableCatalogEntry> catalog_table;
299
+ // D_ASSERT(stats.column_distinct_count.size() >= 1);
406
300
 
407
- optional_ptr<LogicalGet> get;
408
- bool get_updated = true;
409
- for (auto &column : relation_attributes[relation_id].columns) {
301
+ for (idx_t i = 0; i < stats.column_distinct_count.size(); i++) {
410
302
  //! for every column used in a filter in the relation, get the distinct count via HLL, or assume it to be
411
303
  //! the cardinality
412
- ColumnBinding key = ColumnBinding(relation_id, column);
413
- auto actual_binding = relation_column_to_original_column.find(key);
414
- // each relation has columns that are either projected or used as filters
415
- // In order to get column statistics we need to make sure the actual binding still
416
- // refers to the same base table relation, as non-reorderable joins may involve 2+
417
- // base table relations and therefore the columns may also refer to 2 different
418
- // base table relations
419
- if (actual_binding != relation_column_to_original_column.end() &&
420
- (!get || get->table_index != actual_binding->second.table_index)) {
421
- get = GetLogicalGet(op, actual_binding->second.table_index);
422
- get_updated = true;
423
- } else {
424
- get_updated = false;
425
- }
426
-
427
- if (get_updated) {
428
- if (get) {
429
- catalog_table = GetCatalogTableEntry(*get);
430
- } else {
431
- catalog_table = nullptr;
432
- }
433
- }
434
-
435
- if (catalog_table && actual_binding != relation_column_to_original_column.end()) {
436
- // Get HLL stats here
437
- auto base_stats = catalog_table->GetStatistics(context, actual_binding->second.column_index);
438
- if (base_stats) {
439
- distinct_count = base_stats->GetDistinctCount();
440
- }
441
-
442
- // HLL has estimation error, distinct_count can't be greater than cardinality of the table before filters
443
- if (distinct_count > node.GetBaseTableCardinality()) {
444
- distinct_count = node.GetBaseTableCardinality();
445
- }
446
- } else {
447
- distinct_count = node.GetBaseTableCardinality();
448
- }
449
304
  // Update the relation_to_tdom set with the estimated distinct count (or tdom) calculated above
305
+ auto key = ColumnBinding(relation_id, i);
450
306
  for (auto &relation_to_tdom : relations_to_tdoms) {
451
307
  column_binding_set_t i_set = relation_to_tdom.equivalent_relations;
452
- if (i_set.count(key) != 1) {
308
+ if (i_set.find(key) == i_set.end()) {
453
309
  continue;
454
310
  }
455
- if (catalog_table) {
456
- if (relation_to_tdom.tdom_hll < distinct_count) {
457
- relation_to_tdom.tdom_hll = distinct_count;
458
- relation_to_tdom.has_tdom_hll = true;
459
- }
460
- if (relation_to_tdom.tdom_no_hll > distinct_count) {
461
- relation_to_tdom.tdom_no_hll = distinct_count;
462
- }
311
+ auto distinct_count = stats.column_distinct_count.at(i);
312
+ if (distinct_count.from_hll && relation_to_tdom.has_tdom_hll) {
313
+ relation_to_tdom.tdom_hll = MaxValue(relation_to_tdom.tdom_hll, distinct_count.distinct_count);
314
+ } else if (distinct_count.from_hll && !relation_to_tdom.has_tdom_hll) {
315
+ relation_to_tdom.has_tdom_hll = true;
316
+ relation_to_tdom.tdom_hll = distinct_count.distinct_count;
463
317
  } else {
464
- // Here we don't have catalog statistics, and the following is how we determine
465
- // the tdom
466
- // 1. If there is any hll data in the equivalence set, use that
467
- // 2. Otherwise, use the table with the smallest cardinality
468
- if (relation_to_tdom.tdom_no_hll > distinct_count && !relation_to_tdom.has_tdom_hll) {
469
- relation_to_tdom.tdom_no_hll = distinct_count;
470
- }
318
+ relation_to_tdom.tdom_no_hll = MinValue(distinct_count.distinct_count, relation_to_tdom.tdom_no_hll);
471
319
  }
472
320
  break;
473
321
  }
474
322
  }
475
323
  }
476
324
 
477
- optional_ptr<TableFilterSet> CardinalityEstimator::GetTableFilters(LogicalOperator &op, idx_t table_index) {
478
- auto get = GetLogicalGet(op, table_index);
479
- return get ? &get->table_filters : nullptr;
480
- }
325
+ // LCOV_EXCL_START
481
326
 
482
- idx_t CardinalityEstimator::InspectConjunctionAND(idx_t cardinality, idx_t column_index, ConjunctionAndFilter &filter,
483
- unique_ptr<BaseStatistics> base_stats) {
484
- auto has_equality_filter = false;
485
- auto cardinality_after_filters = cardinality;
486
- for (auto &child_filter : filter.child_filters) {
487
- if (child_filter->filter_type != TableFilterType::CONSTANT_COMPARISON) {
488
- continue;
489
- }
490
- auto &comparison_filter = child_filter->Cast<ConstantFilter>();
491
- if (comparison_filter.comparison_type != ExpressionType::COMPARE_EQUAL) {
492
- continue;
493
- }
494
- auto column_count = 0;
495
- if (base_stats) {
496
- column_count = base_stats->GetDistinctCount();
497
- }
498
- auto filtered_card = cardinality;
499
- // column_count = 0 when there is no column count (i.e parquet scans)
500
- if (column_count > 0) {
501
- // we want the ceil of cardinality/column_count. We also want to avoid compiler errors
502
- filtered_card = (cardinality + column_count - 1) / column_count;
503
- cardinality_after_filters = filtered_card;
504
- }
505
- if (has_equality_filter) {
506
- cardinality_after_filters = MinValue(filtered_card, cardinality_after_filters);
507
- }
508
- has_equality_filter = true;
509
- }
510
- return cardinality_after_filters;
511
- }
512
-
513
- idx_t CardinalityEstimator::InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter &filter,
514
- unique_ptr<BaseStatistics> base_stats) {
515
- auto has_equality_filter = false;
516
- auto cardinality_after_filters = cardinality;
517
- for (auto &child_filter : filter.child_filters) {
518
- if (child_filter->filter_type != TableFilterType::CONSTANT_COMPARISON) {
519
- continue;
520
- }
521
- auto &comparison_filter = child_filter->Cast<ConstantFilter>();
522
- if (comparison_filter.comparison_type == ExpressionType::COMPARE_EQUAL) {
523
- auto column_count = cardinality_after_filters;
524
- if (base_stats) {
525
- column_count = base_stats->GetDistinctCount();
526
- }
527
- auto increment = MaxValue<idx_t>(((cardinality + column_count - 1) / column_count), 1);
528
- if (has_equality_filter) {
529
- cardinality_after_filters += increment;
530
- } else {
531
- cardinality_after_filters = increment;
532
- }
533
- has_equality_filter = true;
327
+ void CardinalityEstimator::AddRelationNamesToTdoms(vector<RelationStats> &stats) {
328
+ #ifdef DEBUG
329
+ for (auto &total_domain : relations_to_tdoms) {
330
+ for (auto &binding : total_domain.equivalent_relations) {
331
+ D_ASSERT(binding.table_index < stats.size());
332
+ D_ASSERT(binding.column_index < stats.at(binding.table_index).column_names.size());
333
+ string column_name = stats.at(binding.table_index).column_names.at(binding.column_index);
334
+ total_domain.column_names.push_back(column_name);
534
335
  }
535
336
  }
536
- D_ASSERT(cardinality_after_filters > 0);
537
- return cardinality_after_filters;
337
+ #endif
538
338
  }
539
339
 
540
- idx_t CardinalityEstimator::InspectTableFilters(idx_t cardinality, LogicalOperator &op, TableFilterSet &table_filters,
541
- idx_t table_index) {
542
- idx_t cardinality_after_filters = cardinality;
543
- auto get = GetLogicalGet(op, table_index);
544
- unique_ptr<BaseStatistics> column_statistics;
545
- for (auto &it : table_filters.filters) {
546
- column_statistics = nullptr;
547
- if (get->bind_data && get->function.name.compare("seq_scan") == 0) {
548
- auto &table_scan_bind_data = get->bind_data->Cast<TableScanBindData>();
549
- column_statistics = get->function.statistics(context, &table_scan_bind_data, it.first);
550
- }
551
- if (it.second->filter_type == TableFilterType::CONJUNCTION_AND) {
552
- auto &filter = it.second->Cast<ConjunctionAndFilter>();
553
- idx_t cardinality_with_and_filter =
554
- InspectConjunctionAND(cardinality, it.first, filter, std::move(column_statistics));
555
- cardinality_after_filters = MinValue(cardinality_after_filters, cardinality_with_and_filter);
556
- } else if (it.second->filter_type == TableFilterType::CONJUNCTION_OR) {
557
- auto &filter = it.second->Cast<ConjunctionOrFilter>();
558
- idx_t cardinality_with_or_filter =
559
- InspectConjunctionOR(cardinality, it.first, filter, std::move(column_statistics));
560
- cardinality_after_filters = MinValue(cardinality_after_filters, cardinality_with_or_filter);
340
+ void CardinalityEstimator::PrintRelationToTdomInfo() {
341
+ for (auto &total_domain : relations_to_tdoms) {
342
+ string domain = "Following columns have the same distinct count: ";
343
+ for (auto &column_name : total_domain.column_names) {
344
+ domain += column_name + ", ";
561
345
  }
346
+ bool have_hll = total_domain.has_tdom_hll;
347
+ domain += "\n TOTAL DOMAIN = " + to_string(have_hll ? total_domain.tdom_hll : total_domain.tdom_no_hll);
348
+ Printer::Print(domain);
562
349
  }
563
- // if the above code didn't find an equality filter (i.e country_code = "[us]")
564
- // and there are other table filters, use default selectivity.
565
- bool has_equality_filter = (cardinality_after_filters != cardinality);
566
- if (!has_equality_filter && !table_filters.filters.empty()) {
567
- cardinality_after_filters = MaxValue<idx_t>(cardinality * DEFAULT_SELECTIVITY, 1);
568
- }
569
- return cardinality_after_filters;
570
350
  }
571
351
 
572
- void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode &node, LogicalOperator &op) {
573
- auto has_logical_filter = IsLogicalFilter(op);
574
- D_ASSERT(node.set.count == 1);
575
- auto relation_id = node.set.relations[0];
576
-
577
- double lowest_card_found = node.GetBaseTableCardinality();
578
- for (auto &column : relation_attributes[relation_id].columns) {
579
- auto card_after_filters = node.GetBaseTableCardinality();
580
- ColumnBinding key = ColumnBinding(relation_id, column);
581
- optional_ptr<TableFilterSet> table_filters;
582
- auto actual_binding = relation_column_to_original_column.find(key);
583
- if (actual_binding != relation_column_to_original_column.end()) {
584
- table_filters = GetTableFilters(op, actual_binding->second.table_index);
585
- }
586
-
587
- if (table_filters) {
588
- double inspect_result =
589
- (double)InspectTableFilters(card_after_filters, op, *table_filters, actual_binding->second.table_index);
590
- card_after_filters = MinValue(inspect_result, (double)card_after_filters);
591
- }
592
- if (has_logical_filter) {
593
- card_after_filters *= DEFAULT_SELECTIVITY;
594
- }
595
- lowest_card_found = MinValue(card_after_filters, lowest_card_found);
596
- }
597
- node.SetEstimatedCardinality(lowest_card_found);
598
- }
352
+ // LCOV_EXCL_STOP
599
353
 
600
354
  } // namespace duckdb
@@ -0,0 +1,19 @@
1
+ #include "duckdb/optimizer/join_order/join_node.hpp"
2
+ #include "duckdb/optimizer/join_order/join_order_optimizer.hpp"
3
+ #include "duckdb/optimizer/join_order/cost_model.hpp"
4
+ #include <cmath>
5
+
6
+ namespace duckdb {
7
+
8
+ CostModel::CostModel(QueryGraphManager &query_graph_manager)
9
+ : query_graph_manager(query_graph_manager), cardinality_estimator() {
10
+ }
11
+
12
+ double CostModel::ComputeCost(JoinNode &left, JoinNode &right) {
13
+ auto &combination = query_graph_manager.set_manager.Union(left.set, right.set);
14
+ auto join_card = cardinality_estimator.EstimateCardinalityWithSet<double>(combination);
15
+ auto join_cost = join_card;
16
+ return join_cost + left.cost + right.cost;
17
+ }
18
+
19
+ } // namespace duckdb