duckdb 0.8.2-dev2700.0 → 0.8.2-dev2842.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
- package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -1
- package/src/duckdb/extension/json/include/json_serializer.hpp +1 -1
- package/src/duckdb/extension/json/json_deserializer.cpp +10 -10
- package/src/duckdb/extension/json/json_scan.cpp +2 -2
- package/src/duckdb/extension/json/json_serializer.cpp +11 -10
- package/src/duckdb/extension/json/serialize_json.cpp +44 -44
- package/src/duckdb/extension/parquet/parquet_extension.cpp +11 -10
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +6 -6
- package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
- package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
- package/src/duckdb/src/common/enum_util.cpp +5 -0
- package/src/duckdb/src/common/extra_type_info.cpp +2 -2
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +5 -3
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +10 -5
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
- package/src/duckdb/src/common/types/value.cpp +33 -33
- package/src/duckdb/src/common/types/vector.cpp +20 -20
- package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +6 -6
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +4 -4
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
- package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
- package/src/duckdb/src/execution/window_executor.cpp +10 -1
- package/src/duckdb/src/function/table/read_csv.cpp +4 -4
- package/src/duckdb/src/function/table/table_scan.cpp +14 -14
- package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/index_vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +18 -17
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +10 -9
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +10 -10
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb.h +11 -1
- package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
- package/src/duckdb/src/main/pending_query_result.cpp +9 -1
- package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
- package/src/duckdb/src/main/relation.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
- package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
- package/src/duckdb/src/parallel/executor.cpp +6 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +18 -3
- package/src/duckdb/src/parser/tableref/pivotref.cpp +6 -6
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -10
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +6 -6
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -24
- package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +2 -2
- package/src/duckdb/src/planner/operator/logical_get.cpp +26 -22
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +26 -26
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +66 -66
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +78 -78
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +250 -250
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +206 -206
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +116 -116
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +110 -110
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +48 -48
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +54 -54
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +22 -22
- package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
- package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
@@ -13,11 +13,28 @@ AggregateRelation::AggregateRelation(shared_ptr<Relation> child_p,
|
|
13
13
|
context.GetContext()->TryBindRelation(*this, this->columns);
|
14
14
|
}
|
15
15
|
|
16
|
+
AggregateRelation::AggregateRelation(shared_ptr<Relation> child_p,
|
17
|
+
vector<unique_ptr<ParsedExpression>> parsed_expressions, GroupByNode groups_p)
|
18
|
+
: Relation(child_p->context, RelationType::AGGREGATE_RELATION), expressions(std::move(parsed_expressions)),
|
19
|
+
groups(std::move(groups_p)), child(std::move(child_p)) {
|
20
|
+
// bind the expressions
|
21
|
+
context.GetContext()->TryBindRelation(*this, this->columns);
|
22
|
+
}
|
23
|
+
|
16
24
|
AggregateRelation::AggregateRelation(shared_ptr<Relation> child_p,
|
17
25
|
vector<unique_ptr<ParsedExpression>> parsed_expressions,
|
18
26
|
vector<unique_ptr<ParsedExpression>> groups_p)
|
19
27
|
: Relation(child_p->context, RelationType::AGGREGATE_RELATION), expressions(std::move(parsed_expressions)),
|
20
|
-
|
28
|
+
child(std::move(child_p)) {
|
29
|
+
if (!groups_p.empty()) {
|
30
|
+
// explicit groups provided: use standard handling
|
31
|
+
GroupingSet grouping_set;
|
32
|
+
for (idx_t i = 0; i < groups_p.size(); i++) {
|
33
|
+
groups.group_expressions.push_back(std::move(groups_p[i]));
|
34
|
+
grouping_set.insert(i);
|
35
|
+
}
|
36
|
+
groups.grouping_sets.push_back(std::move(grouping_set));
|
37
|
+
}
|
21
38
|
// bind the expressions
|
22
39
|
context.GetContext()->TryBindRelation(*this, this->columns);
|
23
40
|
}
|
@@ -39,16 +56,9 @@ unique_ptr<QueryNode> AggregateRelation::GetQueryNode() {
|
|
39
56
|
}
|
40
57
|
D_ASSERT(result->type == QueryNodeType::SELECT_NODE);
|
41
58
|
auto &select_node = result->Cast<SelectNode>();
|
42
|
-
if (!groups.empty()) {
|
43
|
-
// explicit groups provided: use standard handling
|
59
|
+
if (!groups.group_expressions.empty()) {
|
44
60
|
select_node.aggregate_handling = AggregateHandling::STANDARD_HANDLING;
|
45
|
-
select_node.groups.
|
46
|
-
GroupingSet grouping_set;
|
47
|
-
for (idx_t i = 0; i < groups.size(); i++) {
|
48
|
-
select_node.groups.group_expressions.push_back(groups[i]->Copy());
|
49
|
-
grouping_set.insert(i);
|
50
|
-
}
|
51
|
-
select_node.groups.grouping_sets.push_back(std::move(grouping_set));
|
61
|
+
select_node.groups = groups.Copy();
|
52
62
|
} else {
|
53
63
|
// no groups provided: automatically figure out groups (if any)
|
54
64
|
select_node.aggregate_handling = AggregateHandling::FORCE_AGGREGATES;
|
@@ -169,7 +169,7 @@ shared_ptr<Relation> Relation::Aggregate(const string &aggregate_list) {
|
|
169
169
|
|
170
170
|
shared_ptr<Relation> Relation::Aggregate(const string &aggregate_list, const string &group_list) {
|
171
171
|
auto expression_list = Parser::ParseExpressionList(aggregate_list, context.GetContext()->GetParserOptions());
|
172
|
-
auto groups = Parser::
|
172
|
+
auto groups = Parser::ParseGroupByList(group_list, context.GetContext()->GetParserOptions());
|
173
173
|
return make_shared<AggregateRelation>(shared_from_this(), std::move(expression_list), std::move(groups));
|
174
174
|
}
|
175
175
|
|
@@ -179,9 +179,9 @@ shared_ptr<Relation> Relation::Aggregate(const vector<string> &aggregates) {
|
|
179
179
|
}
|
180
180
|
|
181
181
|
shared_ptr<Relation> Relation::Aggregate(const vector<string> &aggregates, const vector<string> &groups) {
|
182
|
-
auto aggregate_list =
|
183
|
-
auto group_list =
|
184
|
-
return
|
182
|
+
auto aggregate_list = StringUtil::Join(aggregates, ", ");
|
183
|
+
auto group_list = StringUtil::Join(groups, ", ");
|
184
|
+
return this->Aggregate(aggregate_list, group_list);
|
185
185
|
}
|
186
186
|
|
187
187
|
string Relation::GetAlias() {
|
@@ -2,22 +2,15 @@
|
|
2
2
|
#include "duckdb/optimizer/join_order/join_node.hpp"
|
3
3
|
#include "duckdb/optimizer/join_order/join_order_optimizer.hpp"
|
4
4
|
#include "duckdb/planner/filter/conjunction_filter.hpp"
|
5
|
-
#include "duckdb/planner/filter/constant_filter.hpp"
|
6
5
|
#include "duckdb/planner/operator/logical_comparison_join.hpp"
|
7
6
|
#include "duckdb/planner/operator/logical_get.hpp"
|
8
7
|
#include "duckdb/storage/data_table.hpp"
|
9
8
|
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
10
|
-
|
11
|
-
#include
|
9
|
+
#include "duckdb/common/printer.hpp"
|
10
|
+
#include "duckdb/common/limits.hpp"
|
12
11
|
|
13
12
|
namespace duckdb {
|
14
13
|
|
15
|
-
static optional_ptr<TableCatalogEntry> GetCatalogTableEntry(LogicalOperator &op) {
|
16
|
-
D_ASSERT(op.type == LogicalOperatorType::LOGICAL_GET);
|
17
|
-
auto &get = op.Cast<LogicalGet>();
|
18
|
-
return get.GetTable();
|
19
|
-
}
|
20
|
-
|
21
14
|
// The filter was made on top of a logical sample or other projection,
|
22
15
|
// but no specific columns are referenced. See issue 4978 number 4.
|
23
16
|
bool CardinalityEstimator::EmptyFilter(FilterInfo &filter_info) {
|
@@ -36,8 +29,11 @@ void CardinalityEstimator::AddRelationTdom(FilterInfo &filter_info) {
|
|
36
29
|
return;
|
37
30
|
}
|
38
31
|
}
|
32
|
+
|
39
33
|
auto key = ColumnBinding(filter_info.left_binding.table_index, filter_info.left_binding.column_index);
|
40
|
-
|
34
|
+
RelationsToTDom new_r2tdom(column_binding_set_t({key}));
|
35
|
+
|
36
|
+
relations_to_tdoms.emplace_back(new_r2tdom);
|
41
37
|
}
|
42
38
|
|
43
39
|
bool CardinalityEstimator::SingleColumnFilter(FilterInfo &filter_info) {
|
@@ -72,13 +68,17 @@ vector<idx_t> CardinalityEstimator::DetermineMatchingEquivalentSets(FilterInfo *
|
|
72
68
|
void CardinalityEstimator::AddToEquivalenceSets(FilterInfo *filter_info, vector<idx_t> matching_equivalent_sets) {
|
73
69
|
D_ASSERT(matching_equivalent_sets.size() <= 2);
|
74
70
|
if (matching_equivalent_sets.size() > 1) {
|
75
|
-
// an equivalence relation is connecting
|
71
|
+
// an equivalence relation is connecting two sets of equivalence relations
|
76
72
|
// so push all relations from the second set into the first. Later we will delete
|
77
73
|
// the second set.
|
78
74
|
for (ColumnBinding i : relations_to_tdoms.at(matching_equivalent_sets[1]).equivalent_relations) {
|
79
75
|
relations_to_tdoms.at(matching_equivalent_sets[0]).equivalent_relations.insert(i);
|
80
76
|
}
|
77
|
+
for (auto &column_name : relations_to_tdoms.at(matching_equivalent_sets[1]).column_names) {
|
78
|
+
relations_to_tdoms.at(matching_equivalent_sets[0]).column_names.push_back(column_name);
|
79
|
+
}
|
81
80
|
relations_to_tdoms.at(matching_equivalent_sets[1]).equivalent_relations.clear();
|
81
|
+
relations_to_tdoms.at(matching_equivalent_sets[1]).column_names.clear();
|
82
82
|
relations_to_tdoms.at(matching_equivalent_sets[0]).filters.push_back(filter_info);
|
83
83
|
// add all values of one set to the other, delete the empty one
|
84
84
|
} else if (matching_equivalent_sets.size() == 1) {
|
@@ -95,22 +95,7 @@ void CardinalityEstimator::AddToEquivalenceSets(FilterInfo *filter_info, vector<
|
|
95
95
|
}
|
96
96
|
}
|
97
97
|
|
98
|
-
void CardinalityEstimator::
|
99
|
-
relation_column_to_original_column[key] = value;
|
100
|
-
}
|
101
|
-
|
102
|
-
void CardinalityEstimator::CopyRelationMap(column_binding_map_t<ColumnBinding> &child_binding_map) {
|
103
|
-
for (auto &binding_map : relation_column_to_original_column) {
|
104
|
-
D_ASSERT(child_binding_map.find(binding_map.first) == child_binding_map.end());
|
105
|
-
child_binding_map[binding_map.first] = binding_map.second;
|
106
|
-
}
|
107
|
-
}
|
108
|
-
|
109
|
-
void CardinalityEstimator::AddColumnToRelationMap(idx_t table_index, idx_t column_index) {
|
110
|
-
relation_attributes[table_index].columns.insert(column_index);
|
111
|
-
}
|
112
|
-
|
113
|
-
void CardinalityEstimator::InitEquivalentRelations(vector<unique_ptr<FilterInfo>> &filter_infos) {
|
98
|
+
void CardinalityEstimator::InitEquivalentRelations(const vector<unique_ptr<FilterInfo>> &filter_infos) {
|
114
99
|
// For each filter, we fill keep track of the index of the equivalent relation set
|
115
100
|
// the left and right relation needs to be added to.
|
116
101
|
for (auto &filter : filter_infos) {
|
@@ -128,46 +113,15 @@ void CardinalityEstimator::InitEquivalentRelations(vector<unique_ptr<FilterInfo>
|
|
128
113
|
auto matching_equivalent_sets = DetermineMatchingEquivalentSets(filter.get());
|
129
114
|
AddToEquivalenceSets(filter.get(), matching_equivalent_sets);
|
130
115
|
}
|
116
|
+
RemoveEmptyTotalDomains();
|
131
117
|
}
|
132
118
|
|
133
|
-
void CardinalityEstimator::
|
134
|
-
if (result.GetCardinality<double>() != entry.GetCardinality<double>()) {
|
135
|
-
// Currently it's possible that some entries are cartesian joins.
|
136
|
-
// When this is the case, you don't always have symmetry, but
|
137
|
-
// if the cost of the result is less, then just assure the cardinality
|
138
|
-
// is also less, then you have the same effect of symmetry.
|
139
|
-
D_ASSERT(ceil(result.GetCardinality<double>()) <= ceil(entry.GetCardinality<double>()) ||
|
140
|
-
floor(result.GetCardinality<double>()) <= floor(entry.GetCardinality<double>()));
|
141
|
-
}
|
142
|
-
}
|
143
|
-
|
144
|
-
void CardinalityEstimator::InitTotalDomains() {
|
119
|
+
void CardinalityEstimator::RemoveEmptyTotalDomains() {
|
145
120
|
auto remove_start = std::remove_if(relations_to_tdoms.begin(), relations_to_tdoms.end(),
|
146
121
|
[](RelationsToTDom &r_2_tdom) { return r_2_tdom.equivalent_relations.empty(); });
|
147
122
|
relations_to_tdoms.erase(remove_start, relations_to_tdoms.end());
|
148
123
|
}
|
149
124
|
|
150
|
-
double CardinalityEstimator::ComputeCost(JoinNode &left, JoinNode &right, double expected_cardinality) {
|
151
|
-
return expected_cardinality + left.GetCost() + right.GetCost();
|
152
|
-
}
|
153
|
-
|
154
|
-
double CardinalityEstimator::EstimateCrossProduct(const JoinNode &left, const JoinNode &right) {
|
155
|
-
// need to explicity use double here, otherwise auto converts it to an int, then
|
156
|
-
// there is an autocast in the return.
|
157
|
-
if (left.GetCardinality<double>() >= (NumericLimits<double>::Maximum() / right.GetCardinality<double>())) {
|
158
|
-
return NumericLimits<double>::Maximum();
|
159
|
-
}
|
160
|
-
return left.GetCardinality<double>() * right.GetCardinality<double>();
|
161
|
-
}
|
162
|
-
|
163
|
-
void CardinalityEstimator::AddRelationColumnMapping(LogicalGet &get, idx_t relation_id) {
|
164
|
-
for (idx_t it = 0; it < get.column_ids.size(); it++) {
|
165
|
-
auto key = ColumnBinding(relation_id, it);
|
166
|
-
auto value = ColumnBinding(get.table_index, get.column_ids[it]);
|
167
|
-
AddRelationToColumnMapping(key, value);
|
168
|
-
}
|
169
|
-
}
|
170
|
-
|
171
125
|
void UpdateDenom(Subgraph2Denominator &relation_2_denom, RelationsToTDom &relation_to_tdom) {
|
172
126
|
relation_2_denom.denom *= relation_to_tdom.has_tdom_hll ? relation_to_tdom.tdom_hll : relation_to_tdom.tdom_no_hll;
|
173
127
|
}
|
@@ -187,13 +141,22 @@ void FindSubgraphMatchAndMerge(Subgraph2Denominator &merge_to, idx_t find_me,
|
|
187
141
|
}
|
188
142
|
}
|
189
143
|
|
144
|
+
template <>
|
190
145
|
double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set) {
|
146
|
+
|
147
|
+
if (relation_set_2_cardinality.find(new_set.ToString()) != relation_set_2_cardinality.end()) {
|
148
|
+
return relation_set_2_cardinality[new_set.ToString()].cardinality_before_filters;
|
149
|
+
}
|
191
150
|
double numerator = 1;
|
192
151
|
unordered_set<idx_t> actual_set;
|
152
|
+
|
193
153
|
for (idx_t i = 0; i < new_set.count; i++) {
|
194
|
-
|
154
|
+
auto &single_node_set = set_manager.GetJoinRelation(new_set.relations[i]);
|
155
|
+
auto card_helper = relation_set_2_cardinality[single_node_set.ToString()];
|
156
|
+
numerator *= card_helper.cardinality_before_filters;
|
195
157
|
actual_set.insert(new_set.relations[i]);
|
196
158
|
}
|
159
|
+
|
197
160
|
vector<Subgraph2Denominator> subgraphs;
|
198
161
|
bool done = false;
|
199
162
|
bool found_match = false;
|
@@ -279,77 +242,26 @@ double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set
|
|
279
242
|
// TODO: It's possible cross-products were added and are not present in the filters in the relation_2_tdom
|
280
243
|
// structures. When that's the case, multiply the denom structures that have no intersection
|
281
244
|
for (auto &match : subgraphs) {
|
282
|
-
|
283
|
-
// were connected. When this happens, just use the largest denominator of all the subgraphs.
|
284
|
-
if (match.denom > denom) {
|
285
|
-
denom = match.denom;
|
286
|
-
}
|
245
|
+
denom *= match.denom;
|
287
246
|
}
|
288
247
|
// can happen if a table has cardinality 0, or a tdom is set to 0
|
289
248
|
if (denom == 0) {
|
290
249
|
denom = 1;
|
291
250
|
}
|
292
|
-
|
251
|
+
auto result = numerator / denom;
|
252
|
+
auto new_entry = CardinalityHelper((double)result, 1);
|
253
|
+
relation_set_2_cardinality[new_set.ToString()] = new_entry;
|
254
|
+
return result;
|
293
255
|
}
|
294
256
|
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
switch (op.type) {
|
302
|
-
case LogicalOperatorType::LOGICAL_GET:
|
303
|
-
get = &op.Cast<LogicalGet>();
|
304
|
-
break;
|
305
|
-
case LogicalOperatorType::LOGICAL_FILTER:
|
306
|
-
get = GetLogicalGet(*op.children.at(0), table_index);
|
307
|
-
break;
|
308
|
-
case LogicalOperatorType::LOGICAL_PROJECTION:
|
309
|
-
get = GetLogicalGet(*op.children.at(0), table_index);
|
310
|
-
break;
|
311
|
-
case LogicalOperatorType::LOGICAL_ASOF_JOIN:
|
312
|
-
case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
|
313
|
-
auto &join = op.Cast<LogicalComparisonJoin>();
|
314
|
-
// We should never be calling GetLogicalGet without a valid table_index.
|
315
|
-
// We are attempting to get the catalog table for a relation (for statistics/cardinality estimation)
|
316
|
-
// A logical join means there is a non-reorderable relation in the join plan. This means we need
|
317
|
-
// to know the exact table index to return.
|
318
|
-
D_ASSERT(table_index != DConstants::INVALID_INDEX);
|
319
|
-
if (join.join_type == JoinType::MARK || join.join_type == JoinType::LEFT) {
|
320
|
-
auto &left_child = *join.children.at(0);
|
321
|
-
get = GetLogicalGet(left_child, table_index);
|
322
|
-
if (get && get->table_index == table_index) {
|
323
|
-
return get;
|
324
|
-
}
|
325
|
-
auto &right_child = *join.children.at(1);
|
326
|
-
get = GetLogicalGet(right_child, table_index);
|
327
|
-
if (get && get->table_index == table_index) {
|
328
|
-
return get;
|
329
|
-
}
|
330
|
-
}
|
331
|
-
break;
|
332
|
-
}
|
333
|
-
default:
|
334
|
-
// return null pointer, maybe there is no logical get under this child
|
335
|
-
break;
|
336
|
-
}
|
337
|
-
return get;
|
338
|
-
}
|
339
|
-
|
340
|
-
void CardinalityEstimator::MergeBindings(idx_t binding_index, idx_t relation_id,
|
341
|
-
vector<column_binding_map_t<ColumnBinding>> &child_binding_maps) {
|
342
|
-
for (auto &map_set : child_binding_maps) {
|
343
|
-
for (auto &mapping : map_set) {
|
344
|
-
ColumnBinding relation_bindings = mapping.first;
|
345
|
-
ColumnBinding actual_bindings = mapping.second;
|
346
|
-
|
347
|
-
if (actual_bindings.table_index == binding_index) {
|
348
|
-
auto key = ColumnBinding(relation_id, relation_bindings.column_index);
|
349
|
-
AddRelationToColumnMapping(key, actual_bindings);
|
350
|
-
}
|
351
|
-
}
|
257
|
+
template <>
|
258
|
+
idx_t CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set) {
|
259
|
+
auto cardinality_as_double = EstimateCardinalityWithSet<double>(new_set);
|
260
|
+
auto max = NumericLimits<idx_t>::Maximum();
|
261
|
+
if (cardinality_as_double > max) {
|
262
|
+
return max;
|
352
263
|
}
|
264
|
+
return (idx_t)cardinality_as_double;
|
353
265
|
}
|
354
266
|
|
355
267
|
bool SortTdoms(const RelationsToTDom &a, const RelationsToTDom &b) {
|
@@ -365,236 +277,78 @@ bool SortTdoms(const RelationsToTDom &a, const RelationsToTDom &b) {
|
|
365
277
|
return a.tdom_no_hll > b.tdom_no_hll;
|
366
278
|
}
|
367
279
|
|
368
|
-
void CardinalityEstimator::InitCardinalityEstimatorProps(
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
if (join.join_type == JoinType::LEFT) {
|
379
|
-
// If a base op is a Logical Comparison join it is probably a left join,
|
380
|
-
// so the cost of the larger table is a fine estimate.
|
381
|
-
// TODO: provide better estimates for cost of mark joins
|
382
|
-
// MARK joins are used for anti and semi joins, so the cost can conceivably be
|
383
|
-
// less than the base table cardinality.
|
384
|
-
join_node.SetCost(join_node.GetBaseTableCardinality());
|
385
|
-
}
|
386
|
-
} else if (op.type == LogicalOperatorType::LOGICAL_ASOF_JOIN) {
|
387
|
-
// AsOf joins have the cardinality of the LHS
|
388
|
-
join_node.SetCost(join_node.GetBaseTableCardinality());
|
389
|
-
}
|
390
|
-
// Total domains can be affected by filters. So we update base table cardinality first
|
391
|
-
EstimateBaseTableCardinality(join_node, op);
|
392
|
-
// Then update total domains.
|
393
|
-
UpdateTotalDomains(join_node, op);
|
394
|
-
}
|
280
|
+
void CardinalityEstimator::InitCardinalityEstimatorProps(optional_ptr<JoinRelationSet> set, RelationStats &stats) {
|
281
|
+
// Get the join relation set
|
282
|
+
D_ASSERT(stats.stats_initialized);
|
283
|
+
auto relation_cardinality = stats.cardinality;
|
284
|
+
auto relation_filter = stats.filter_strength;
|
285
|
+
|
286
|
+
auto card_helper = CardinalityHelper(relation_cardinality, relation_filter);
|
287
|
+
relation_set_2_cardinality[set->ToString()] = card_helper;
|
288
|
+
|
289
|
+
UpdateTotalDomains(set, stats);
|
395
290
|
|
396
291
|
// sort relations from greatest tdom to lowest tdom.
|
397
292
|
std::sort(relations_to_tdoms.begin(), relations_to_tdoms.end(), SortTdoms);
|
398
293
|
}
|
399
294
|
|
400
|
-
void CardinalityEstimator::UpdateTotalDomains(
|
401
|
-
|
402
|
-
|
295
|
+
void CardinalityEstimator::UpdateTotalDomains(optional_ptr<JoinRelationSet> set, RelationStats &stats) {
|
296
|
+
D_ASSERT(set->count == 1);
|
297
|
+
auto relation_id = set->relations[0];
|
403
298
|
//! Initialize the distinct count for all columns used in joins with the current relation.
|
404
|
-
|
405
|
-
optional_ptr<TableCatalogEntry> catalog_table;
|
299
|
+
// D_ASSERT(stats.column_distinct_count.size() >= 1);
|
406
300
|
|
407
|
-
|
408
|
-
bool get_updated = true;
|
409
|
-
for (auto &column : relation_attributes[relation_id].columns) {
|
301
|
+
for (idx_t i = 0; i < stats.column_distinct_count.size(); i++) {
|
410
302
|
//! for every column used in a filter in the relation, get the distinct count via HLL, or assume it to be
|
411
303
|
//! the cardinality
|
412
|
-
ColumnBinding key = ColumnBinding(relation_id, column);
|
413
|
-
auto actual_binding = relation_column_to_original_column.find(key);
|
414
|
-
// each relation has columns that are either projected or used as filters
|
415
|
-
// In order to get column statistics we need to make sure the actual binding still
|
416
|
-
// refers to the same base table relation, as non-reorderable joins may involve 2+
|
417
|
-
// base table relations and therefore the columns may also refer to 2 different
|
418
|
-
// base table relations
|
419
|
-
if (actual_binding != relation_column_to_original_column.end() &&
|
420
|
-
(!get || get->table_index != actual_binding->second.table_index)) {
|
421
|
-
get = GetLogicalGet(op, actual_binding->second.table_index);
|
422
|
-
get_updated = true;
|
423
|
-
} else {
|
424
|
-
get_updated = false;
|
425
|
-
}
|
426
|
-
|
427
|
-
if (get_updated) {
|
428
|
-
if (get) {
|
429
|
-
catalog_table = GetCatalogTableEntry(*get);
|
430
|
-
} else {
|
431
|
-
catalog_table = nullptr;
|
432
|
-
}
|
433
|
-
}
|
434
|
-
|
435
|
-
if (catalog_table && actual_binding != relation_column_to_original_column.end()) {
|
436
|
-
// Get HLL stats here
|
437
|
-
auto base_stats = catalog_table->GetStatistics(context, actual_binding->second.column_index);
|
438
|
-
if (base_stats) {
|
439
|
-
distinct_count = base_stats->GetDistinctCount();
|
440
|
-
}
|
441
|
-
|
442
|
-
// HLL has estimation error, distinct_count can't be greater than cardinality of the table before filters
|
443
|
-
if (distinct_count > node.GetBaseTableCardinality()) {
|
444
|
-
distinct_count = node.GetBaseTableCardinality();
|
445
|
-
}
|
446
|
-
} else {
|
447
|
-
distinct_count = node.GetBaseTableCardinality();
|
448
|
-
}
|
449
304
|
// Update the relation_to_tdom set with the estimated distinct count (or tdom) calculated above
|
305
|
+
auto key = ColumnBinding(relation_id, i);
|
450
306
|
for (auto &relation_to_tdom : relations_to_tdoms) {
|
451
307
|
column_binding_set_t i_set = relation_to_tdom.equivalent_relations;
|
452
|
-
if (i_set.
|
308
|
+
if (i_set.find(key) == i_set.end()) {
|
453
309
|
continue;
|
454
310
|
}
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
relation_to_tdom.tdom_no_hll = distinct_count;
|
462
|
-
}
|
311
|
+
auto distinct_count = stats.column_distinct_count.at(i);
|
312
|
+
if (distinct_count.from_hll && relation_to_tdom.has_tdom_hll) {
|
313
|
+
relation_to_tdom.tdom_hll = MaxValue(relation_to_tdom.tdom_hll, distinct_count.distinct_count);
|
314
|
+
} else if (distinct_count.from_hll && !relation_to_tdom.has_tdom_hll) {
|
315
|
+
relation_to_tdom.has_tdom_hll = true;
|
316
|
+
relation_to_tdom.tdom_hll = distinct_count.distinct_count;
|
463
317
|
} else {
|
464
|
-
|
465
|
-
// the tdom
|
466
|
-
// 1. If there is any hll data in the equivalence set, use that
|
467
|
-
// 2. Otherwise, use the table with the smallest cardinality
|
468
|
-
if (relation_to_tdom.tdom_no_hll > distinct_count && !relation_to_tdom.has_tdom_hll) {
|
469
|
-
relation_to_tdom.tdom_no_hll = distinct_count;
|
470
|
-
}
|
318
|
+
relation_to_tdom.tdom_no_hll = MinValue(distinct_count.distinct_count, relation_to_tdom.tdom_no_hll);
|
471
319
|
}
|
472
320
|
break;
|
473
321
|
}
|
474
322
|
}
|
475
323
|
}
|
476
324
|
|
477
|
-
|
478
|
-
auto get = GetLogicalGet(op, table_index);
|
479
|
-
return get ? &get->table_filters : nullptr;
|
480
|
-
}
|
325
|
+
// LCOV_EXCL_START
|
481
326
|
|
482
|
-
|
483
|
-
|
484
|
-
auto
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
auto &comparison_filter = child_filter->Cast<ConstantFilter>();
|
491
|
-
if (comparison_filter.comparison_type != ExpressionType::COMPARE_EQUAL) {
|
492
|
-
continue;
|
493
|
-
}
|
494
|
-
auto column_count = 0;
|
495
|
-
if (base_stats) {
|
496
|
-
column_count = base_stats->GetDistinctCount();
|
497
|
-
}
|
498
|
-
auto filtered_card = cardinality;
|
499
|
-
// column_count = 0 when there is no column count (i.e parquet scans)
|
500
|
-
if (column_count > 0) {
|
501
|
-
// we want the ceil of cardinality/column_count. We also want to avoid compiler errors
|
502
|
-
filtered_card = (cardinality + column_count - 1) / column_count;
|
503
|
-
cardinality_after_filters = filtered_card;
|
504
|
-
}
|
505
|
-
if (has_equality_filter) {
|
506
|
-
cardinality_after_filters = MinValue(filtered_card, cardinality_after_filters);
|
507
|
-
}
|
508
|
-
has_equality_filter = true;
|
509
|
-
}
|
510
|
-
return cardinality_after_filters;
|
511
|
-
}
|
512
|
-
|
513
|
-
// Removed by this diff ('-' lines): estimates the cardinality that remains after an OR
// conjunction filter. Only CONSTANT_COMPARISON children whose comparison type is
// COMPARE_EQUAL are considered. Each of them contributes
// max(ceil(cardinality / column_count), 1), where column_count is
// base_stats->GetDistinctCount() when base_stats is set and the running estimate otherwise.
// The first equality child replaces the running estimate; later ones are added to it.
// With no equality child the input cardinality is returned unchanged.
// NOTE(review): column_count is used as a divisor without a zero check - confirm that
// GetDistinctCount() (or the running estimate) cannot be 0 on this path.
idx_t CardinalityEstimator::InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter &filter,
|
514
|
-
unique_ptr<BaseStatistics> base_stats) {
|
515
|
-
auto has_equality_filter = false;
|
516
|
-
auto cardinality_after_filters = cardinality;
|
517
|
-
for (auto &child_filter : filter.child_filters) {
|
518
|
-
if (child_filter->filter_type != TableFilterType::CONSTANT_COMPARISON) {
|
519
|
-
continue;
|
520
|
-
}
|
521
|
-
auto &comparison_filter = child_filter->Cast<ConstantFilter>();
|
522
|
-
if (comparison_filter.comparison_type == ExpressionType::COMPARE_EQUAL) {
|
523
|
-
auto column_count = cardinality_after_filters;
|
524
|
-
if (base_stats) {
|
525
|
-
column_count = base_stats->GetDistinctCount();
|
526
|
-
}
|
527
|
-
auto increment = MaxValue<idx_t>(((cardinality + column_count - 1) / column_count), 1);
|
528
|
-
if (has_equality_filter) {
|
529
|
-
cardinality_after_filters += increment;
|
530
|
-
} else {
|
531
|
-
cardinality_after_filters = increment;
|
532
|
-
}
|
533
|
-
has_equality_filter = true;
|
327
|
+
// Added by this diff ('+' lines): in DEBUG builds, walks every total domain in
// relations_to_tdoms and, for each of its equivalent relations, appends the matching column
// name from stats (indexed by table_index, then column_index) to the domain's column_names.
// Both indices are asserted to be in range first. Without DEBUG the body is compiled out.
void CardinalityEstimator::AddRelationNamesToTdoms(vector<RelationStats> &stats) {
|
328
|
+
#ifdef DEBUG
|
329
|
+
for (auto &total_domain : relations_to_tdoms) {
|
330
|
+
for (auto &binding : total_domain.equivalent_relations) {
|
331
|
+
D_ASSERT(binding.table_index < stats.size());
|
332
|
+
D_ASSERT(binding.column_index < stats.at(binding.table_index).column_names.size());
|
333
|
+
string column_name = stats.at(binding.table_index).column_names.at(binding.column_index);
|
334
|
+
total_domain.column_names.push_back(column_name);
|
534
335
|
}
|
535
336
|
}
|
536
|
-
|
537
|
-
return cardinality_after_filters;
|
337
|
+
#endif
|
538
338
|
}
|
539
339
|
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
for (auto &it : table_filters.filters) {
|
546
|
-
column_statistics = nullptr;
|
547
|
-
if (get->bind_data && get->function.name.compare("seq_scan") == 0) {
|
548
|
-
auto &table_scan_bind_data = get->bind_data->Cast<TableScanBindData>();
|
549
|
-
column_statistics = get->function.statistics(context, &table_scan_bind_data, it.first);
|
550
|
-
}
|
551
|
-
if (it.second->filter_type == TableFilterType::CONJUNCTION_AND) {
|
552
|
-
auto &filter = it.second->Cast<ConjunctionAndFilter>();
|
553
|
-
idx_t cardinality_with_and_filter =
|
554
|
-
InspectConjunctionAND(cardinality, it.first, filter, std::move(column_statistics));
|
555
|
-
cardinality_after_filters = MinValue(cardinality_after_filters, cardinality_with_and_filter);
|
556
|
-
} else if (it.second->filter_type == TableFilterType::CONJUNCTION_OR) {
|
557
|
-
auto &filter = it.second->Cast<ConjunctionOrFilter>();
|
558
|
-
idx_t cardinality_with_or_filter =
|
559
|
-
InspectConjunctionOR(cardinality, it.first, filter, std::move(column_statistics));
|
560
|
-
cardinality_after_filters = MinValue(cardinality_after_filters, cardinality_with_or_filter);
|
340
|
+
// Added by this diff ('+' lines): prints one message per total domain in relations_to_tdoms.
// The message lists the domain's column_names (each followed by ", ") and then the total
// domain value: tdom_hll when has_tdom_hll is set, tdom_no_hll otherwise. Output goes
// through Printer::Print.
// The '-' lines interleaved below are the removed tail of a different, older function.
void CardinalityEstimator::PrintRelationToTdomInfo() {
|
341
|
+
for (auto &total_domain : relations_to_tdoms) {
|
342
|
+
string domain = "Following columns have the same distinct count: ";
|
343
|
+
for (auto &column_name : total_domain.column_names) {
|
344
|
+
domain += column_name + ", ";
|
561
345
|
}
|
346
|
+
bool have_hll = total_domain.has_tdom_hll;
|
347
|
+
domain += "\n TOTAL DOMAIN = " + to_string(have_hll ? total_domain.tdom_hll : total_domain.tdom_no_hll);
|
348
|
+
Printer::Print(domain);
|
562
349
|
}
|
563
|
-
// if the above code didn't find an equality filter (i.e country_code = "[us]")
|
564
|
-
// and there are other table filters, use default selectivity.
|
565
|
-
bool has_equality_filter = (cardinality_after_filters != cardinality);
|
566
|
-
if (!has_equality_filter && !table_filters.filters.empty()) {
|
567
|
-
cardinality_after_filters = MaxValue<idx_t>(cardinality * DEFAULT_SELECTIVITY, 1);
|
568
|
-
}
|
569
|
-
return cardinality_after_filters;
|
570
350
|
}
|
571
351
|
|
572
|
-
|
573
|
-
auto has_logical_filter = IsLogicalFilter(op);
|
574
|
-
D_ASSERT(node.set.count == 1);
|
575
|
-
auto relation_id = node.set.relations[0];
|
576
|
-
|
577
|
-
double lowest_card_found = node.GetBaseTableCardinality();
|
578
|
-
for (auto &column : relation_attributes[relation_id].columns) {
|
579
|
-
auto card_after_filters = node.GetBaseTableCardinality();
|
580
|
-
ColumnBinding key = ColumnBinding(relation_id, column);
|
581
|
-
optional_ptr<TableFilterSet> table_filters;
|
582
|
-
auto actual_binding = relation_column_to_original_column.find(key);
|
583
|
-
if (actual_binding != relation_column_to_original_column.end()) {
|
584
|
-
table_filters = GetTableFilters(op, actual_binding->second.table_index);
|
585
|
-
}
|
586
|
-
|
587
|
-
if (table_filters) {
|
588
|
-
double inspect_result =
|
589
|
-
(double)InspectTableFilters(card_after_filters, op, *table_filters, actual_binding->second.table_index);
|
590
|
-
card_after_filters = MinValue(inspect_result, (double)card_after_filters);
|
591
|
-
}
|
592
|
-
if (has_logical_filter) {
|
593
|
-
card_after_filters *= DEFAULT_SELECTIVITY;
|
594
|
-
}
|
595
|
-
lowest_card_found = MinValue(card_after_filters, lowest_card_found);
|
596
|
-
}
|
597
|
-
node.SetEstimatedCardinality(lowest_card_found);
|
598
|
-
}
|
352
|
+
// LCOV_EXCL_STOP
|
599
353
|
|
600
354
|
} // namespace duckdb
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#include "duckdb/optimizer/join_order/join_node.hpp"
|
2
|
+
#include "duckdb/optimizer/join_order/join_order_optimizer.hpp"
|
3
|
+
#include "duckdb/optimizer/join_order/cost_model.hpp"
|
4
|
+
#include <cmath>
|
5
|
+
|
6
|
+
namespace duckdb {
|
7
|
+
|
8
|
+
// Creates a cost model that refers to the given query graph manager. The cardinality
// estimator member is value-initialized; nothing else happens at construction time.
CostModel::CostModel(QueryGraphManager &query_graph_manager)
    : query_graph_manager(query_graph_manager),
      cardinality_estimator() {
}
|
11
|
+
|
12
|
+
// Cost of joining `left` with `right`: the estimated cardinality of the union of both
// relation sets, plus the cost already stored on each input node.
double CostModel::ComputeCost(JoinNode &left, JoinNode &right) {
	auto &joined_relations = query_graph_manager.set_manager.Union(left.set, right.set);
	// The join's own cost is taken to be exactly its estimated output cardinality.
	const auto estimated_rows = cardinality_estimator.EstimateCardinalityWithSet<double>(joined_relations);
	return estimated_rows + left.cost + right.cost;
}
|
18
|
+
|
19
|
+
} // namespace duckdb
|