duckdb 0.8.2-dev2700.0 → 0.8.2-dev2842.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
- package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -1
- package/src/duckdb/extension/json/include/json_serializer.hpp +1 -1
- package/src/duckdb/extension/json/json_deserializer.cpp +10 -10
- package/src/duckdb/extension/json/json_scan.cpp +2 -2
- package/src/duckdb/extension/json/json_serializer.cpp +11 -10
- package/src/duckdb/extension/json/serialize_json.cpp +44 -44
- package/src/duckdb/extension/parquet/parquet_extension.cpp +11 -10
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +6 -6
- package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
- package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
- package/src/duckdb/src/common/enum_util.cpp +5 -0
- package/src/duckdb/src/common/extra_type_info.cpp +2 -2
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +5 -3
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +10 -5
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
- package/src/duckdb/src/common/types/value.cpp +33 -33
- package/src/duckdb/src/common/types/vector.cpp +20 -20
- package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +6 -6
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +4 -4
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
- package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
- package/src/duckdb/src/execution/window_executor.cpp +10 -1
- package/src/duckdb/src/function/table/read_csv.cpp +4 -4
- package/src/duckdb/src/function/table/table_scan.cpp +14 -14
- package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/index_vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +18 -17
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +10 -9
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +10 -10
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb.h +11 -1
- package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
- package/src/duckdb/src/main/pending_query_result.cpp +9 -1
- package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
- package/src/duckdb/src/main/relation.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
- package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
- package/src/duckdb/src/parallel/executor.cpp +6 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +18 -3
- package/src/duckdb/src/parser/tableref/pivotref.cpp +6 -6
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -10
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +6 -6
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -24
- package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +2 -2
- package/src/duckdb/src/planner/operator/logical_get.cpp +26 -22
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +26 -26
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +66 -66
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +78 -78
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +250 -250
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +206 -206
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +116 -116
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +110 -110
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +48 -48
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +54 -54
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +22 -22
- package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
- package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
@@ -0,0 +1,351 @@
|
|
1
|
+
#include "duckdb/optimizer/join_order/relation_statistics_helper.hpp"
|
2
|
+
#include "duckdb/planner/expression/list.hpp"
|
3
|
+
#include "duckdb/planner/operator/list.hpp"
|
4
|
+
#include "duckdb/planner/filter/conjunction_filter.hpp"
|
5
|
+
#include "duckdb/planner/expression_iterator.hpp"
|
6
|
+
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
7
|
+
#include "duckdb/function/table/table_scan.hpp"
|
8
|
+
#include "duckdb/planner/operator/logical_get.hpp"
|
9
|
+
#include "duckdb/storage/data_table.hpp"
|
10
|
+
#include "duckdb/planner/filter/constant_filter.hpp"
|
11
|
+
|
12
|
+
namespace duckdb {
|
13
|
+
|
14
|
+
// Searches `expr` (and, recursively, its children) for something that tells us
// where the expression's values come from.
//
// The returned ExpressionBinding has:
//   - found_expression:       set once a column reference or a constant-like leaf was reached
//   - expression_is_constant: set when that leaf is constant-like (no column binding)
//   - child_binding:          the (table_index, column_index) of a bound column reference
//
// When several children yield a result, the result of the last such child
// visited by EnumerateChildren is the one that is returned.
static ExpressionBinding GetChildColumnBinding(Expression &expr) {
	auto binding_info = ExpressionBinding();
	switch (expr.expression_class) {
	case ExpressionClass::BOUND_COLUMN_REF: {
		// direct column reference: report its binding
		auto &column_ref = expr.Cast<BoundColumnRefExpression>();
		binding_info.found_expression = true;
		binding_info.child_binding = ColumnBinding(column_ref.binding.table_index, column_ref.binding.column_index);
		return binding_info;
	}
	case ExpressionClass::BOUND_FUNCTION: {
		// TODO: Other expression classes that can have 0 children?
		auto &function_expr = expr.Cast<BoundFunctionExpression>();
		if (!function_expr.children.empty()) {
			// the function has arguments: inspect them below
			break;
		}
		// no children: some sort of gen_random_uuid() or equivalent, treated as a constant
		binding_info.found_expression = true;
		binding_info.expression_is_constant = true;
		return binding_info;
	}
	case ExpressionClass::BOUND_LAMBDA_REF:
	case ExpressionClass::BOUND_CONSTANT:
	case ExpressionClass::BOUND_DEFAULT:
	case ExpressionClass::BOUND_PARAMETER:
	case ExpressionClass::BOUND_REF:
		// leaf expressions without a column binding are reported as constants
		binding_info.found_expression = true;
		binding_info.expression_is_constant = true;
		return binding_info;
	default:
		break;
	}
	// not a leaf we recognise: recurse into the children
	ExpressionIterator::EnumerateChildren(expr, [&](unique_ptr<Expression> &child) {
		auto child_result = GetChildColumnBinding(*child);
		if (child_result.found_expression) {
			binding_info = child_result;
		}
	});
	// if no child produced a result, found_expression keeps its initial value
	return binding_info;
}
|
54
|
+
|
55
|
+
// Builds the RelationStats (cardinality, per-column distinct counts and column names)
// for a LogicalGet.
//
// Steps:
//   1. Take the base cardinality from get.EstimateCardinality(context).
//   2. For every entry in get.column_ids, record a distinct count. The count comes from
//      get.function.statistics when that callback exists, returns statistics, and the get
//      has a catalog table; otherwise the cardinality is used as a stand-in.
//   3. If the get has table filters, lower the cardinality: equality filters inside
//      CONJUNCTION_AND filters are estimated through InspectConjunctionAND (seq_scan only),
//      otherwise DEFAULT_SELECTIVITY is applied.
//
// Side effect: get.estimated_cardinality / get.has_estimated_cardinality are updated.
RelationStats RelationStatisticsHelper::ExtractGetStats(LogicalGet &get, ClientContext &context) {
	auto return_stats = RelationStats();

	auto base_table_cardinality = get.EstimateCardinality(context);
	auto cardinality_after_filters = base_table_cardinality;
	unique_ptr<BaseStatistics> column_statistics;

	auto table_thing = get.GetTable();
	auto name = string("some table");
	if (table_thing) {
		name = table_thing->name;
		return_stats.table_name = name;
	}

	// if we can get the catalog table, then our column statistics will be accurate
	// parquet readers etc. will still return statistics, but they initialize distinct column
	// counts to 0.
	// TODO: fix this, some file formats can encode distinct counts, we don't want to rely on
	//       getting a catalog table to know that we can use statistics.
	const bool have_catalog_table_statistics = table_thing ? true : false;

	// first push back basic distinct counts for each column (if we have them).
	for (idx_t i = 0; i < get.column_ids.size(); i++) {
		const auto column_id = get.column_ids.at(i);
		// A column id is not guaranteed to index into get.names; use a placeholder
		// name in that case instead of performing an out-of-range lookup.
		auto column_name = string("column");
		if (column_id < get.names.size()) {
			column_name = get.names.at(column_id);
		}

		bool have_distinct_count_stats = false;
		if (get.function.statistics) {
			column_statistics = get.function.statistics(context, get.bind_data.get(), column_id);
			if (column_statistics && have_catalog_table_statistics) {
				auto column_distinct_count = DistinctCount({column_statistics->GetDistinctCount(), true});
				return_stats.column_distinct_count.push_back(column_distinct_count);
				return_stats.column_names.push_back(name + "." + column_name);
				have_distinct_count_stats = true;
			}
		}
		if (!have_distinct_count_stats) {
			// currently treating the cardinality as the distinct count.
			// the cardinality estimator will update these distinct counts based
			// on the extra columns that are joined on.
			auto column_distinct_count = DistinctCount({cardinality_after_filters, false});
			return_stats.column_distinct_count.push_back(column_distinct_count);
			return_stats.column_names.push_back(get.GetName() + "." + column_name);
		}
	}

	if (!get.table_filters.filters.empty()) {
		column_statistics = nullptr;
		for (auto &it : get.table_filters.filters) {
			// per-column statistics are only fetched for the "seq_scan" table function
			if (get.bind_data && get.function.name.compare("seq_scan") == 0) {
				auto &table_scan_bind_data = get.bind_data->Cast<TableScanBindData>();
				column_statistics = get.function.statistics(context, &table_scan_bind_data, it.first);
			}

			if (column_statistics && it.second->filter_type == TableFilterType::CONJUNCTION_AND) {
				auto &filter = it.second->Cast<ConjunctionAndFilter>();
				idx_t cardinality_with_and_filter = RelationStatisticsHelper::InspectConjunctionAND(
				    base_table_cardinality, it.first, filter, *column_statistics);
				cardinality_after_filters = MinValue(cardinality_after_filters, cardinality_with_and_filter);
			}
		}
		// if the above code didn't find an equality filter (i.e country_code = "[us]")
		// and there are other table filters (i.e cost > 50), use default selectivity.
		bool has_equality_filter = (cardinality_after_filters != base_table_cardinality);
		if (!has_equality_filter) {
			cardinality_after_filters =
			    MaxValue<idx_t>(base_table_cardinality * RelationStatisticsHelper::DEFAULT_SELECTIVITY, 1);
		}
		// an empty base table stays empty (the MaxValue above would otherwise report 1)
		if (base_table_cardinality == 0) {
			cardinality_after_filters = 0;
		}
	}
	return_stats.cardinality = cardinality_after_filters;
	// update the estimated cardinality of the get as well.
	// This is not updated during plan reconstruction.
	get.estimated_cardinality = cardinality_after_filters;
	get.has_estimated_cardinality = true;
	D_ASSERT(base_table_cardinality >= cardinality_after_filters);
	return_stats.stats_initialized = true;
	return return_stats;
}
|
140
|
+
|
141
|
+
// Builds RelationStats for a LogicalDelimGet: the cardinality is the operator's own
// estimate, and every column binding gets a placeholder distinct count of 1
// (flagged `false`) together with a generated "column<index>" name.
RelationStats RelationStatisticsHelper::ExtractDelimGetStats(LogicalDelimGet &delim_get, ClientContext &context) {
	RelationStats delim_stats;
	delim_stats.table_name = delim_get.GetName();
	delim_stats.cardinality = delim_get.EstimateCardinality(context);
	delim_stats.stats_initialized = true;
	for (auto &column_binding : delim_get.GetColumnBindings()) {
		delim_stats.column_distinct_count.push_back(DistinctCount({1, false}));
		delim_stats.column_names.push_back("column" + to_string(column_binding.column_index));
	}
	return delim_stats;
}
|
153
|
+
|
154
|
+
// Derives RelationStats for a projection from the stats of its child.
//
// The cardinality is inherited from the child. For each projected expression the
// distinct count is chosen as follows:
//   - constant-like expression                      -> 1 (flagged true)
//   - bound to a child column we have stats for     -> the child's distinct count
//   - unknown column index and a "count_star()"     -> 1 (flagged true)
//   - anything else                                 -> the projection cardinality (flagged false)
RelationStats RelationStatisticsHelper::ExtractProjectionStats(LogicalProjection &proj, RelationStats &child_stats) {
	auto proj_stats = RelationStats();
	proj_stats.cardinality = child_stats.cardinality;
	proj_stats.table_name = proj.GetName();
	for (auto &expr : proj.expressions) {
		proj_stats.column_names.push_back(expr->GetName());
		auto binding_info = GetChildColumnBinding(*expr);
		D_ASSERT(binding_info.found_expression);
		if (binding_info.expression_is_constant) {
			proj_stats.column_distinct_count.push_back(DistinctCount({1, true}));
			continue;
		}
		auto child_column = binding_info.child_binding.column_index;
		// TODO: add this back in
		// D_ASSERT(column_index < stats.column_distinct_count.size());
		if (child_column < child_stats.column_distinct_count.size()) {
			proj_stats.column_distinct_count.push_back(child_stats.column_distinct_count.at(child_column));
			continue;
		}
		// the column index is not covered by the child stats
		if (expr->ToString() == "count_star()") {
			// only one value for a count star
			proj_stats.column_distinct_count.push_back(DistinctCount({1, true}));
		} else {
			proj_stats.column_distinct_count.push_back(DistinctCount({proj_stats.cardinality, false}));
		}
	}
	proj_stats.stats_initialized = true;
	return proj_stats;
}
|
183
|
+
|
184
|
+
// Builds RelationStats for a LogicalDummyScan. Every column binding of the scan gets
// the scan's estimated cardinality as a (non-authoritative) distinct count and the
// placeholder name "dummy_scan_column".
RelationStats RelationStatisticsHelper::ExtractDummyScanStats(LogicalDummyScan &dummy_scan, ClientContext &context) {
	auto stats = RelationStats();
	idx_t card = dummy_scan.EstimateCardinality(context);
	stats.cardinality = card;
	// query the bindings once instead of re-invoking GetColumnBindings() in the loop condition
	const idx_t column_count = dummy_scan.GetColumnBindings().size();
	for (idx_t i = 0; i < column_count; i++) {
		stats.column_distinct_count.push_back(DistinctCount({card, false}));
		stats.column_names.push_back("dummy_scan_column");
	}
	stats.stats_initialized = true;
	stats.table_name = "dummy scan";
	return stats;
}
|
196
|
+
|
197
|
+
// Copies the table name, cardinality, per-column names/distinct counts and the
// initialized flag from `from` into `to`.
// NOTE(review): filter_strength is not copied here — confirm that this is intentional.
void RelationStatisticsHelper::CopyRelationStats(RelationStats &to, const RelationStats &from) {
	// relation-level information
	to.table_name = from.table_name;
	to.cardinality = from.cardinality;
	to.stats_initialized = from.stats_initialized;
	// per-column information
	to.column_names = from.column_names;
	to.column_distinct_count = from.column_distinct_count;
}
|
204
|
+
|
205
|
+
// Merges the stats of all children of a reorderable operator into one RelationStats.
//
// Column distinct counts and column names are concatenated in child order, the
// cardinality is the maximum child cardinality, and the table name lists the child
// table names separated by " joined with " (e.g. "a joined with b").
// `bindings` is not used by this function.
RelationStats RelationStatisticsHelper::CombineStatsOfReorderableOperator(vector<ColumnBinding> &bindings,
                                                                          vector<RelationStats> relation_stats) {
	RelationStats stats;
	idx_t max_card = 0;
	for (auto &child_stats : relation_stats) {
		for (idx_t i = 0; i < child_stats.column_distinct_count.size(); i++) {
			stats.column_distinct_count.push_back(child_stats.column_distinct_count.at(i));
			stats.column_names.push_back(child_stats.column_names.at(i));
		}
		// only put the separator between names, so the result does not start with
		// "joined with" and consecutive names do not run together
		if (!stats.table_name.empty()) {
			stats.table_name += " joined with ";
		}
		stats.table_name += child_stats.table_name;
		max_card = MaxValue(max_card, child_stats.cardinality);
	}
	stats.stats_initialized = true;
	stats.cardinality = max_card;
	return stats;
}
|
221
|
+
|
222
|
+
// Merges the stats of the two children of a non-reorderable operator.
//
// The cardinality is the larger of the two child cardinalities (a child whose stats
// are not initialized counts as 0), filter_strength is set to 1, and the columns of
// every initialized child are appended in child order. `op` is not used.
RelationStats RelationStatisticsHelper::CombineStatsOfNonReorderableOperator(LogicalOperator &op,
                                                                             vector<RelationStats> child_stats) {
	D_ASSERT(child_stats.size() == 2);
	auto &left_stats = child_stats[0];
	auto &right_stats = child_stats[1];

	idx_t left_card = 0;
	if (left_stats.stats_initialized) {
		left_card = left_stats.cardinality;
	}
	idx_t right_card = 0;
	if (right_stats.stats_initialized) {
		right_card = right_stats.cardinality;
	}

	RelationStats combined;
	combined.table_name = left_stats.table_name + " joined with " + right_stats.table_name;
	combined.cardinality = MaxValue(left_card, right_card);
	combined.filter_strength = 1;
	combined.stats_initialized = true;
	for (auto &stats : child_stats) {
		// MARK joins are nonreorderable. They won't return initialized stats
		// continue in this case.
		if (stats.stats_initialized) {
			for (auto &distinct_count : stats.column_distinct_count) {
				combined.column_distinct_count.push_back(distinct_count);
			}
			for (auto &column_name : stats.column_names) {
				combined.column_names.push_back(column_name);
			}
		}
	}
	return combined;
}
|
247
|
+
|
248
|
+
// Builds RelationStats for a LogicalExpressionGet. Every column binding gets the
// operator's estimated cardinality as a (non-authoritative) distinct count and the
// placeholder name "expression_get_column".
RelationStats RelationStatisticsHelper::ExtractExpressionGetStats(LogicalExpressionGet &expression_get,
                                                                  ClientContext &context) {
	auto stats = RelationStats();
	idx_t card = expression_get.EstimateCardinality(context);
	stats.cardinality = card;
	// query the bindings once instead of re-invoking GetColumnBindings() in the loop condition
	const idx_t column_count = expression_get.GetColumnBindings().size();
	for (idx_t i = 0; i < column_count; i++) {
		stats.column_distinct_count.push_back(DistinctCount({card, false}));
		stats.column_names.push_back("expression_get_column");
	}
	stats.stats_initialized = true;
	stats.table_name = "expression_get";
	return stats;
}
|
261
|
+
|
262
|
+
// Derives RelationStats for a window operator from its child's stats.
//
// Cardinality, distinct counts and column names are inherited from the child. If the
// window exposes more column bindings than the child has distinct counts, the extra
// columns are named "window" and use the child cardinality as their distinct count.
RelationStats RelationStatisticsHelper::ExtractWindowStats(LogicalWindow &window, RelationStats &child_stats) {
	RelationStats window_stats;
	window_stats.cardinality = child_stats.cardinality;
	window_stats.column_distinct_count = child_stats.column_distinct_count;
	window_stats.column_names = child_stats.column_names;
	window_stats.stats_initialized = true;

	// pad the inherited columns until every binding of the window has an entry
	const auto total_columns = window.GetColumnBindings().size();
	while (window_stats.column_distinct_count.size() < total_columns) {
		window_stats.column_distinct_count.push_back(DistinctCount({child_stats.cardinality, false}));
		window_stats.column_names.push_back("window");
	}
	return window_stats;
}
|
277
|
+
|
278
|
+
// Derives RelationStats for an aggregate from its child's stats.
//
// Cardinality, distinct counts and column names are inherited from the child. If the
// aggregate exposes more column bindings than the child has distinct counts, the extra
// columns are named "aggregate" and use the child cardinality as their distinct count.
RelationStats RelationStatisticsHelper::ExtractAggregationStats(LogicalAggregate &aggr, RelationStats &child_stats) {
	RelationStats aggr_stats;
	// TODO: look at child distinct count to better estimate cardinality.
	aggr_stats.cardinality = child_stats.cardinality;
	aggr_stats.column_distinct_count = child_stats.column_distinct_count;
	aggr_stats.column_names = child_stats.column_names;
	aggr_stats.stats_initialized = true;

	// pad the inherited columns until every binding of the aggregate has an entry
	const auto total_columns = aggr.GetColumnBindings().size();
	while (aggr_stats.column_distinct_count.size() < total_columns) {
		aggr_stats.column_distinct_count.push_back(DistinctCount({child_stats.cardinality, false}));
		aggr_stats.column_names.push_back("aggregate");
	}
	return aggr_stats;
}
|
294
|
+
|
295
|
+
// Estimates the cardinality that remains after applying an AND filter on one column.
//
// Only child filters that are constant comparisons with COMPARE_EQUAL are considered.
// For such a filter the estimate is ceil(cardinality / distinct_count), where the
// distinct count comes from `base_stats`. If no such filter exists, or the distinct
// count is 0, `cardinality` is returned unchanged. `column_index` is not used.
idx_t RelationStatisticsHelper::InspectConjunctionAND(idx_t cardinality, idx_t column_index,
                                                      ConjunctionAndFilter &filter, BaseStatistics &base_stats) {
	auto estimate = cardinality;
	for (auto &child_filter : filter.child_filters) {
		if (child_filter->filter_type == TableFilterType::CONSTANT_COMPARISON) {
			auto &constant_filter = child_filter->Cast<ConstantFilter>();
			if (constant_filter.comparison_type == ExpressionType::COMPARE_EQUAL) {
				// distinct_count = 0 when there is no column count (i.e parquet scans)
				auto distinct_count = base_stats.GetDistinctCount();
				if (distinct_count > 0) {
					// integer ceil of cardinality / distinct_count
					estimate = (cardinality + distinct_count - 1) / distinct_count;
				}
			}
		}
	}
	return estimate;
}
|
317
|
+
|
318
|
+
// TODO: Currently only simple AND filters are pushed into table scans.
|
319
|
+
// When OR filters are pushed this function can be added
|
320
|
+
// idx_t RelationStatisticsHelper::InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter
|
321
|
+
// &filter,
|
322
|
+
// BaseStatistics &base_stats) {
|
323
|
+
// auto has_equality_filter = false;
|
324
|
+
// auto cardinality_after_filters = cardinality;
|
325
|
+
// for (auto &child_filter : filter.child_filters) {
|
326
|
+
// if (child_filter->filter_type != TableFilterType::CONSTANT_COMPARISON) {
|
327
|
+
// continue;
|
328
|
+
// }
|
329
|
+
// auto &comparison_filter = child_filter->Cast<ConstantFilter>();
|
330
|
+
// if (comparison_filter.comparison_type == ExpressionType::COMPARE_EQUAL) {
|
331
|
+
// auto column_count = base_stats.GetDistinctCount();
|
332
|
+
// auto increment = MaxValue<idx_t>(((cardinality + column_count - 1) / column_count), 1);
|
333
|
+
// if (has_equality_filter) {
|
334
|
+
// cardinality_after_filters += increment;
|
335
|
+
// } else {
|
336
|
+
// cardinality_after_filters = increment;
|
337
|
+
// }
|
338
|
+
// has_equality_filter = true;
|
339
|
+
// }
|
340
|
+
// if (child_filter->filter_type == TableFilterType::CONJUNCTION_AND) {
|
341
|
+
// auto &and_filter = child_filter->Cast<ConjunctionAndFilter>();
|
342
|
+
// cardinality_after_filters = RelationStatisticsHelper::InspectConjunctionAND(
|
343
|
+
// cardinality_after_filters, column_index, and_filter, base_stats);
|
344
|
+
// continue;
|
345
|
+
// }
|
346
|
+
// }
|
347
|
+
// D_ASSERT(cardinality_after_filters > 0);
|
348
|
+
// return cardinality_after_filters;
|
349
|
+
//}
|
350
|
+
|
351
|
+
} // namespace duckdb
|
@@ -458,6 +458,8 @@ bool Executor::ExecutionIsFinished() {
|
|
458
458
|
}
|
459
459
|
|
460
460
|
PendingExecutionResult Executor::ExecuteTask() {
|
461
|
+
// Only executor should return NO_TASKS_AVAILABLE
|
462
|
+
D_ASSERT(execution_result != PendingExecutionResult::NO_TASKS_AVAILABLE);
|
461
463
|
if (execution_result != PendingExecutionResult::RESULT_NOT_READY) {
|
462
464
|
return execution_result;
|
463
465
|
}
|
@@ -468,6 +470,10 @@ PendingExecutionResult Executor::ExecuteTask() {
|
|
468
470
|
if (!task) {
|
469
471
|
scheduler.GetTaskFromProducer(*producer, task);
|
470
472
|
}
|
473
|
+
if (!task && !HasError()) {
|
474
|
+
// there are no tasks to be scheduled and there are tasks blocked
|
475
|
+
return PendingExecutionResult::NO_TASKS_AVAILABLE;
|
476
|
+
}
|
471
477
|
if (task) {
|
472
478
|
// if we have a task, partially process it
|
473
479
|
auto result = task->Execute(TaskExecutionMode::PROCESS_PARTIAL);
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#include "concurrentqueue.h"
|
10
10
|
#include "duckdb/common/thread.hpp"
|
11
11
|
#include "lightweightsemaphore.h"
|
12
|
+
#include <thread>
|
12
13
|
#else
|
13
14
|
#include <queue>
|
14
15
|
#endif
|
@@ -256,6 +257,12 @@ void TaskScheduler::Signal(idx_t n) {
|
|
256
257
|
#endif
|
257
258
|
}
|
258
259
|
|
260
|
+
// Yields the calling thread via std::this_thread::yield().
// Compiles to a no-op when DUCKDB_NO_THREADS is defined.
void TaskScheduler::YieldThread() {
#ifdef DUCKDB_NO_THREADS
	// threading support is compiled out: nothing to yield to
#else
	std::this_thread::yield();
#endif
}
|
265
|
+
|
259
266
|
void TaskScheduler::SetThreadsInternal(int32_t n) {
|
260
267
|
#ifndef DUCKDB_NO_THREADS
|
261
268
|
if (threads.size() == idx_t(n - 1)) {
|
@@ -8,6 +8,7 @@
|
|
8
8
|
#include "duckdb/parser/statement/extension_statement.hpp"
|
9
9
|
#include "duckdb/parser/statement/select_statement.hpp"
|
10
10
|
#include "duckdb/parser/statement/update_statement.hpp"
|
11
|
+
#include "duckdb/parser/group_by_node.hpp"
|
11
12
|
#include "duckdb/parser/tableref/expressionlistref.hpp"
|
12
13
|
#include "duckdb/parser/transformer.hpp"
|
13
14
|
#include "parser/parser.hpp"
|
@@ -340,6 +341,22 @@ vector<unique_ptr<ParsedExpression>> Parser::ParseExpressionList(const string &s
|
|
340
341
|
return std::move(select_node.select_list);
|
341
342
|
}
|
342
343
|
|
344
|
+
// Parses a GROUP BY list given as a string.
//
// The list is embedded into the mock query "SELECT 42 GROUP BY <group_by>", which is
// parsed with the supplied options; the GroupByNode of the resulting SELECT node is
// moved out and returned.
// Throws a ParserException when the mock query does not yield exactly one SELECT statement.
GroupByNode Parser::ParseGroupByList(const string &group_by, ParserOptions options) {
	// wrap the group-by expressions in a mock SELECT query
	string group_by_query = StringUtil::Format("SELECT 42 GROUP BY %s", group_by);
	// run the mock query through a fresh parser
	Parser group_by_parser(options);
	group_by_parser.ParseQuery(group_by_query);
	// validate what came back
	auto &parsed_statements = group_by_parser.statements;
	const bool is_single_select =
	    parsed_statements.size() == 1 && parsed_statements[0]->type == StatementType::SELECT_STATEMENT;
	if (!is_single_select) {
		throw ParserException("Expected a single SELECT statement");
	}
	auto &select_statement = parsed_statements[0]->Cast<SelectStatement>();
	D_ASSERT(select_statement.node->type == QueryNodeType::SELECT_NODE);
	// hand the parsed groups over to the caller
	return std::move(select_statement.node->Cast<SelectNode>().groups);
}
|
359
|
+
|
343
360
|
vector<OrderByNode> Parser::ParseOrderList(const string &select_list, ParserOptions options) {
|
344
361
|
// construct a mock query
|
345
362
|
string mock_query = "SELECT * FROM tbl ORDER BY " + select_list;
|
@@ -351,9 +368,7 @@ vector<OrderByNode> Parser::ParseOrderList(const string &select_list, ParserOpti
|
|
351
368
|
throw ParserException("Expected a single SELECT statement");
|
352
369
|
}
|
353
370
|
auto &select = parser.statements[0]->Cast<SelectStatement>();
|
354
|
-
|
355
|
-
throw ParserException("Expected a single SELECT node");
|
356
|
-
}
|
371
|
+
D_ASSERT(select.node->type == QueryNodeType::SELECT_NODE);
|
357
372
|
auto &select_node = select.node->Cast<SelectNode>();
|
358
373
|
if (select_node.modifiers.empty() || select_node.modifiers[0]->type != ResultModifierType::ORDER_MODIFIER ||
|
359
374
|
select_node.modifiers.size() != 1) {
|
@@ -161,9 +161,9 @@ void PivotColumnEntry::Serialize(Serializer &serializer) const {
|
|
161
161
|
}
|
162
162
|
|
163
163
|
void PivotColumnEntry::FormatSerialize(FormatSerializer &serializer) const {
|
164
|
-
serializer.WriteProperty("values", values);
|
165
|
-
serializer.WriteOptionalProperty("star_expr", star_expr);
|
166
|
-
serializer.WriteProperty("alias", alias);
|
164
|
+
serializer.WriteProperty(100, "values", values);
|
165
|
+
serializer.WriteOptionalProperty(101, "star_expr", star_expr);
|
166
|
+
serializer.WriteProperty(102, "alias", alias);
|
167
167
|
}
|
168
168
|
|
169
169
|
PivotColumnEntry PivotColumnEntry::Deserialize(Deserializer &source) {
|
@@ -178,9 +178,9 @@ PivotColumnEntry PivotColumnEntry::Deserialize(Deserializer &source) {
|
|
178
178
|
|
179
179
|
PivotColumnEntry PivotColumnEntry::FormatDeserialize(FormatDeserializer &source) {
|
180
180
|
PivotColumnEntry result;
|
181
|
-
source.ReadProperty("values", result.values);
|
182
|
-
source.ReadOptionalProperty("star_expr", result.star_expr);
|
183
|
-
source.ReadProperty("alias", result.alias);
|
181
|
+
source.ReadProperty(100, "values", result.values);
|
182
|
+
source.ReadOptionalProperty(101, "star_expr", result.star_expr);
|
183
|
+
source.ReadProperty(102, "alias", result.alias);
|
184
184
|
return result;
|
185
185
|
}
|
186
186
|
|
@@ -46,7 +46,7 @@ BoundStatement Binder::Bind(ExecuteStatement &stmt) {
|
|
46
46
|
// catalog was modified or statement does not have clear types: rebind the statement before running the execute
|
47
47
|
Planner prepared_planner(context);
|
48
48
|
for (auto &pair : bind_values) {
|
49
|
-
prepared_planner.parameter_data.emplace(pair);
|
49
|
+
prepared_planner.parameter_data.emplace(std::make_pair(pair.first, BoundParameterData(pair.second)));
|
50
50
|
}
|
51
51
|
prepared = prepared_planner.PrepareSQLStatement(entry->second->unbound_statement->Copy());
|
52
52
|
rebound_plan = std::move(prepared_planner.plan);
|
@@ -106,24 +106,24 @@ unique_ptr<Expression> BoundAggregateExpression::Deserialize(ExpressionDeseriali
|
|
106
106
|
|
107
107
|
void BoundAggregateExpression::FormatSerialize(FormatSerializer &serializer) const {
|
108
108
|
Expression::FormatSerialize(serializer);
|
109
|
-
serializer.WriteProperty("return_type", return_type);
|
110
|
-
serializer.WriteProperty("children", children);
|
109
|
+
serializer.WriteProperty(200, "return_type", return_type);
|
110
|
+
serializer.WriteProperty(201, "children", children);
|
111
111
|
FunctionSerializer::FormatSerialize(serializer, function, bind_info.get());
|
112
|
-
serializer.WriteProperty("aggregate_type", aggr_type);
|
113
|
-
serializer.WriteOptionalProperty("filter", filter);
|
114
|
-
serializer.WriteOptionalProperty("order_bys", order_bys);
|
112
|
+
serializer.WriteProperty(203, "aggregate_type", aggr_type);
|
113
|
+
serializer.WriteOptionalProperty(204, "filter", filter);
|
114
|
+
serializer.WriteOptionalProperty(205, "order_bys", order_bys);
|
115
115
|
}
|
116
116
|
|
117
117
|
unique_ptr<Expression> BoundAggregateExpression::FormatDeserialize(FormatDeserializer &deserializer) {
|
118
|
-
auto return_type = deserializer.ReadProperty<LogicalType>("return_type");
|
119
|
-
auto children = deserializer.ReadProperty<vector<unique_ptr<Expression>>>("children");
|
118
|
+
auto return_type = deserializer.ReadProperty<LogicalType>(200, "return_type");
|
119
|
+
auto children = deserializer.ReadProperty<vector<unique_ptr<Expression>>>(201, "children");
|
120
120
|
auto entry = FunctionSerializer::FormatDeserialize<AggregateFunction, AggregateFunctionCatalogEntry>(
|
121
121
|
deserializer, CatalogType::AGGREGATE_FUNCTION_ENTRY, children);
|
122
|
-
auto aggregate_type = deserializer.ReadProperty<AggregateType>("aggregate_type");
|
123
|
-
auto filter = deserializer.ReadOptionalProperty<unique_ptr<Expression>>("filter");
|
122
|
+
auto aggregate_type = deserializer.ReadProperty<AggregateType>(203, "aggregate_type");
|
123
|
+
auto filter = deserializer.ReadOptionalProperty<unique_ptr<Expression>>(204, "filter");
|
124
124
|
auto result = make_uniq<BoundAggregateExpression>(std::move(entry.first), std::move(children), std::move(filter),
|
125
125
|
std::move(entry.second), aggregate_type);
|
126
|
-
deserializer.ReadOptionalProperty("order_bys", result->order_bys);
|
126
|
+
deserializer.ReadOptionalProperty(205, "order_bys", result->order_bys);
|
127
127
|
return std::move(result);
|
128
128
|
}
|
129
129
|
|
@@ -97,20 +97,20 @@ unique_ptr<Expression> BoundFunctionExpression::Deserialize(ExpressionDeserializ
|
|
97
97
|
|
98
98
|
void BoundFunctionExpression::FormatSerialize(FormatSerializer &serializer) const {
|
99
99
|
Expression::FormatSerialize(serializer);
|
100
|
-
serializer.WriteProperty("return_type", return_type);
|
101
|
-
serializer.WriteProperty("children", children);
|
100
|
+
serializer.WriteProperty(200, "return_type", return_type);
|
101
|
+
serializer.WriteProperty(201, "children", children);
|
102
102
|
FunctionSerializer::FormatSerialize(serializer, function, bind_info.get());
|
103
|
-
serializer.WriteProperty("is_operator", is_operator);
|
103
|
+
serializer.WriteProperty(202, "is_operator", is_operator);
|
104
104
|
}
|
105
105
|
|
106
106
|
unique_ptr<Expression> BoundFunctionExpression::FormatDeserialize(FormatDeserializer &deserializer) {
|
107
|
-
auto return_type = deserializer.ReadProperty<LogicalType>("return_type");
|
108
|
-
auto children = deserializer.ReadProperty<vector<unique_ptr<Expression>>>("children");
|
107
|
+
auto return_type = deserializer.ReadProperty<LogicalType>(200, "return_type");
|
108
|
+
auto children = deserializer.ReadProperty<vector<unique_ptr<Expression>>>(201, "children");
|
109
109
|
auto entry = FunctionSerializer::FormatDeserialize<ScalarFunction, ScalarFunctionCatalogEntry>(
|
110
110
|
deserializer, CatalogType::SCALAR_FUNCTION_ENTRY, children);
|
111
111
|
auto result = make_uniq<BoundFunctionExpression>(std::move(return_type), std::move(entry.first),
|
112
112
|
std::move(children), std::move(entry.second));
|
113
|
-
deserializer.ReadProperty("is_operator", result->is_operator);
|
113
|
+
deserializer.ReadProperty(202, "is_operator", result->is_operator);
|
114
114
|
return std::move(result);
|
115
115
|
}
|
116
116
|
|
@@ -164,28 +164,28 @@ unique_ptr<Expression> BoundWindowExpression::Deserialize(ExpressionDeserializat
|
|
164
164
|
|
165
165
|
void BoundWindowExpression::FormatSerialize(FormatSerializer &serializer) const {
|
166
166
|
Expression::FormatSerialize(serializer);
|
167
|
-
serializer.WriteProperty("return_type", return_type);
|
168
|
-
serializer.WriteProperty("children", children);
|
167
|
+
serializer.WriteProperty(200, "return_type", return_type);
|
168
|
+
serializer.WriteProperty(201, "children", children);
|
169
169
|
if (type == ExpressionType::WINDOW_AGGREGATE) {
|
170
170
|
D_ASSERT(aggregate);
|
171
171
|
FunctionSerializer::FormatSerialize(serializer, *aggregate, bind_info.get());
|
172
172
|
}
|
173
|
-
serializer.WriteProperty("partitions", partitions);
|
174
|
-
serializer.WriteProperty("orders", orders);
|
175
|
-
serializer.WriteOptionalProperty("filters", filter_expr);
|
176
|
-
serializer.WriteProperty("ignore_nulls", ignore_nulls);
|
177
|
-
serializer.WriteProperty("start", start);
|
178
|
-
serializer.WriteProperty("end", end);
|
179
|
-
serializer.WriteOptionalProperty("start_expr", start_expr);
|
180
|
-
serializer.WriteOptionalProperty("end_expr", end_expr);
|
181
|
-
serializer.WriteOptionalProperty("offset_expr", offset_expr);
|
182
|
-
serializer.WriteOptionalProperty("default_expr", default_expr);
|
173
|
+
serializer.WriteProperty(202, "partitions", partitions);
|
174
|
+
serializer.WriteProperty(203, "orders", orders);
|
175
|
+
serializer.WriteOptionalProperty(204, "filters", filter_expr);
|
176
|
+
serializer.WriteProperty(205, "ignore_nulls", ignore_nulls);
|
177
|
+
serializer.WriteProperty(206, "start", start);
|
178
|
+
serializer.WriteProperty(207, "end", end);
|
179
|
+
serializer.WriteOptionalProperty(208, "start_expr", start_expr);
|
180
|
+
serializer.WriteOptionalProperty(209, "end_expr", end_expr);
|
181
|
+
serializer.WriteOptionalProperty(210, "offset_expr", offset_expr);
|
182
|
+
serializer.WriteOptionalProperty(211, "default_expr", default_expr);
|
183
183
|
}
|
184
184
|
|
185
185
|
unique_ptr<Expression> BoundWindowExpression::FormatDeserialize(FormatDeserializer &deserializer) {
|
186
186
|
auto expression_type = deserializer.Get<ExpressionType>();
|
187
|
-
auto return_type = deserializer.ReadProperty<LogicalType>("return_type");
|
188
|
-
auto children = deserializer.ReadProperty<vector<unique_ptr<Expression>>>("children");
|
187
|
+
auto return_type = deserializer.ReadProperty<LogicalType>(200, "return_type");
|
188
|
+
auto children = deserializer.ReadProperty<vector<unique_ptr<Expression>>>(201, "children");
|
189
189
|
unique_ptr<AggregateFunction> aggregate;
|
190
190
|
unique_ptr<FunctionData> bind_info;
|
191
191
|
if (expression_type == ExpressionType::WINDOW_AGGREGATE) {
|
@@ -196,16 +196,16 @@ unique_ptr<Expression> BoundWindowExpression::FormatDeserialize(FormatDeserializ
|
|
196
196
|
}
|
197
197
|
auto result =
|
198
198
|
make_uniq<BoundWindowExpression>(expression_type, return_type, std::move(aggregate), std::move(bind_info));
|
199
|
-
deserializer.ReadProperty("partitions", result->partitions);
|
200
|
-
deserializer.ReadProperty("orders", result->orders);
|
201
|
-
deserializer.ReadOptionalProperty("filters", result->filter_expr);
|
202
|
-
deserializer.ReadProperty("ignore_nulls", result->ignore_nulls);
|
203
|
-
deserializer.ReadProperty("start", result->start);
|
204
|
-
deserializer.ReadProperty("end", result->end);
|
205
|
-
deserializer.ReadOptionalProperty("start_expr", result->start_expr);
|
206
|
-
deserializer.ReadOptionalProperty("end_expr", result->end_expr);
|
207
|
-
deserializer.ReadOptionalProperty("offset_expr", result->offset_expr);
|
208
|
-
deserializer.ReadOptionalProperty("default_expr", result->default_expr);
|
199
|
+
deserializer.ReadProperty(202, "partitions", result->partitions);
|
200
|
+
deserializer.ReadProperty(203, "orders", result->orders);
|
201
|
+
deserializer.ReadOptionalProperty(204, "filters", result->filter_expr);
|
202
|
+
deserializer.ReadProperty(205, "ignore_nulls", result->ignore_nulls);
|
203
|
+
deserializer.ReadProperty(206, "start", result->start);
|
204
|
+
deserializer.ReadProperty(207, "end", result->end);
|
205
|
+
deserializer.ReadOptionalProperty(208, "start_expr", result->start_expr);
|
206
|
+
deserializer.ReadOptionalProperty(209, "end_expr", result->end_expr);
|
207
|
+
deserializer.ReadOptionalProperty(210, "offset_expr", result->offset_expr);
|
208
|
+
deserializer.ReadOptionalProperty(211, "default_expr", result->default_expr);
|
209
209
|
return std::move(result);
|
210
210
|
}
|
211
211
|
|
@@ -20,12 +20,12 @@ unique_ptr<LogicalExtensionOperator> LogicalExtensionOperator::Deserialize(Logic
|
|
20
20
|
|
21
21
|
void LogicalExtensionOperator::FormatSerialize(FormatSerializer &serializer) const {
|
22
22
|
LogicalOperator::FormatSerialize(serializer);
|
23
|
-
serializer.WriteProperty("extension_name", GetExtensionName());
|
23
|
+
serializer.WriteProperty(200, "extension_name", GetExtensionName());
|
24
24
|
}
|
25
25
|
|
26
26
|
unique_ptr<LogicalOperator> LogicalExtensionOperator::FormatDeserialize(FormatDeserializer &deserializer) {
|
27
27
|
auto &config = DBConfig::GetConfig(deserializer.Get<ClientContext &>());
|
28
|
-
auto extension_name = deserializer.ReadProperty<string>("extension_name");
|
28
|
+
auto extension_name = deserializer.ReadProperty<string>(200, "extension_name");
|
29
29
|
for (auto &extension : config.operator_extensions) {
|
30
30
|
if (extension->GetName() == extension_name) {
|
31
31
|
return extension->FormatDeserialize(deserializer);
|