duckdb 0.8.2-dev2700.0 → 0.8.2-dev2842.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
  3. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -1
  4. package/src/duckdb/extension/json/include/json_serializer.hpp +1 -1
  5. package/src/duckdb/extension/json/json_deserializer.cpp +10 -10
  6. package/src/duckdb/extension/json/json_scan.cpp +2 -2
  7. package/src/duckdb/extension/json/json_serializer.cpp +11 -10
  8. package/src/duckdb/extension/json/serialize_json.cpp +44 -44
  9. package/src/duckdb/extension/parquet/parquet_extension.cpp +11 -10
  10. package/src/duckdb/extension/parquet/serialize_parquet.cpp +6 -6
  11. package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
  12. package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
  13. package/src/duckdb/src/common/enum_util.cpp +5 -0
  14. package/src/duckdb/src/common/extra_type_info.cpp +2 -2
  15. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +5 -3
  16. package/src/duckdb/src/common/serializer/binary_serializer.cpp +10 -5
  17. package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
  18. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  19. package/src/duckdb/src/common/types/value.cpp +33 -33
  20. package/src/duckdb/src/common/types/vector.cpp +20 -20
  21. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +2 -2
  22. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +6 -6
  23. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +4 -4
  24. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +4 -4
  25. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
  26. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  27. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  28. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  29. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
  30. package/src/duckdb/src/execution/window_executor.cpp +10 -1
  31. package/src/duckdb/src/function/table/read_csv.cpp +4 -4
  32. package/src/duckdb/src/function/table/table_scan.cpp +14 -14
  33. package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
  34. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  36. package/src/duckdb/src/include/duckdb/common/index_vector.hpp +2 -2
  37. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +7 -3
  38. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +2 -1
  39. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +18 -17
  40. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +10 -9
  41. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +4 -0
  42. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
  43. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
  44. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +10 -10
  45. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
  47. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
  48. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  49. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  50. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
  51. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
  52. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  53. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
  54. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  55. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  56. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  57. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
  58. package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
  59. package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
  60. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
  61. package/src/duckdb/src/include/duckdb.h +11 -1
  62. package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
  63. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  64. package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
  65. package/src/duckdb/src/main/relation.cpp +4 -4
  66. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  67. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  68. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  69. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
  70. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  71. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
  72. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  73. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  74. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  75. package/src/duckdb/src/parallel/executor.cpp +6 -0
  76. package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
  77. package/src/duckdb/src/parser/parser.cpp +18 -3
  78. package/src/duckdb/src/parser/tableref/pivotref.cpp +6 -6
  79. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  80. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -10
  81. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +6 -6
  82. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -24
  83. package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +2 -2
  84. package/src/duckdb/src/planner/operator/logical_get.cpp +26 -22
  85. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +26 -26
  86. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +66 -66
  87. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +78 -78
  88. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +250 -250
  89. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +10 -10
  90. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +206 -206
  91. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +116 -116
  92. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +110 -110
  93. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +48 -48
  94. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +16 -16
  95. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -2
  96. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +10 -10
  97. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +54 -54
  98. package/src/duckdb/src/storage/serialization/serialize_types.cpp +22 -22
  99. package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
  100. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
@@ -0,0 +1,351 @@
1
+ #include "duckdb/optimizer/join_order/relation_statistics_helper.hpp"
2
+ #include "duckdb/planner/expression/list.hpp"
3
+ #include "duckdb/planner/operator/list.hpp"
4
+ #include "duckdb/planner/filter/conjunction_filter.hpp"
5
+ #include "duckdb/planner/expression_iterator.hpp"
6
+ #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
7
+ #include "duckdb/function/table/table_scan.hpp"
8
+ #include "duckdb/planner/operator/logical_get.hpp"
9
+ #include "duckdb/storage/data_table.hpp"
10
+ #include "duckdb/planner/filter/constant_filter.hpp"
11
+
12
+ namespace duckdb {
13
+
14
+ static ExpressionBinding GetChildColumnBinding(Expression &expr) {
15
+ auto ret = ExpressionBinding();
16
+ switch (expr.expression_class) {
17
+ case ExpressionClass::BOUND_FUNCTION: {
18
+ // TODO: Other expression classes that can have 0 children?
19
+ auto &func = expr.Cast<BoundFunctionExpression>();
20
+ // no children some sort of gen_random_uuid() or equivalent.
21
+ if (func.children.empty()) {
22
+ ret.found_expression = true;
23
+ ret.expression_is_constant = true;
24
+ return ret;
25
+ }
26
+ break;
27
+ }
28
+ case ExpressionClass::BOUND_COLUMN_REF: {
29
+ ret.found_expression = true;
30
+ auto &new_col_ref = expr.Cast<BoundColumnRefExpression>();
31
+ ret.child_binding = ColumnBinding(new_col_ref.binding.table_index, new_col_ref.binding.column_index);
32
+ return ret;
33
+ }
34
+ case ExpressionClass::BOUND_LAMBDA_REF:
35
+ case ExpressionClass::BOUND_CONSTANT:
36
+ case ExpressionClass::BOUND_DEFAULT:
37
+ case ExpressionClass::BOUND_PARAMETER:
38
+ case ExpressionClass::BOUND_REF:
39
+ ret.found_expression = true;
40
+ ret.expression_is_constant = true;
41
+ return ret;
42
+ default:
43
+ break;
44
+ }
45
+ ExpressionIterator::EnumerateChildren(expr, [&](unique_ptr<Expression> &child) {
46
+ auto recursive_result = GetChildColumnBinding(*child);
47
+ if (recursive_result.found_expression) {
48
+ ret = recursive_result;
49
+ }
50
+ });
51
+ // we didn't find a Bound Column Ref
52
+ return ret;
53
+ }
54
+
55
+ RelationStats RelationStatisticsHelper::ExtractGetStats(LogicalGet &get, ClientContext &context) {
56
+ auto return_stats = RelationStats();
57
+
58
+ auto base_table_cardinality = get.EstimateCardinality(context);
59
+ auto cardinality_after_filters = base_table_cardinality;
60
+ unique_ptr<BaseStatistics> column_statistics;
61
+
62
+ auto table_thing = get.GetTable();
63
+ auto name = string("some table");
64
+ if (table_thing) {
65
+ name = table_thing->name;
66
+ return_stats.table_name = name;
67
+ }
68
+
69
+ // if we can get the catalog table, then our column statistics will be accurate
70
+ // parquet readers etc. will still return statistics, but they initialize distinct column
71
+ // counts to 0.
72
+ // TODO: fix this, some file formats can encode distinct counts, we don't want to rely on
73
+ // getting a catalog table to know that we can use statistics.
74
+ bool have_catalog_table_statistics = false;
75
+ if (get.GetTable()) {
76
+ have_catalog_table_statistics = true;
77
+ }
78
+
79
+ // first push back basic distinct counts for each column (if we have them).
80
+ for (idx_t i = 0; i < get.column_ids.size(); i++) {
81
+ bool have_distinct_count_stats = false;
82
+ if (get.function.statistics) {
83
+ column_statistics = get.function.statistics(context, get.bind_data.get(), get.column_ids[i]);
84
+ if (column_statistics && have_catalog_table_statistics) {
85
+ auto column_distinct_count = DistinctCount({column_statistics->GetDistinctCount(), true});
86
+ return_stats.column_distinct_count.push_back(column_distinct_count);
87
+ return_stats.column_names.push_back(name + "." + get.names.at(get.column_ids.at(i)));
88
+ have_distinct_count_stats = true;
89
+ }
90
+ }
91
+ if (!have_distinct_count_stats) {
92
+ // currently treating the cardinality as the distinct count.
93
+ // the cardinality estimator will update these distinct counts based
94
+ // on the extra columns that are joined on.
95
+ auto column_distinct_count = DistinctCount({cardinality_after_filters, false});
96
+ return_stats.column_distinct_count.push_back(column_distinct_count);
97
+ auto column_name = string("column");
98
+ if (get.column_ids.at(i) < get.names.size()) {
99
+ column_name = get.names.at(get.column_ids.at(i));
100
+ }
101
+ return_stats.column_names.push_back(get.GetName() + "." + column_name);
102
+ }
103
+ }
104
+
105
+ if (!get.table_filters.filters.empty()) {
106
+ column_statistics = nullptr;
107
+ for (auto &it : get.table_filters.filters) {
108
+ if (get.bind_data && get.function.name.compare("seq_scan") == 0) {
109
+ auto &table_scan_bind_data = get.bind_data->Cast<TableScanBindData>();
110
+ column_statistics = get.function.statistics(context, &table_scan_bind_data, it.first);
111
+ }
112
+
113
+ if (column_statistics && it.second->filter_type == TableFilterType::CONJUNCTION_AND) {
114
+ auto &filter = it.second->Cast<ConjunctionAndFilter>();
115
+ idx_t cardinality_with_and_filter = RelationStatisticsHelper::InspectConjunctionAND(
116
+ base_table_cardinality, it.first, filter, *column_statistics);
117
+ cardinality_after_filters = MinValue(cardinality_after_filters, cardinality_with_and_filter);
118
+ }
119
+ }
120
+ // if the above code didn't find an equality filter (i.e country_code = "[us]")
121
+ // and there are other table filters (i.e cost > 50), use default selectivity.
122
+ bool has_equality_filter = (cardinality_after_filters != base_table_cardinality);
123
+ if (!has_equality_filter && !get.table_filters.filters.empty()) {
124
+ cardinality_after_filters =
125
+ MaxValue<idx_t>(base_table_cardinality * RelationStatisticsHelper::DEFAULT_SELECTIVITY, 1);
126
+ }
127
+ if (base_table_cardinality == 0) {
128
+ cardinality_after_filters = 0;
129
+ }
130
+ }
131
+ return_stats.cardinality = cardinality_after_filters;
132
+ // update the estimated cardinality of the get as well.
133
+ // This is not updated during plan reconstruction.
134
+ get.estimated_cardinality = cardinality_after_filters;
135
+ get.has_estimated_cardinality = true;
136
+ D_ASSERT(base_table_cardinality >= cardinality_after_filters);
137
+ return_stats.stats_initialized = true;
138
+ return return_stats;
139
+ }
140
+
141
+ RelationStats RelationStatisticsHelper::ExtractDelimGetStats(LogicalDelimGet &delim_get, ClientContext &context) {
142
+ RelationStats stats;
143
+ stats.table_name = delim_get.GetName();
144
+ idx_t card = delim_get.EstimateCardinality(context);
145
+ stats.cardinality = card;
146
+ stats.stats_initialized = true;
147
+ for (auto &binding : delim_get.GetColumnBindings()) {
148
+ stats.column_distinct_count.push_back(DistinctCount({1, false}));
149
+ stats.column_names.push_back("column" + to_string(binding.column_index));
150
+ }
151
+ return stats;
152
+ }
153
+
154
+ RelationStats RelationStatisticsHelper::ExtractProjectionStats(LogicalProjection &proj, RelationStats &child_stats) {
155
+ auto proj_stats = RelationStats();
156
+ proj_stats.cardinality = child_stats.cardinality;
157
+ proj_stats.table_name = proj.GetName();
158
+ for (auto &expr : proj.expressions) {
159
+ proj_stats.column_names.push_back(expr->GetName());
160
+ auto res = GetChildColumnBinding(*expr);
161
+ D_ASSERT(res.found_expression);
162
+ if (res.expression_is_constant) {
163
+ proj_stats.column_distinct_count.push_back(DistinctCount({1, true}));
164
+ } else {
165
+ auto column_index = res.child_binding.column_index;
166
+ if (column_index >= child_stats.column_distinct_count.size() && expr->ToString() == "count_star()") {
167
+ // only one value for a count star
168
+ proj_stats.column_distinct_count.push_back(DistinctCount({1, true}));
169
+ } else {
170
+ // TODO: add this back in
171
+ // D_ASSERT(column_index < stats.column_distinct_count.size());
172
+ if (column_index < child_stats.column_distinct_count.size()) {
173
+ proj_stats.column_distinct_count.push_back(child_stats.column_distinct_count.at(column_index));
174
+ } else {
175
+ proj_stats.column_distinct_count.push_back(DistinctCount({proj_stats.cardinality, false}));
176
+ }
177
+ }
178
+ }
179
+ }
180
+ proj_stats.stats_initialized = true;
181
+ return proj_stats;
182
+ }
183
+
184
+ RelationStats RelationStatisticsHelper::ExtractDummyScanStats(LogicalDummyScan &dummy_scan, ClientContext &context) {
185
+ auto stats = RelationStats();
186
+ idx_t card = dummy_scan.EstimateCardinality(context);
187
+ stats.cardinality = card;
188
+ for (idx_t i = 0; i < dummy_scan.GetColumnBindings().size(); i++) {
189
+ stats.column_distinct_count.push_back(DistinctCount({card, false}));
190
+ stats.column_names.push_back("dummy_scan_column");
191
+ }
192
+ stats.stats_initialized = true;
193
+ stats.table_name = "dummy scan";
194
+ return stats;
195
+ }
196
+
197
+ void RelationStatisticsHelper::CopyRelationStats(RelationStats &to, const RelationStats &from) {
198
+ to.column_distinct_count = from.column_distinct_count;
199
+ to.column_names = from.column_names;
200
+ to.cardinality = from.cardinality;
201
+ to.table_name = from.table_name;
202
+ to.stats_initialized = from.stats_initialized;
203
+ }
204
+
205
+ RelationStats RelationStatisticsHelper::CombineStatsOfReorderableOperator(vector<ColumnBinding> &bindings,
206
+ vector<RelationStats> relation_stats) {
207
+ RelationStats stats;
208
+ idx_t max_card = 0;
209
+ for (auto &child_stats : relation_stats) {
210
+ for (idx_t i = 0; i < child_stats.column_distinct_count.size(); i++) {
211
+ stats.column_distinct_count.push_back(child_stats.column_distinct_count.at(i));
212
+ stats.column_names.push_back(child_stats.column_names.at(i));
213
+ }
214
+ stats.table_name += "joined with " + child_stats.table_name;
215
+ max_card = MaxValue(max_card, child_stats.cardinality);
216
+ }
217
+ stats.stats_initialized = true;
218
+ stats.cardinality = max_card;
219
+ return stats;
220
+ }
221
+
222
+ RelationStats RelationStatisticsHelper::CombineStatsOfNonReorderableOperator(LogicalOperator &op,
223
+ vector<RelationStats> child_stats) {
224
+ D_ASSERT(child_stats.size() == 2);
225
+ RelationStats ret;
226
+ idx_t child_1_card = child_stats[0].stats_initialized ? child_stats[0].cardinality : 0;
227
+ idx_t child_2_card = child_stats[1].stats_initialized ? child_stats[1].cardinality : 0;
228
+ ret.cardinality = MaxValue(child_1_card, child_2_card);
229
+ ret.stats_initialized = true;
230
+ ret.filter_strength = 1;
231
+ ret.table_name = child_stats[0].table_name + " joined with " + child_stats[1].table_name;
232
+ for (auto &stats : child_stats) {
233
+ // MARK joins are nonreorderable. They won't return initialized stats
234
+ // continue in this case.
235
+ if (!stats.stats_initialized) {
236
+ continue;
237
+ }
238
+ for (auto &distinct_count : stats.column_distinct_count) {
239
+ ret.column_distinct_count.push_back(distinct_count);
240
+ }
241
+ for (auto &column_name : stats.column_names) {
242
+ ret.column_names.push_back(column_name);
243
+ }
244
+ }
245
+ return ret;
246
+ }
247
+
248
+ RelationStats RelationStatisticsHelper::ExtractExpressionGetStats(LogicalExpressionGet &expression_get,
249
+ ClientContext &context) {
250
+ auto stats = RelationStats();
251
+ idx_t card = expression_get.EstimateCardinality(context);
252
+ stats.cardinality = card;
253
+ for (idx_t i = 0; i < expression_get.GetColumnBindings().size(); i++) {
254
+ stats.column_distinct_count.push_back(DistinctCount({card, false}));
255
+ stats.column_names.push_back("expression_get_column");
256
+ }
257
+ stats.stats_initialized = true;
258
+ stats.table_name = "expression_get";
259
+ return stats;
260
+ }
261
+
262
+ RelationStats RelationStatisticsHelper::ExtractWindowStats(LogicalWindow &window, RelationStats &child_stats) {
263
+ RelationStats stats;
264
+ stats.cardinality = child_stats.cardinality;
265
+ stats.column_distinct_count = child_stats.column_distinct_count;
266
+ stats.column_names = child_stats.column_names;
267
+ stats.stats_initialized = true;
268
+ auto num_child_columns = window.GetColumnBindings().size();
269
+
270
+ for (idx_t column_index = child_stats.column_distinct_count.size(); column_index < num_child_columns;
271
+ column_index++) {
272
+ stats.column_distinct_count.push_back(DistinctCount({child_stats.cardinality, false}));
273
+ stats.column_names.push_back("window");
274
+ }
275
+ return stats;
276
+ }
277
+
278
+ RelationStats RelationStatisticsHelper::ExtractAggregationStats(LogicalAggregate &aggr, RelationStats &child_stats) {
279
+ RelationStats stats;
280
+ // TODO: look at child distinct count to better estimate cardinality.
281
+ stats.cardinality = child_stats.cardinality;
282
+ stats.column_distinct_count = child_stats.column_distinct_count;
283
+ stats.column_names = child_stats.column_names;
284
+ stats.stats_initialized = true;
285
+ auto num_child_columns = aggr.GetColumnBindings().size();
286
+
287
+ for (idx_t column_index = child_stats.column_distinct_count.size(); column_index < num_child_columns;
288
+ column_index++) {
289
+ stats.column_distinct_count.push_back(DistinctCount({child_stats.cardinality, false}));
290
+ stats.column_names.push_back("aggregate");
291
+ }
292
+ return stats;
293
+ }
294
+
295
+ idx_t RelationStatisticsHelper::InspectConjunctionAND(idx_t cardinality, idx_t column_index,
296
+ ConjunctionAndFilter &filter, BaseStatistics &base_stats) {
297
+ auto cardinality_after_filters = cardinality;
298
+ for (auto &child_filter : filter.child_filters) {
299
+ if (child_filter->filter_type != TableFilterType::CONSTANT_COMPARISON) {
300
+ continue;
301
+ }
302
+ auto &comparison_filter = child_filter->Cast<ConstantFilter>();
303
+ if (comparison_filter.comparison_type != ExpressionType::COMPARE_EQUAL) {
304
+ continue;
305
+ }
306
+ auto column_count = base_stats.GetDistinctCount();
307
+ auto filtered_card = cardinality;
308
+ // column_count = 0 when there is no column count (i.e parquet scans)
309
+ if (column_count > 0) {
310
+ // we want the ceil of cardinality/column_count. We also want to avoid compiler errors
311
+ filtered_card = (cardinality + column_count - 1) / column_count;
312
+ cardinality_after_filters = filtered_card;
313
+ }
314
+ }
315
+ return cardinality_after_filters;
316
+ }
317
+
318
+ // TODO: Currently only simple AND filters are pushed into table scans.
319
+ // When OR filters are pushed this function can be added
320
+ // idx_t RelationStatisticsHelper::InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter
321
+ // &filter,
322
+ // BaseStatistics &base_stats) {
323
+ // auto has_equality_filter = false;
324
+ // auto cardinality_after_filters = cardinality;
325
+ // for (auto &child_filter : filter.child_filters) {
326
+ // if (child_filter->filter_type != TableFilterType::CONSTANT_COMPARISON) {
327
+ // continue;
328
+ // }
329
+ // auto &comparison_filter = child_filter->Cast<ConstantFilter>();
330
+ // if (comparison_filter.comparison_type == ExpressionType::COMPARE_EQUAL) {
331
+ // auto column_count = base_stats.GetDistinctCount();
332
+ // auto increment = MaxValue<idx_t>(((cardinality + column_count - 1) / column_count), 1);
333
+ // if (has_equality_filter) {
334
+ // cardinality_after_filters += increment;
335
+ // } else {
336
+ // cardinality_after_filters = increment;
337
+ // }
338
+ // has_equality_filter = true;
339
+ // }
340
+ // if (child_filter->filter_type == TableFilterType::CONJUNCTION_AND) {
341
+ // auto &and_filter = child_filter->Cast<ConjunctionAndFilter>();
342
+ // cardinality_after_filters = RelationStatisticsHelper::InspectConjunctionAND(
343
+ // cardinality_after_filters, column_index, and_filter, base_stats);
344
+ // continue;
345
+ // }
346
+ // }
347
+ // D_ASSERT(cardinality_after_filters > 0);
348
+ // return cardinality_after_filters;
349
+ //}
350
+
351
+ } // namespace duckdb
@@ -458,6 +458,8 @@ bool Executor::ExecutionIsFinished() {
458
458
  }
459
459
 
460
460
  PendingExecutionResult Executor::ExecuteTask() {
461
+ // Only executor should return NO_TASKS_AVAILABLE
462
+ D_ASSERT(execution_result != PendingExecutionResult::NO_TASKS_AVAILABLE);
461
463
  if (execution_result != PendingExecutionResult::RESULT_NOT_READY) {
462
464
  return execution_result;
463
465
  }
@@ -468,6 +470,10 @@ PendingExecutionResult Executor::ExecuteTask() {
468
470
  if (!task) {
469
471
  scheduler.GetTaskFromProducer(*producer, task);
470
472
  }
473
+ if (!task && !HasError()) {
474
+ // there are no tasks to be scheduled and there are tasks blocked
475
+ return PendingExecutionResult::NO_TASKS_AVAILABLE;
476
+ }
471
477
  if (task) {
472
478
  // if we have a task, partially process it
473
479
  auto result = task->Execute(TaskExecutionMode::PROCESS_PARTIAL);
@@ -9,6 +9,7 @@
9
9
  #include "concurrentqueue.h"
10
10
  #include "duckdb/common/thread.hpp"
11
11
  #include "lightweightsemaphore.h"
12
+ #include <thread>
12
13
  #else
13
14
  #include <queue>
14
15
  #endif
@@ -256,6 +257,12 @@ void TaskScheduler::Signal(idx_t n) {
256
257
  #endif
257
258
  }
258
259
 
260
+ void TaskScheduler::YieldThread() {
261
+ #ifndef DUCKDB_NO_THREADS
262
+ std::this_thread::yield();
263
+ #endif
264
+ }
265
+
259
266
  void TaskScheduler::SetThreadsInternal(int32_t n) {
260
267
  #ifndef DUCKDB_NO_THREADS
261
268
  if (threads.size() == idx_t(n - 1)) {
@@ -8,6 +8,7 @@
8
8
  #include "duckdb/parser/statement/extension_statement.hpp"
9
9
  #include "duckdb/parser/statement/select_statement.hpp"
10
10
  #include "duckdb/parser/statement/update_statement.hpp"
11
+ #include "duckdb/parser/group_by_node.hpp"
11
12
  #include "duckdb/parser/tableref/expressionlistref.hpp"
12
13
  #include "duckdb/parser/transformer.hpp"
13
14
  #include "parser/parser.hpp"
@@ -340,6 +341,22 @@ vector<unique_ptr<ParsedExpression>> Parser::ParseExpressionList(const string &s
340
341
  return std::move(select_node.select_list);
341
342
  }
342
343
 
344
+ GroupByNode Parser::ParseGroupByList(const string &group_by, ParserOptions options) {
345
+ // construct a mock SELECT query with our group_by expressions
346
+ string mock_query = StringUtil::Format("SELECT 42 GROUP BY %s", group_by);
347
+ // parse the query
348
+ Parser parser(options);
349
+ parser.ParseQuery(mock_query);
350
+ // check the result
351
+ if (parser.statements.size() != 1 || parser.statements[0]->type != StatementType::SELECT_STATEMENT) {
352
+ throw ParserException("Expected a single SELECT statement");
353
+ }
354
+ auto &select = parser.statements[0]->Cast<SelectStatement>();
355
+ D_ASSERT(select.node->type == QueryNodeType::SELECT_NODE);
356
+ auto &select_node = select.node->Cast<SelectNode>();
357
+ return std::move(select_node.groups);
358
+ }
359
+
343
360
  vector<OrderByNode> Parser::ParseOrderList(const string &select_list, ParserOptions options) {
344
361
  // construct a mock query
345
362
  string mock_query = "SELECT * FROM tbl ORDER BY " + select_list;
@@ -351,9 +368,7 @@ vector<OrderByNode> Parser::ParseOrderList(const string &select_list, ParserOpti
351
368
  throw ParserException("Expected a single SELECT statement");
352
369
  }
353
370
  auto &select = parser.statements[0]->Cast<SelectStatement>();
354
- if (select.node->type != QueryNodeType::SELECT_NODE) {
355
- throw ParserException("Expected a single SELECT node");
356
- }
371
+ D_ASSERT(select.node->type == QueryNodeType::SELECT_NODE);
357
372
  auto &select_node = select.node->Cast<SelectNode>();
358
373
  if (select_node.modifiers.empty() || select_node.modifiers[0]->type != ResultModifierType::ORDER_MODIFIER ||
359
374
  select_node.modifiers.size() != 1) {
@@ -161,9 +161,9 @@ void PivotColumnEntry::Serialize(Serializer &serializer) const {
161
161
  }
162
162
 
163
163
  void PivotColumnEntry::FormatSerialize(FormatSerializer &serializer) const {
164
- serializer.WriteProperty("values", values);
165
- serializer.WriteOptionalProperty("star_expr", star_expr);
166
- serializer.WriteProperty("alias", alias);
164
+ serializer.WriteProperty(100, "values", values);
165
+ serializer.WriteOptionalProperty(101, "star_expr", star_expr);
166
+ serializer.WriteProperty(102, "alias", alias);
167
167
  }
168
168
 
169
169
  PivotColumnEntry PivotColumnEntry::Deserialize(Deserializer &source) {
@@ -178,9 +178,9 @@ PivotColumnEntry PivotColumnEntry::Deserialize(Deserializer &source) {
178
178
 
179
179
  PivotColumnEntry PivotColumnEntry::FormatDeserialize(FormatDeserializer &source) {
180
180
  PivotColumnEntry result;
181
- source.ReadProperty("values", result.values);
182
- source.ReadOptionalProperty("star_expr", result.star_expr);
183
- source.ReadProperty("alias", result.alias);
181
+ source.ReadProperty(100, "values", result.values);
182
+ source.ReadOptionalProperty(101, "star_expr", result.star_expr);
183
+ source.ReadProperty(102, "alias", result.alias);
184
184
  return result;
185
185
  }
186
186
 
@@ -46,7 +46,7 @@ BoundStatement Binder::Bind(ExecuteStatement &stmt) {
46
46
  // catalog was modified or statement does not have clear types: rebind the statement before running the execute
47
47
  Planner prepared_planner(context);
48
48
  for (auto &pair : bind_values) {
49
- prepared_planner.parameter_data.emplace(pair);
49
+ prepared_planner.parameter_data.emplace(std::make_pair(pair.first, BoundParameterData(pair.second)));
50
50
  }
51
51
  prepared = prepared_planner.PrepareSQLStatement(entry->second->unbound_statement->Copy());
52
52
  rebound_plan = std::move(prepared_planner.plan);
@@ -106,24 +106,24 @@ unique_ptr<Expression> BoundAggregateExpression::Deserialize(ExpressionDeseriali
106
106
 
107
107
  void BoundAggregateExpression::FormatSerialize(FormatSerializer &serializer) const {
108
108
  Expression::FormatSerialize(serializer);
109
- serializer.WriteProperty("return_type", return_type);
110
- serializer.WriteProperty("children", children);
109
+ serializer.WriteProperty(200, "return_type", return_type);
110
+ serializer.WriteProperty(201, "children", children);
111
111
  FunctionSerializer::FormatSerialize(serializer, function, bind_info.get());
112
- serializer.WriteProperty("aggregate_type", aggr_type);
113
- serializer.WriteOptionalProperty("filter", filter);
114
- serializer.WriteOptionalProperty("order_bys", order_bys);
112
+ serializer.WriteProperty(203, "aggregate_type", aggr_type);
113
+ serializer.WriteOptionalProperty(204, "filter", filter);
114
+ serializer.WriteOptionalProperty(205, "order_bys", order_bys);
115
115
  }
116
116
 
117
117
  unique_ptr<Expression> BoundAggregateExpression::FormatDeserialize(FormatDeserializer &deserializer) {
118
- auto return_type = deserializer.ReadProperty<LogicalType>("return_type");
119
- auto children = deserializer.ReadProperty<vector<unique_ptr<Expression>>>("children");
118
+ auto return_type = deserializer.ReadProperty<LogicalType>(200, "return_type");
119
+ auto children = deserializer.ReadProperty<vector<unique_ptr<Expression>>>(201, "children");
120
120
  auto entry = FunctionSerializer::FormatDeserialize<AggregateFunction, AggregateFunctionCatalogEntry>(
121
121
  deserializer, CatalogType::AGGREGATE_FUNCTION_ENTRY, children);
122
- auto aggregate_type = deserializer.ReadProperty<AggregateType>("aggregate_type");
123
- auto filter = deserializer.ReadOptionalProperty<unique_ptr<Expression>>("filter");
122
+ auto aggregate_type = deserializer.ReadProperty<AggregateType>(203, "aggregate_type");
123
+ auto filter = deserializer.ReadOptionalProperty<unique_ptr<Expression>>(204, "filter");
124
124
  auto result = make_uniq<BoundAggregateExpression>(std::move(entry.first), std::move(children), std::move(filter),
125
125
  std::move(entry.second), aggregate_type);
126
- deserializer.ReadOptionalProperty("order_bys", result->order_bys);
126
+ deserializer.ReadOptionalProperty(205, "order_bys", result->order_bys);
127
127
  return std::move(result);
128
128
  }
129
129
 
@@ -97,20 +97,20 @@ unique_ptr<Expression> BoundFunctionExpression::Deserialize(ExpressionDeserializ
97
97
 
98
98
  void BoundFunctionExpression::FormatSerialize(FormatSerializer &serializer) const {
99
99
  Expression::FormatSerialize(serializer);
100
- serializer.WriteProperty("return_type", return_type);
101
- serializer.WriteProperty("children", children);
100
+ serializer.WriteProperty(200, "return_type", return_type);
101
+ serializer.WriteProperty(201, "children", children);
102
102
  FunctionSerializer::FormatSerialize(serializer, function, bind_info.get());
103
- serializer.WriteProperty("is_operator", is_operator);
103
+ serializer.WriteProperty(202, "is_operator", is_operator);
104
104
  }
105
105
 
106
106
  unique_ptr<Expression> BoundFunctionExpression::FormatDeserialize(FormatDeserializer &deserializer) {
107
- auto return_type = deserializer.ReadProperty<LogicalType>("return_type");
108
- auto children = deserializer.ReadProperty<vector<unique_ptr<Expression>>>("children");
107
+ auto return_type = deserializer.ReadProperty<LogicalType>(200, "return_type");
108
+ auto children = deserializer.ReadProperty<vector<unique_ptr<Expression>>>(201, "children");
109
109
  auto entry = FunctionSerializer::FormatDeserialize<ScalarFunction, ScalarFunctionCatalogEntry>(
110
110
  deserializer, CatalogType::SCALAR_FUNCTION_ENTRY, children);
111
111
  auto result = make_uniq<BoundFunctionExpression>(std::move(return_type), std::move(entry.first),
112
112
  std::move(children), std::move(entry.second));
113
- deserializer.ReadProperty("is_operator", result->is_operator);
113
+ deserializer.ReadProperty(202, "is_operator", result->is_operator);
114
114
  return std::move(result);
115
115
  }
116
116
 
@@ -164,28 +164,28 @@ unique_ptr<Expression> BoundWindowExpression::Deserialize(ExpressionDeserializat
164
164
 
165
165
  void BoundWindowExpression::FormatSerialize(FormatSerializer &serializer) const {
166
166
  Expression::FormatSerialize(serializer);
167
- serializer.WriteProperty("return_type", return_type);
168
- serializer.WriteProperty("children", children);
167
+ serializer.WriteProperty(200, "return_type", return_type);
168
+ serializer.WriteProperty(201, "children", children);
169
169
  if (type == ExpressionType::WINDOW_AGGREGATE) {
170
170
  D_ASSERT(aggregate);
171
171
  FunctionSerializer::FormatSerialize(serializer, *aggregate, bind_info.get());
172
172
  }
173
- serializer.WriteProperty("partitions", partitions);
174
- serializer.WriteProperty("orders", orders);
175
- serializer.WriteOptionalProperty("filters", filter_expr);
176
- serializer.WriteProperty("ignore_nulls", ignore_nulls);
177
- serializer.WriteProperty("start", start);
178
- serializer.WriteProperty("end", end);
179
- serializer.WriteOptionalProperty("start_expr", start_expr);
180
- serializer.WriteOptionalProperty("end_expr", end_expr);
181
- serializer.WriteOptionalProperty("offset_expr", offset_expr);
182
- serializer.WriteOptionalProperty("default_expr", default_expr);
173
+ serializer.WriteProperty(202, "partitions", partitions);
174
+ serializer.WriteProperty(203, "orders", orders);
175
+ serializer.WriteOptionalProperty(204, "filters", filter_expr);
176
+ serializer.WriteProperty(205, "ignore_nulls", ignore_nulls);
177
+ serializer.WriteProperty(206, "start", start);
178
+ serializer.WriteProperty(207, "end", end);
179
+ serializer.WriteOptionalProperty(208, "start_expr", start_expr);
180
+ serializer.WriteOptionalProperty(209, "end_expr", end_expr);
181
+ serializer.WriteOptionalProperty(210, "offset_expr", offset_expr);
182
+ serializer.WriteOptionalProperty(211, "default_expr", default_expr);
183
183
  }
184
184
 
185
185
  unique_ptr<Expression> BoundWindowExpression::FormatDeserialize(FormatDeserializer &deserializer) {
186
186
  auto expression_type = deserializer.Get<ExpressionType>();
187
- auto return_type = deserializer.ReadProperty<LogicalType>("return_type");
188
- auto children = deserializer.ReadProperty<vector<unique_ptr<Expression>>>("children");
187
+ auto return_type = deserializer.ReadProperty<LogicalType>(200, "return_type");
188
+ auto children = deserializer.ReadProperty<vector<unique_ptr<Expression>>>(201, "children");
189
189
  unique_ptr<AggregateFunction> aggregate;
190
190
  unique_ptr<FunctionData> bind_info;
191
191
  if (expression_type == ExpressionType::WINDOW_AGGREGATE) {
@@ -196,16 +196,16 @@ unique_ptr<Expression> BoundWindowExpression::FormatDeserialize(FormatDeserializ
196
196
  }
197
197
  auto result =
198
198
  make_uniq<BoundWindowExpression>(expression_type, return_type, std::move(aggregate), std::move(bind_info));
199
- deserializer.ReadProperty("partitions", result->partitions);
200
- deserializer.ReadProperty("orders", result->orders);
201
- deserializer.ReadOptionalProperty("filters", result->filter_expr);
202
- deserializer.ReadProperty("ignore_nulls", result->ignore_nulls);
203
- deserializer.ReadProperty("start", result->start);
204
- deserializer.ReadProperty("end", result->end);
205
- deserializer.ReadOptionalProperty("start_expr", result->start_expr);
206
- deserializer.ReadOptionalProperty("end_expr", result->end_expr);
207
- deserializer.ReadOptionalProperty("offset_expr", result->offset_expr);
208
- deserializer.ReadOptionalProperty("default_expr", result->default_expr);
199
+ deserializer.ReadProperty(202, "partitions", result->partitions);
200
+ deserializer.ReadProperty(203, "orders", result->orders);
201
+ deserializer.ReadOptionalProperty(204, "filters", result->filter_expr);
202
+ deserializer.ReadProperty(205, "ignore_nulls", result->ignore_nulls);
203
+ deserializer.ReadProperty(206, "start", result->start);
204
+ deserializer.ReadProperty(207, "end", result->end);
205
+ deserializer.ReadOptionalProperty(208, "start_expr", result->start_expr);
206
+ deserializer.ReadOptionalProperty(209, "end_expr", result->end_expr);
207
+ deserializer.ReadOptionalProperty(210, "offset_expr", result->offset_expr);
208
+ deserializer.ReadOptionalProperty(211, "default_expr", result->default_expr);
209
209
  return std::move(result);
210
210
  }
211
211
 
@@ -20,12 +20,12 @@ unique_ptr<LogicalExtensionOperator> LogicalExtensionOperator::Deserialize(Logic
20
20
 
21
21
  void LogicalExtensionOperator::FormatSerialize(FormatSerializer &serializer) const {
22
22
  LogicalOperator::FormatSerialize(serializer);
23
- serializer.WriteProperty("extension_name", GetExtensionName());
23
+ serializer.WriteProperty(200, "extension_name", GetExtensionName());
24
24
  }
25
25
 
26
26
  unique_ptr<LogicalOperator> LogicalExtensionOperator::FormatDeserialize(FormatDeserializer &deserializer) {
27
27
  auto &config = DBConfig::GetConfig(deserializer.Get<ClientContext &>());
28
- auto extension_name = deserializer.ReadProperty<string>("extension_name");
28
+ auto extension_name = deserializer.ReadProperty<string>(200, "extension_name");
29
29
  for (auto &extension : config.operator_extensions) {
30
30
  if (extension->GetName() == extension_name) {
31
31
  return extension->FormatDeserialize(deserializer);