duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/read_csv.cpp +124 -58
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/main/settings/settings.cpp +3 -4
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -15,7 +15,7 @@ namespace std {
|
|
15
15
|
template <>
|
16
16
|
struct hash<duckdb::JoinNode> {
|
17
17
|
inline string operator()(const duckdb::JoinNode &join_node) const {
|
18
|
-
return join_node.set
|
18
|
+
return join_node.set.ToString();
|
19
19
|
}
|
20
20
|
};
|
21
21
|
} // namespace std
|
@@ -93,15 +93,16 @@ static unique_ptr<LogicalOperator> PushFilter(unique_ptr<LogicalOperator> node,
|
|
93
93
|
return node;
|
94
94
|
}
|
95
95
|
|
96
|
-
bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op,
|
97
|
-
LogicalOperator
|
96
|
+
bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op,
|
97
|
+
vector<reference<LogicalOperator>> &filter_operators,
|
98
|
+
optional_ptr<LogicalOperator> parent) {
|
98
99
|
LogicalOperator *op = &input_op;
|
99
100
|
while (op->children.size() == 1 &&
|
100
101
|
(op->type != LogicalOperatorType::LOGICAL_PROJECTION &&
|
101
102
|
op->type != LogicalOperatorType::LOGICAL_EXPRESSION_GET && op->type != LogicalOperatorType::LOGICAL_GET)) {
|
102
103
|
if (op->type == LogicalOperatorType::LOGICAL_FILTER) {
|
103
104
|
// extract join conditions from filter
|
104
|
-
filter_operators.push_back(op);
|
105
|
+
filter_operators.push_back(*op);
|
105
106
|
}
|
106
107
|
if (op->type == LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY ||
|
107
108
|
op->type == LogicalOperatorType::LOGICAL_WINDOW) {
|
@@ -124,7 +125,7 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
|
|
124
125
|
auto &join = op->Cast<LogicalComparisonJoin>();
|
125
126
|
if (join.join_type == JoinType::INNER) {
|
126
127
|
// extract join conditions from inner join
|
127
|
-
filter_operators.push_back(op);
|
128
|
+
filter_operators.push_back(*op);
|
128
129
|
} else {
|
129
130
|
// non-inner join, not reorderable yet
|
130
131
|
non_reorderable_operation = true;
|
@@ -174,7 +175,7 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
|
|
174
175
|
unordered_set<idx_t> bindings;
|
175
176
|
LogicalJoin::GetTableReferences(*op, bindings);
|
176
177
|
// now create the relation that refers to all these bindings
|
177
|
-
auto relation = make_uniq<SingleJoinRelation>(
|
178
|
+
auto relation = make_uniq<SingleJoinRelation>(input_op, parent);
|
178
179
|
auto relation_id = relations.size();
|
179
180
|
// Add binding information from the nonreorderable join to this relation.
|
180
181
|
for (idx_t it : bindings) {
|
@@ -194,11 +195,10 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
|
|
194
195
|
bool can_reorder_right = ExtractJoinRelations(*op->children[1], filter_operators, op);
|
195
196
|
return can_reorder_left && can_reorder_right;
|
196
197
|
}
|
197
|
-
|
198
198
|
case LogicalOperatorType::LOGICAL_EXPRESSION_GET: {
|
199
199
|
// base table scan, add to set of relations
|
200
200
|
auto &get = op->Cast<LogicalExpressionGet>();
|
201
|
-
auto relation = make_uniq<SingleJoinRelation>(
|
201
|
+
auto relation = make_uniq<SingleJoinRelation>(input_op, parent);
|
202
202
|
//! make sure the optimizer has knowledge of the exact column bindings as well.
|
203
203
|
relation_mapping[get.table_index] = relations.size();
|
204
204
|
relations.push_back(std::move(relation));
|
@@ -207,7 +207,7 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
|
|
207
207
|
case LogicalOperatorType::LOGICAL_DUMMY_SCAN: {
|
208
208
|
// table function call, add to set of relations
|
209
209
|
auto &dummy_scan = op->Cast<LogicalDummyScan>();
|
210
|
-
auto relation = make_uniq<SingleJoinRelation>(
|
210
|
+
auto relation = make_uniq<SingleJoinRelation>(input_op, parent);
|
211
211
|
relation_mapping[dummy_scan.table_index] = relations.size();
|
212
212
|
relations.push_back(std::move(relation));
|
213
213
|
return true;
|
@@ -215,13 +215,13 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
|
|
215
215
|
case LogicalOperatorType::LOGICAL_GET:
|
216
216
|
case LogicalOperatorType::LOGICAL_PROJECTION: {
|
217
217
|
auto table_index = op->GetTableIndex()[0];
|
218
|
-
auto relation = make_uniq<SingleJoinRelation>(
|
218
|
+
auto relation = make_uniq<SingleJoinRelation>(input_op, parent);
|
219
219
|
auto relation_id = relations.size();
|
220
220
|
|
221
221
|
// If the children are empty, operator can't ge a logical get.
|
222
222
|
if (op->children.empty() && op->type == LogicalOperatorType::LOGICAL_GET) {
|
223
223
|
auto &get = op->Cast<LogicalGet>();
|
224
|
-
cardinality_estimator.AddRelationColumnMapping(
|
224
|
+
cardinality_estimator.AddRelationColumnMapping(get, relation_id);
|
225
225
|
relation_mapping[table_index] = relation_id;
|
226
226
|
relations.push_back(std::move(relation));
|
227
227
|
return true;
|
@@ -230,7 +230,6 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
|
|
230
230
|
// we run the join order optimizer within the subquery as well
|
231
231
|
JoinOrderOptimizer optimizer(context);
|
232
232
|
op->children[0] = optimizer.Optimize(std::move(op->children[0]));
|
233
|
-
|
234
233
|
// push one child column binding map back.
|
235
234
|
vector<column_binding_map_t<ColumnBinding>> child_binding_maps;
|
236
235
|
child_binding_maps.emplace_back();
|
@@ -253,26 +252,26 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
|
|
253
252
|
}
|
254
253
|
|
255
254
|
//! Update the exclusion set with all entries in the subgraph
|
256
|
-
static void UpdateExclusionSet(JoinRelationSet
|
257
|
-
for (idx_t i = 0; i < node
|
258
|
-
exclusion_set.insert(node
|
255
|
+
static void UpdateExclusionSet(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) {
|
256
|
+
for (idx_t i = 0; i < node.count; i++) {
|
257
|
+
exclusion_set.insert(node.relations[i]);
|
259
258
|
}
|
260
259
|
}
|
261
260
|
|
262
261
|
//! Create a new JoinTree node by joining together two previous JoinTree nodes
|
263
|
-
unique_ptr<JoinNode> JoinOrderOptimizer::CreateJoinTree(JoinRelationSet
|
264
|
-
const vector<NeighborInfo
|
265
|
-
JoinNode
|
262
|
+
unique_ptr<JoinNode> JoinOrderOptimizer::CreateJoinTree(JoinRelationSet &set,
|
263
|
+
const vector<reference<NeighborInfo>> &possible_connections,
|
264
|
+
JoinNode &left, JoinNode &right) {
|
266
265
|
// for the hash join we want the right side (build side) to have the smallest cardinality
|
267
266
|
// also just a heuristic but for now...
|
268
267
|
// FIXME: we should probably actually benchmark that as well
|
269
268
|
// FIXME: should consider different join algorithms, should we pick a join algorithm here as well? (probably)
|
270
269
|
double expected_cardinality;
|
271
|
-
NeighborInfo
|
272
|
-
auto plan = plans.find(set);
|
270
|
+
optional_ptr<NeighborInfo> best_connection;
|
271
|
+
auto plan = plans.find(&set);
|
273
272
|
// if we have already calculated an expected cardinality for this set,
|
274
273
|
// just re-use that cardinality
|
275
|
-
if (left
|
274
|
+
if (left.GetCardinality<double>() < right.GetCardinality<double>()) {
|
276
275
|
return CreateJoinTree(set, possible_connections, right, left);
|
277
276
|
}
|
278
277
|
if (plan != plans.end()) {
|
@@ -280,14 +279,14 @@ unique_ptr<JoinNode> JoinOrderOptimizer::CreateJoinTree(JoinRelationSet *set,
|
|
280
279
|
throw InternalException("No plan: internal error in join order optimizer");
|
281
280
|
}
|
282
281
|
expected_cardinality = plan->second->GetCardinality<double>();
|
283
|
-
best_connection = possible_connections.back();
|
282
|
+
best_connection = &possible_connections.back().get();
|
284
283
|
} else if (possible_connections.empty()) {
|
285
284
|
// cross product
|
286
285
|
expected_cardinality = cardinality_estimator.EstimateCrossProduct(left, right);
|
287
286
|
} else {
|
288
287
|
// normal join, expect foreign key join
|
289
288
|
expected_cardinality = cardinality_estimator.EstimateCardinalityWithSet(set);
|
290
|
-
best_connection = possible_connections.back();
|
289
|
+
best_connection = &possible_connections.back().get();
|
291
290
|
}
|
292
291
|
|
293
292
|
auto cost = CardinalityEstimator::ComputeCost(left, right, expected_cardinality);
|
@@ -296,50 +295,51 @@ unique_ptr<JoinNode> JoinOrderOptimizer::CreateJoinTree(JoinRelationSet *set,
|
|
296
295
|
return result;
|
297
296
|
}
|
298
297
|
|
299
|
-
bool JoinOrderOptimizer::NodeInFullPlan(JoinNode
|
300
|
-
return join_nodes_in_full_plan.find(node
|
298
|
+
bool JoinOrderOptimizer::NodeInFullPlan(JoinNode &node) {
|
299
|
+
return join_nodes_in_full_plan.find(node.set.ToString()) != join_nodes_in_full_plan.end();
|
301
300
|
}
|
302
301
|
|
303
|
-
void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode
|
304
|
-
if (
|
305
|
-
return;
|
306
|
-
}
|
307
|
-
if (node->set->count == relations.size()) {
|
302
|
+
void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode &node) {
|
303
|
+
if (node.set.count == relations.size()) {
|
308
304
|
join_nodes_in_full_plan.clear();
|
309
305
|
}
|
310
|
-
if (node
|
311
|
-
join_nodes_in_full_plan.insert(node
|
306
|
+
if (node.set.count < relations.size()) {
|
307
|
+
join_nodes_in_full_plan.insert(node.set.ToString());
|
308
|
+
}
|
309
|
+
if (node.left) {
|
310
|
+
UpdateJoinNodesInFullPlan(*node.left);
|
311
|
+
}
|
312
|
+
if (node.right) {
|
313
|
+
UpdateJoinNodesInFullPlan(*node.right);
|
312
314
|
}
|
313
|
-
UpdateJoinNodesInFullPlan(node->left);
|
314
|
-
UpdateJoinNodesInFullPlan(node->right);
|
315
315
|
}
|
316
316
|
|
317
|
-
JoinNode
|
318
|
-
const vector<NeighborInfo
|
317
|
+
JoinNode &JoinOrderOptimizer::EmitPair(JoinRelationSet &left, JoinRelationSet &right,
|
318
|
+
const vector<reference<NeighborInfo>> &info) {
|
319
319
|
// get the left and right join plans
|
320
|
-
auto &left_plan = plans[left];
|
321
|
-
auto &right_plan = plans[right];
|
320
|
+
auto &left_plan = plans[&left];
|
321
|
+
auto &right_plan = plans[&right];
|
322
322
|
if (!left_plan || !right_plan) {
|
323
323
|
throw InternalException("No left or right plan: internal error in join order optimizer");
|
324
324
|
}
|
325
|
-
auto new_set = set_manager.Union(left, right);
|
325
|
+
auto &new_set = set_manager.Union(left, right);
|
326
326
|
// create the join tree based on combining the two plans
|
327
|
-
auto new_plan = CreateJoinTree(new_set, info, left_plan
|
327
|
+
auto new_plan = CreateJoinTree(new_set, info, *left_plan, *right_plan);
|
328
328
|
// check if this plan is the optimal plan we found for this set of relations
|
329
|
-
auto entry = plans.find(new_set);
|
329
|
+
auto entry = plans.find(&new_set);
|
330
330
|
if (entry == plans.end() || new_plan->GetCost() < entry->second->GetCost()) {
|
331
331
|
// the plan is the optimal plan, move it into the dynamic programming tree
|
332
|
-
auto result = new_plan
|
332
|
+
auto &result = *new_plan;
|
333
333
|
|
334
334
|
//! make sure plans are symmetric for cardinality estimation
|
335
335
|
if (entry != plans.end()) {
|
336
|
-
cardinality_estimator.VerifySymmetry(result, entry->second
|
336
|
+
cardinality_estimator.VerifySymmetry(result, *entry->second);
|
337
337
|
}
|
338
338
|
if (full_plan_found &&
|
339
|
-
join_nodes_in_full_plan.find(new_plan->set
|
339
|
+
join_nodes_in_full_plan.find(new_plan->set.ToString()) != join_nodes_in_full_plan.end()) {
|
340
340
|
must_update_full_plan = true;
|
341
341
|
}
|
342
|
-
if (new_set
|
342
|
+
if (new_set.count == relations.size()) {
|
343
343
|
full_plan_found = true;
|
344
344
|
// If we find a full plan, we need to keep track of which nodes are in the full plan.
|
345
345
|
// It's possible the DP algorithm updates one of these nodes, then goes on to solve
|
@@ -355,14 +355,14 @@ JoinNode *JoinOrderOptimizer::EmitPair(JoinRelationSet *left, JoinRelationSet *r
|
|
355
355
|
}
|
356
356
|
|
357
357
|
D_ASSERT(new_plan);
|
358
|
-
plans[new_set] = std::move(new_plan);
|
358
|
+
plans[&new_set] = std::move(new_plan);
|
359
359
|
return result;
|
360
360
|
}
|
361
|
-
return entry->second
|
361
|
+
return *entry->second;
|
362
362
|
}
|
363
363
|
|
364
|
-
bool JoinOrderOptimizer::TryEmitPair(JoinRelationSet
|
365
|
-
const vector<NeighborInfo
|
364
|
+
bool JoinOrderOptimizer::TryEmitPair(JoinRelationSet &left, JoinRelationSet &right,
|
365
|
+
const vector<reference<NeighborInfo>> &info) {
|
366
366
|
pairs++;
|
367
367
|
// If a full plan is created, it's possible a node in the plan gets updated. When this happens, make sure you keep
|
368
368
|
// emitting pairs until you emit another final plan. Another final plan is guaranteed to be produced because of
|
@@ -377,13 +377,13 @@ bool JoinOrderOptimizer::TryEmitPair(JoinRelationSet *left, JoinRelationSet *rig
|
|
377
377
|
return true;
|
378
378
|
}
|
379
379
|
|
380
|
-
bool JoinOrderOptimizer::EmitCSG(JoinRelationSet
|
381
|
-
if (node
|
380
|
+
bool JoinOrderOptimizer::EmitCSG(JoinRelationSet &node) {
|
381
|
+
if (node.count == relations.size()) {
|
382
382
|
return true;
|
383
383
|
}
|
384
384
|
// create the exclusion set as everything inside the subgraph AND anything with members BELOW it
|
385
385
|
unordered_set<idx_t> exclusion_set;
|
386
|
-
for (idx_t i = 0; i < node
|
386
|
+
for (idx_t i = 0; i < node.relations[0]; i++) {
|
387
387
|
exclusion_set.insert(i);
|
388
388
|
}
|
389
389
|
UpdateExclusionSet(node, exclusion_set);
|
@@ -401,7 +401,7 @@ bool JoinOrderOptimizer::EmitCSG(JoinRelationSet *node) {
|
|
401
401
|
for (auto neighbor : neighbors) {
|
402
402
|
// since the GetNeighbors only returns the smallest element in a list, the entry might not be connected to
|
403
403
|
// (only!) this neighbor, hence we have to do a connectedness check before we can emit it
|
404
|
-
auto neighbor_relation = set_manager.GetJoinRelation(neighbor);
|
404
|
+
auto &neighbor_relation = set_manager.GetJoinRelation(neighbor);
|
405
405
|
auto connections = query_graph.GetConnections(node, neighbor_relation);
|
406
406
|
if (!connections.empty()) {
|
407
407
|
if (!TryEmitPair(node, neighbor_relation, connections)) {
|
@@ -415,20 +415,20 @@ bool JoinOrderOptimizer::EmitCSG(JoinRelationSet *node) {
|
|
415
415
|
return true;
|
416
416
|
}
|
417
417
|
|
418
|
-
bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet
|
418
|
+
bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right,
|
419
419
|
unordered_set<idx_t> exclusion_set) {
|
420
420
|
// get the neighbors of the second relation under the exclusion set
|
421
421
|
auto neighbors = query_graph.GetNeighbors(right, exclusion_set);
|
422
422
|
if (neighbors.empty()) {
|
423
423
|
return true;
|
424
424
|
}
|
425
|
-
vector<JoinRelationSet
|
426
|
-
union_sets.
|
425
|
+
vector<reference<JoinRelationSet>> union_sets;
|
426
|
+
union_sets.reserve(neighbors.size());
|
427
427
|
for (idx_t i = 0; i < neighbors.size(); i++) {
|
428
|
-
auto neighbor = set_manager.GetJoinRelation(neighbors[i]);
|
428
|
+
auto &neighbor = set_manager.GetJoinRelation(neighbors[i]);
|
429
429
|
// emit the combinations of this node and its neighbors
|
430
|
-
auto combined_set = set_manager.Union(right, neighbor);
|
431
|
-
if (combined_set
|
430
|
+
auto &combined_set = set_manager.Union(right, neighbor);
|
431
|
+
if (combined_set.count > right.count && plans.find(&combined_set) != plans.end()) {
|
432
432
|
auto connections = query_graph.GetConnections(left, combined_set);
|
433
433
|
if (!connections.empty()) {
|
434
434
|
if (!TryEmitPair(left, combined_set, connections)) {
|
@@ -436,7 +436,7 @@ bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet *left, JoinRelati
|
|
436
436
|
}
|
437
437
|
}
|
438
438
|
}
|
439
|
-
union_sets
|
439
|
+
union_sets.push_back(combined_set);
|
440
440
|
}
|
441
441
|
// recursively enumerate the sets
|
442
442
|
unordered_set<idx_t> new_exclusion_set = exclusion_set;
|
@@ -450,24 +450,24 @@ bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet *left, JoinRelati
|
|
450
450
|
return true;
|
451
451
|
}
|
452
452
|
|
453
|
-
bool JoinOrderOptimizer::EnumerateCSGRecursive(JoinRelationSet
|
453
|
+
bool JoinOrderOptimizer::EnumerateCSGRecursive(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) {
|
454
454
|
// find neighbors of S under the exclusion set
|
455
455
|
auto neighbors = query_graph.GetNeighbors(node, exclusion_set);
|
456
456
|
if (neighbors.empty()) {
|
457
457
|
return true;
|
458
458
|
}
|
459
|
-
vector<JoinRelationSet
|
460
|
-
union_sets.
|
459
|
+
vector<reference<JoinRelationSet>> union_sets;
|
460
|
+
union_sets.reserve(neighbors.size());
|
461
461
|
for (idx_t i = 0; i < neighbors.size(); i++) {
|
462
|
-
auto neighbor = set_manager.GetJoinRelation(neighbors[i]);
|
462
|
+
auto &neighbor = set_manager.GetJoinRelation(neighbors[i]);
|
463
463
|
// emit the combinations of this node and its neighbors
|
464
|
-
auto new_set = set_manager.Union(node, neighbor);
|
465
|
-
if (new_set
|
464
|
+
auto &new_set = set_manager.Union(node, neighbor);
|
465
|
+
if (new_set.count > node.count && plans.find(&new_set) != plans.end()) {
|
466
466
|
if (!EmitCSG(new_set)) {
|
467
467
|
return false;
|
468
468
|
}
|
469
469
|
}
|
470
|
-
union_sets
|
470
|
+
union_sets.push_back(new_set);
|
471
471
|
}
|
472
472
|
// recursively enumerate the sets
|
473
473
|
unordered_set<idx_t> new_exclusion_set = exclusion_set;
|
@@ -489,7 +489,7 @@ bool JoinOrderOptimizer::SolveJoinOrderExactly() {
|
|
489
489
|
// we enumerate over all the possible pairs in the neighborhood
|
490
490
|
for (idx_t i = relations.size(); i > 0; i--) {
|
491
491
|
// for every node in the set, we consider it as the start node once
|
492
|
-
auto start_node = set_manager.GetJoinRelation(i - 1);
|
492
|
+
auto &start_node = set_manager.GetJoinRelation(i - 1);
|
493
493
|
// emit the start node
|
494
494
|
if (!EmitCSG(start_node)) {
|
495
495
|
return false;
|
@@ -532,8 +532,7 @@ static vector<unordered_set<idx_t>> AddSuperSets(vector<unordered_set<idx_t>> cu
|
|
532
532
|
// works by first creating all sets with cardinality 1
|
533
533
|
// then iterates over each previously created group of subsets and will only add a neighbor if the neighbor
|
534
534
|
// is greater than all relations in the set.
|
535
|
-
static vector<unordered_set<idx_t>> GetAllNeighborSets(
|
536
|
-
vector<idx_t> neighbors) {
|
535
|
+
static vector<unordered_set<idx_t>> GetAllNeighborSets(unordered_set<idx_t> &exclusion_set, vector<idx_t> neighbors) {
|
537
536
|
vector<unordered_set<idx_t>> ret;
|
538
537
|
sort(neighbors.begin(), neighbors.end());
|
539
538
|
vector<unordered_set<idx_t>> added;
|
@@ -565,25 +564,25 @@ static vector<unordered_set<idx_t>> GetAllNeighborSets(JoinRelationSet *new_set,
|
|
565
564
|
return ret;
|
566
565
|
}
|
567
566
|
|
568
|
-
void JoinOrderOptimizer::UpdateDPTree(JoinNode
|
567
|
+
void JoinOrderOptimizer::UpdateDPTree(JoinNode &new_plan) {
|
569
568
|
if (!NodeInFullPlan(new_plan)) {
|
570
569
|
// if the new node is not in the full plan, feel free to return
|
571
570
|
// because you won't be updating the full plan.
|
572
571
|
return;
|
573
572
|
}
|
574
|
-
auto new_set = new_plan
|
573
|
+
auto &new_set = new_plan.set;
|
575
574
|
// now update every plan that uses this plan
|
576
575
|
unordered_set<idx_t> exclusion_set;
|
577
|
-
for (idx_t i = 0; i < new_set
|
578
|
-
exclusion_set.insert(new_set
|
576
|
+
for (idx_t i = 0; i < new_set.count; i++) {
|
577
|
+
exclusion_set.insert(new_set.relations[i]);
|
579
578
|
}
|
580
579
|
auto neighbors = query_graph.GetNeighbors(new_set, exclusion_set);
|
581
|
-
auto all_neighbors = GetAllNeighborSets(
|
580
|
+
auto all_neighbors = GetAllNeighborSets(exclusion_set, neighbors);
|
582
581
|
for (auto neighbor : all_neighbors) {
|
583
|
-
auto neighbor_relation = set_manager.GetJoinRelation(neighbor);
|
584
|
-
auto combined_set = set_manager.Union(new_set, neighbor_relation);
|
582
|
+
auto &neighbor_relation = set_manager.GetJoinRelation(neighbor);
|
583
|
+
auto &combined_set = set_manager.Union(new_set, neighbor_relation);
|
585
584
|
|
586
|
-
auto combined_set_plan = plans.find(combined_set);
|
585
|
+
auto combined_set_plan = plans.find(&combined_set);
|
587
586
|
if (combined_set_plan == plans.end()) {
|
588
587
|
continue;
|
589
588
|
}
|
@@ -592,14 +591,14 @@ void JoinOrderOptimizer::UpdateDPTree(JoinNode *new_plan) {
|
|
592
591
|
auto connections = query_graph.GetConnections(new_set, neighbor_relation);
|
593
592
|
// recurse and update up the tree if the combined set produces a plan with a lower cost
|
594
593
|
// only recurse on neighbor relations that have plans.
|
595
|
-
auto right_plan = plans.find(neighbor_relation);
|
594
|
+
auto right_plan = plans.find(&neighbor_relation);
|
596
595
|
if (right_plan == plans.end()) {
|
597
596
|
continue;
|
598
597
|
}
|
599
|
-
auto updated_plan = EmitPair(new_set, neighbor_relation, connections);
|
598
|
+
auto &updated_plan = EmitPair(new_set, neighbor_relation, connections);
|
600
599
|
// <= because the child node has already been replaced. You need to
|
601
600
|
// replace the parent node as well in this case
|
602
|
-
if (updated_plan
|
601
|
+
if (updated_plan.GetCost() < combined_set_plan_cost) {
|
603
602
|
UpdateDPTree(updated_plan);
|
604
603
|
}
|
605
604
|
}
|
@@ -609,7 +608,7 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
|
|
609
608
|
// at this point, we exited the dynamic programming but did not compute the final join order because it took too
|
610
609
|
// long instead, we use a greedy heuristic to obtain a join ordering now we use Greedy Operator Ordering to
|
611
610
|
// construct the result tree first we start out with all the base relations (the to-be-joined relations)
|
612
|
-
vector<JoinRelationSet
|
611
|
+
vector<reference<JoinRelationSet>> join_relations; // T in the paper
|
613
612
|
for (idx_t i = 0; i < relations.size(); i++) {
|
614
613
|
join_relations.push_back(set_manager.GetJoinRelation(i));
|
615
614
|
}
|
@@ -618,7 +617,7 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
|
|
618
617
|
// smallest cost. This is O(r^2) per step, and every step will reduce the total amount of relations to-be-joined
|
619
618
|
// by 1, so the total cost is O(r^3) in the amount of relations
|
620
619
|
idx_t best_left = 0, best_right = 0;
|
621
|
-
JoinNode
|
620
|
+
optional_ptr<JoinNode> best_connection;
|
622
621
|
for (idx_t i = 0; i < join_relations.size(); i++) {
|
623
622
|
auto left = join_relations[i];
|
624
623
|
for (idx_t j = i + 1; j < join_relations.size(); j++) {
|
@@ -627,7 +626,7 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
|
|
627
626
|
auto connection = query_graph.GetConnections(left, right);
|
628
627
|
if (!connection.empty()) {
|
629
628
|
// we can check the cost of this connection
|
630
|
-
auto node = EmitPair(left, right, connection);
|
629
|
+
auto &node = EmitPair(left, right, connection);
|
631
630
|
|
632
631
|
// update the DP tree in case a plan created by the DP algorithm uses the node
|
633
632
|
// that was potentially just updated by EmitPair. You will get a use-after-free
|
@@ -635,9 +634,9 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
|
|
635
634
|
// if node in FullPath, then updateDP tree.
|
636
635
|
UpdateDPTree(node);
|
637
636
|
|
638
|
-
if (!best_connection || node
|
637
|
+
if (!best_connection || node.GetCost() < best_connection->GetCost()) {
|
639
638
|
// best pair found so far
|
640
|
-
best_connection = node;
|
639
|
+
best_connection = &node;
|
641
640
|
best_left = i;
|
642
641
|
best_right = j;
|
643
642
|
}
|
@@ -647,11 +646,11 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
|
|
647
646
|
if (!best_connection) {
|
648
647
|
// could not find a connection, but we were not done with finding a completed plan
|
649
648
|
// we have to add a cross product; we add it between the two smallest relations
|
650
|
-
JoinNode
|
649
|
+
optional_ptr<JoinNode> smallest_plans[2];
|
651
650
|
idx_t smallest_index[2];
|
652
651
|
for (idx_t i = 0; i < join_relations.size(); i++) {
|
653
652
|
// get the plan for this relation
|
654
|
-
auto current_plan = plans[join_relations[i]].get();
|
653
|
+
auto current_plan = plans[&join_relations[i].get()].get();
|
655
654
|
// check if the cardinality is smaller than the smallest two found so far
|
656
655
|
for (idx_t j = 0; j < 2; j++) {
|
657
656
|
if (!smallest_plans[j] ||
|
@@ -667,19 +666,19 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
|
|
667
666
|
}
|
668
667
|
D_ASSERT(smallest_plans[0] && smallest_plans[1]);
|
669
668
|
D_ASSERT(smallest_index[0] != smallest_index[1]);
|
670
|
-
auto left = smallest_plans[0]->set;
|
671
|
-
auto right = smallest_plans[1]->set;
|
669
|
+
auto &left = smallest_plans[0]->set;
|
670
|
+
auto &right = smallest_plans[1]->set;
|
672
671
|
// create a cross product edge (i.e. edge with empty filter) between these two sets in the query graph
|
673
672
|
query_graph.CreateEdge(left, right, nullptr);
|
674
673
|
// now emit the pair and continue with the algorithm
|
675
674
|
auto connections = query_graph.GetConnections(left, right);
|
676
675
|
D_ASSERT(!connections.empty());
|
677
676
|
|
678
|
-
best_connection = EmitPair(left, right, connections);
|
677
|
+
best_connection = &EmitPair(left, right, connections);
|
679
678
|
best_left = smallest_index[0];
|
680
679
|
best_right = smallest_index[1];
|
681
680
|
|
682
|
-
UpdateDPTree(best_connection);
|
681
|
+
UpdateDPTree(*best_connection);
|
683
682
|
// the code below assumes best_right > best_left
|
684
683
|
if (best_left > best_right) {
|
685
684
|
std::swap(best_left, best_right);
|
@@ -709,10 +708,10 @@ void JoinOrderOptimizer::GenerateCrossProducts() {
|
|
709
708
|
// generate a set of cross products to combine the currently available plans into a full join plan
|
710
709
|
// we create edges between every relation with a high cost
|
711
710
|
for (idx_t i = 0; i < relations.size(); i++) {
|
712
|
-
auto left = set_manager.GetJoinRelation(i);
|
711
|
+
auto &left = set_manager.GetJoinRelation(i);
|
713
712
|
for (idx_t j = 0; j < relations.size(); j++) {
|
714
713
|
if (i != j) {
|
715
|
-
auto right = set_manager.GetJoinRelation(j);
|
714
|
+
auto &right = set_manager.GetJoinRelation(j);
|
716
715
|
query_graph.CreateEdge(left, right, nullptr);
|
717
716
|
query_graph.CreateEdge(right, left, nullptr);
|
718
717
|
}
|
@@ -723,7 +722,7 @@ void JoinOrderOptimizer::GenerateCrossProducts() {
|
|
723
722
|
static unique_ptr<LogicalOperator> ExtractJoinRelation(SingleJoinRelation &rel) {
|
724
723
|
auto &children = rel.parent->children;
|
725
724
|
for (idx_t i = 0; i < children.size(); i++) {
|
726
|
-
if (children[i].get() == rel.op) {
|
725
|
+
if (children[i].get() == &rel.op) {
|
727
726
|
// found it! take ownership of it from the parent
|
728
727
|
auto result = std::move(children[i]);
|
729
728
|
children.erase(children.begin() + i);
|
@@ -733,39 +732,41 @@ static unique_ptr<LogicalOperator> ExtractJoinRelation(SingleJoinRelation &rel)
|
|
733
732
|
throw Exception("Could not find relation in parent node (?)");
|
734
733
|
}
|
735
734
|
|
736
|
-
|
737
|
-
|
738
|
-
JoinRelationSet
|
739
|
-
JoinRelationSet
|
735
|
+
GenerateJoinRelation JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations,
|
736
|
+
JoinNode &node) {
|
737
|
+
optional_ptr<JoinRelationSet> left_node;
|
738
|
+
optional_ptr<JoinRelationSet> right_node;
|
739
|
+
optional_ptr<JoinRelationSet> result_relation;
|
740
740
|
unique_ptr<LogicalOperator> result_operator;
|
741
|
-
if (node
|
741
|
+
if (node.left && node.right && node.info) {
|
742
742
|
// generate the left and right children
|
743
|
-
auto left = GenerateJoins(extracted_relations, node
|
744
|
-
auto right = GenerateJoins(extracted_relations, node
|
743
|
+
auto left = GenerateJoins(extracted_relations, *node.left);
|
744
|
+
auto right = GenerateJoins(extracted_relations, *node.right);
|
745
745
|
|
746
|
-
if (node
|
746
|
+
if (node.info->filters.empty()) {
|
747
747
|
// no filters, create a cross product
|
748
|
-
result_operator = LogicalCrossProduct::Create(std::move(left.
|
748
|
+
result_operator = LogicalCrossProduct::Create(std::move(left.op), std::move(right.op));
|
749
749
|
} else {
|
750
750
|
// we have filters, create a join node
|
751
751
|
auto join = make_uniq<LogicalComparisonJoin>(JoinType::INNER);
|
752
|
-
join->children.push_back(std::move(left.
|
753
|
-
join->children.push_back(std::move(right.
|
752
|
+
join->children.push_back(std::move(left.op));
|
753
|
+
join->children.push_back(std::move(right.op));
|
754
754
|
// set the join conditions from the join node
|
755
|
-
for (auto &
|
755
|
+
for (auto &filter_ref : node.info->filters) {
|
756
|
+
auto &f = filter_ref.get();
|
756
757
|
// extract the filter from the operator it originally belonged to
|
757
|
-
D_ASSERT(filters[f
|
758
|
-
auto condition = std::move(filters[f
|
758
|
+
D_ASSERT(filters[f.filter_index]);
|
759
|
+
auto condition = std::move(filters[f.filter_index]);
|
759
760
|
// now create the actual join condition
|
760
|
-
D_ASSERT((JoinRelationSet::IsSubset(left.
|
761
|
-
JoinRelationSet::IsSubset(right.
|
762
|
-
(JoinRelationSet::IsSubset(left.
|
763
|
-
JoinRelationSet::IsSubset(right.
|
761
|
+
D_ASSERT((JoinRelationSet::IsSubset(left.set, *f.left_set) &&
|
762
|
+
JoinRelationSet::IsSubset(right.set, *f.right_set)) ||
|
763
|
+
(JoinRelationSet::IsSubset(left.set, *f.right_set) &&
|
764
|
+
JoinRelationSet::IsSubset(right.set, *f.left_set)));
|
764
765
|
JoinCondition cond;
|
765
766
|
D_ASSERT(condition->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON);
|
766
767
|
auto &comparison = condition->Cast<BoundComparisonExpression>();
|
767
768
|
// we need to figure out which side is which by looking at the relations available to us
|
768
|
-
bool invert = !JoinRelationSet::IsSubset(left.
|
769
|
+
bool invert = !JoinRelationSet::IsSubset(left.set, *f.left_set);
|
769
770
|
cond.left = !invert ? std::move(comparison.left) : std::move(comparison.right);
|
770
771
|
cond.right = !invert ? std::move(comparison.right) : std::move(comparison.left);
|
771
772
|
cond.comparison = condition->type;
|
@@ -779,18 +780,18 @@ JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted
|
|
779
780
|
D_ASSERT(!join->conditions.empty());
|
780
781
|
result_operator = std::move(join);
|
781
782
|
}
|
782
|
-
left_node = left.
|
783
|
-
right_node = right.
|
784
|
-
right_node = right.
|
785
|
-
result_relation = set_manager.Union(left_node, right_node);
|
783
|
+
left_node = &left.set;
|
784
|
+
right_node = &right.set;
|
785
|
+
right_node = &right.set;
|
786
|
+
result_relation = &set_manager.Union(*left_node, *right_node);
|
786
787
|
} else {
|
787
788
|
// base node, get the entry from the list of extracted relations
|
788
|
-
D_ASSERT(node
|
789
|
-
D_ASSERT(extracted_relations[node
|
790
|
-
result_relation = node
|
791
|
-
result_operator = std::move(extracted_relations[node
|
789
|
+
D_ASSERT(node.set.count == 1);
|
790
|
+
D_ASSERT(extracted_relations[node.set.relations[0]]);
|
791
|
+
result_relation = &node.set;
|
792
|
+
result_operator = std::move(extracted_relations[node.set.relations[0]]);
|
792
793
|
}
|
793
|
-
result_operator->estimated_props = node
|
794
|
+
result_operator->estimated_props = node.estimated_props->Copy();
|
794
795
|
result_operator->estimated_cardinality = result_operator->estimated_props->GetCardinality<idx_t>();
|
795
796
|
result_operator->has_estimated_cardinality = true;
|
796
797
|
if (result_operator->type == LogicalOperatorType::LOGICAL_FILTER &&
|
@@ -808,16 +809,16 @@ JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted
|
|
808
809
|
// hence we should push it here
|
809
810
|
for (auto &filter_info : filter_infos) {
|
810
811
|
// check if the filter has already been extracted
|
811
|
-
auto info = filter_info
|
812
|
-
if (filters[info
|
812
|
+
auto &info = *filter_info;
|
813
|
+
if (filters[info.filter_index]) {
|
813
814
|
// now check if the filter is a subset of the current relation
|
814
815
|
// note that infos with an empty relation set are a special case and we do not push them down
|
815
|
-
if (info
|
816
|
-
auto filter = std::move(filters[info
|
816
|
+
if (info.set.count > 0 && JoinRelationSet::IsSubset(*result_relation, info.set)) {
|
817
|
+
auto filter = std::move(filters[info.filter_index]);
|
817
818
|
// if it is, we can push the filter
|
818
819
|
// we can push it either into a join or as a filter
|
819
820
|
// check if we are in a join or in a base table
|
820
|
-
if (!left_node || !info
|
821
|
+
if (!left_node || !info.left_set) {
|
821
822
|
// base table or non-comparison expression, push it as a filter
|
822
823
|
result_operator = PushFilter(std::move(result_operator), std::move(filter));
|
823
824
|
continue;
|
@@ -826,11 +827,11 @@ JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted
|
|
826
827
|
// check if the nodes can be split up into left/right
|
827
828
|
bool found_subset = false;
|
828
829
|
bool invert = false;
|
829
|
-
if (JoinRelationSet::IsSubset(left_node, info
|
830
|
-
JoinRelationSet::IsSubset(right_node, info
|
830
|
+
if (JoinRelationSet::IsSubset(*left_node, *info.left_set) &&
|
831
|
+
JoinRelationSet::IsSubset(*right_node, *info.right_set)) {
|
831
832
|
found_subset = true;
|
832
|
-
} else if (JoinRelationSet::IsSubset(right_node, info
|
833
|
-
JoinRelationSet::IsSubset(left_node, info
|
833
|
+
} else if (JoinRelationSet::IsSubset(*right_node, *info.left_set) &&
|
834
|
+
JoinRelationSet::IsSubset(*left_node, *info.right_set)) {
|
834
835
|
invert = true;
|
835
836
|
found_subset = true;
|
836
837
|
}
|
@@ -877,10 +878,10 @@ JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted
|
|
877
878
|
}
|
878
879
|
}
|
879
880
|
}
|
880
|
-
return
|
881
|
+
return GenerateJoinRelation(*result_relation, std::move(result_operator));
|
881
882
|
}
|
882
883
|
|
883
|
-
unique_ptr<LogicalOperator> JoinOrderOptimizer::RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode
|
884
|
+
unique_ptr<LogicalOperator> JoinOrderOptimizer::RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode &node) {
|
884
885
|
// now we have to rewrite the plan
|
885
886
|
bool root_is_join = plan->children.size() > 1;
|
886
887
|
|
@@ -898,14 +899,14 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::RewritePlan(unique_ptr<LogicalOp
|
|
898
899
|
// check if the filter has already been extracted
|
899
900
|
if (filter) {
|
900
901
|
// if not we need to push it
|
901
|
-
join_tree.
|
902
|
+
join_tree.op = PushFilter(std::move(join_tree.op), std::move(filter));
|
902
903
|
}
|
903
904
|
}
|
904
905
|
|
905
906
|
// find the first join in the relation to know where to place this node
|
906
907
|
if (root_is_join) {
|
907
908
|
// first node is the join, return it immediately
|
908
|
-
return std::move(join_tree.
|
909
|
+
return std::move(join_tree.op);
|
909
910
|
}
|
910
911
|
D_ASSERT(plan->children.size() == 1);
|
911
912
|
// have to move up through the relations
|
@@ -919,7 +920,7 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::RewritePlan(unique_ptr<LogicalOp
|
|
919
920
|
op = op->children[0].get();
|
920
921
|
}
|
921
922
|
// have to replace at this node
|
922
|
-
parent->children[0] = std::move(join_tree.
|
923
|
+
parent->children[0] = std::move(join_tree.op);
|
923
924
|
return plan;
|
924
925
|
}
|
925
926
|
|
@@ -935,7 +936,7 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
|
|
935
936
|
// group by and this filter cannot be reordered
|
936
937
|
// extract a list of all relations that have to be joined together
|
937
938
|
// and a list of all conditions that is applied to them
|
938
|
-
vector<LogicalOperator
|
939
|
+
vector<reference<LogicalOperator>> filter_operators;
|
939
940
|
if (!ExtractJoinRelations(*op, filter_operators)) {
|
940
941
|
// do not support reordering this type of plan
|
941
942
|
return plan;
|
@@ -947,10 +948,11 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
|
|
947
948
|
// now that we know we are going to perform join ordering we actually extract the filters, eliminating duplicate
|
948
949
|
// filters in the process
|
949
950
|
expression_set_t filter_set;
|
950
|
-
for (auto &
|
951
|
-
|
952
|
-
|
953
|
-
|
951
|
+
for (auto &filter_op : filter_operators) {
|
952
|
+
auto &f_op = filter_op.get();
|
953
|
+
if (f_op.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
|
954
|
+
f_op.type == LogicalOperatorType::LOGICAL_ASOF_JOIN) {
|
955
|
+
auto &join = f_op.Cast<LogicalComparisonJoin>();
|
954
956
|
D_ASSERT(join.join_type == JoinType::INNER);
|
955
957
|
D_ASSERT(join.expressions.empty());
|
956
958
|
for (auto &cond : join.conditions) {
|
@@ -963,26 +965,27 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
|
|
963
965
|
}
|
964
966
|
join.conditions.clear();
|
965
967
|
} else {
|
966
|
-
for (auto &expression : f_op
|
968
|
+
for (auto &expression : f_op.expressions) {
|
967
969
|
if (filter_set.find(*expression) == filter_set.end()) {
|
968
970
|
filter_set.insert(*expression);
|
969
971
|
filters.push_back(std::move(expression));
|
970
972
|
}
|
971
973
|
}
|
972
|
-
f_op
|
974
|
+
f_op.expressions.clear();
|
973
975
|
}
|
974
976
|
}
|
975
977
|
// create potential edges from the comparisons
|
976
978
|
for (idx_t i = 0; i < filters.size(); i++) {
|
977
979
|
auto &filter = filters[i];
|
978
|
-
auto info = make_uniq<FilterInfo>();
|
979
|
-
auto filter_info = info.get();
|
980
|
-
filter_infos.push_back(std::move(info));
|
981
980
|
// first extract the relation set for the entire filter
|
982
981
|
unordered_set<idx_t> bindings;
|
983
982
|
ExtractBindings(*filter, bindings);
|
984
|
-
|
985
|
-
|
983
|
+
auto &set = set_manager.GetJoinRelation(bindings);
|
984
|
+
|
985
|
+
auto info = make_uniq<FilterInfo>(set, i);
|
986
|
+
auto filter_info = info.get();
|
987
|
+
filter_infos.push_back(std::move(info));
|
988
|
+
|
986
989
|
// now check if it can be used as a join predicate
|
987
990
|
if (filter->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON) {
|
988
991
|
auto &comparison = filter->Cast<BoundComparisonExpression>();
|
@@ -995,15 +998,15 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
|
|
995
998
|
if (!left_bindings.empty() && !right_bindings.empty()) {
|
996
999
|
// both the left and the right side have bindings
|
997
1000
|
// first create the relation sets, if they do not exist
|
998
|
-
filter_info->left_set = set_manager.GetJoinRelation(left_bindings);
|
999
|
-
filter_info->right_set = set_manager.GetJoinRelation(right_bindings);
|
1001
|
+
filter_info->left_set = &set_manager.GetJoinRelation(left_bindings);
|
1002
|
+
filter_info->right_set = &set_manager.GetJoinRelation(right_bindings);
|
1000
1003
|
// we can only create a meaningful edge if the sets are not exactly the same
|
1001
1004
|
if (filter_info->left_set != filter_info->right_set) {
|
1002
1005
|
// check if the sets are disjoint
|
1003
1006
|
if (Disjoint(left_bindings, right_bindings)) {
|
1004
1007
|
// they are disjoint, we only need to create one set of edges in the join graph
|
1005
|
-
query_graph.CreateEdge(filter_info->left_set, filter_info->right_set, filter_info);
|
1006
|
-
query_graph.CreateEdge(filter_info->right_set, filter_info->left_set, filter_info);
|
1008
|
+
query_graph.CreateEdge(*filter_info->left_set, *filter_info->right_set, filter_info);
|
1009
|
+
query_graph.CreateEdge(*filter_info->right_set, *filter_info->left_set, filter_info);
|
1007
1010
|
} else {
|
1008
1011
|
continue;
|
1009
1012
|
}
|
@@ -1019,15 +1022,15 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
|
|
1019
1022
|
vector<NodeOp> nodes_ops;
|
1020
1023
|
for (idx_t i = 0; i < relations.size(); i++) {
|
1021
1024
|
auto &rel = *relations[i];
|
1022
|
-
auto node = set_manager.GetJoinRelation(i);
|
1025
|
+
auto &node = set_manager.GetJoinRelation(i);
|
1023
1026
|
nodes_ops.emplace_back(make_uniq<JoinNode>(node, 0), rel.op);
|
1024
1027
|
}
|
1025
1028
|
|
1026
|
-
cardinality_estimator.InitCardinalityEstimatorProps(
|
1029
|
+
cardinality_estimator.InitCardinalityEstimatorProps(nodes_ops, filter_infos);
|
1027
1030
|
|
1028
1031
|
for (auto &node_op : nodes_ops) {
|
1029
1032
|
D_ASSERT(node_op.node);
|
1030
|
-
plans[node_op.node->set] = std::move(node_op.node);
|
1033
|
+
plans[&node_op.node->set] = std::move(node_op.node);
|
1031
1034
|
}
|
1032
1035
|
// now we perform the actual dynamic programming to compute the final result
|
1033
1036
|
SolveJoinOrder();
|
@@ -1037,8 +1040,8 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
|
|
1037
1040
|
for (idx_t i = 0; i < relations.size(); i++) {
|
1038
1041
|
bindings.insert(i);
|
1039
1042
|
}
|
1040
|
-
auto total_relation = set_manager.GetJoinRelation(bindings);
|
1041
|
-
auto final_plan = plans.find(total_relation);
|
1043
|
+
auto &total_relation = set_manager.GetJoinRelation(bindings);
|
1044
|
+
auto final_plan = plans.find(&total_relation);
|
1042
1045
|
if (final_plan == plans.end()) {
|
1043
1046
|
// could not find the final plan
|
1044
1047
|
// this should only happen in case the sets are actually disjunct
|
@@ -1051,11 +1054,11 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
|
|
1051
1054
|
//! solve the join order again
|
1052
1055
|
SolveJoinOrder();
|
1053
1056
|
// now we can obtain the final plan!
|
1054
|
-
final_plan = plans.find(total_relation);
|
1057
|
+
final_plan = plans.find(&total_relation);
|
1055
1058
|
D_ASSERT(final_plan != plans.end());
|
1056
1059
|
}
|
1057
1060
|
// now perform the actual reordering
|
1058
|
-
return RewritePlan(std::move(plan), final_plan->second
|
1061
|
+
return RewritePlan(std::move(plan), *final_plan->second);
|
1059
1062
|
}
|
1060
1063
|
|
1061
1064
|
} // namespace duckdb
|