duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/read_csv.cpp +124 -58
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/main/settings/settings.cpp +3 -4
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -12,40 +12,37 @@
|
|
12
12
|
|
13
13
|
namespace duckdb {
|
14
14
|
|
15
|
-
static TableCatalogEntry
|
16
|
-
|
17
|
-
|
18
|
-
}
|
19
|
-
D_ASSERT(op->type == LogicalOperatorType::LOGICAL_GET);
|
20
|
-
auto &get = op->Cast<LogicalGet>();
|
15
|
+
static optional_ptr<TableCatalogEntry> GetCatalogTableEntry(LogicalOperator &op) {
|
16
|
+
D_ASSERT(op.type == LogicalOperatorType::LOGICAL_GET);
|
17
|
+
auto &get = op.Cast<LogicalGet>();
|
21
18
|
TableCatalogEntry *entry = get.GetTable();
|
22
19
|
return entry;
|
23
20
|
}
|
24
21
|
|
25
22
|
// The filter was made on top of a logical sample or other projection,
|
26
23
|
// but no specific columns are referenced. See issue 4978 number 4.
|
27
|
-
bool CardinalityEstimator::EmptyFilter(FilterInfo
|
28
|
-
if (!filter_info
|
24
|
+
bool CardinalityEstimator::EmptyFilter(FilterInfo &filter_info) {
|
25
|
+
if (!filter_info.left_set && !filter_info.right_set) {
|
29
26
|
return true;
|
30
27
|
}
|
31
28
|
return false;
|
32
29
|
}
|
33
30
|
|
34
|
-
void CardinalityEstimator::AddRelationTdom(FilterInfo
|
35
|
-
D_ASSERT(filter_info
|
31
|
+
void CardinalityEstimator::AddRelationTdom(FilterInfo &filter_info) {
|
32
|
+
D_ASSERT(filter_info.set.count >= 1);
|
36
33
|
for (const RelationsToTDom &r2tdom : relations_to_tdoms) {
|
37
34
|
auto &i_set = r2tdom.equivalent_relations;
|
38
|
-
if (i_set.find(filter_info
|
35
|
+
if (i_set.find(filter_info.left_binding) != i_set.end()) {
|
39
36
|
// found an equivalent filter
|
40
37
|
return;
|
41
38
|
}
|
42
39
|
}
|
43
|
-
auto key = ColumnBinding(filter_info
|
40
|
+
auto key = ColumnBinding(filter_info.left_binding.table_index, filter_info.left_binding.column_index);
|
44
41
|
relations_to_tdoms.emplace_back(column_binding_set_t({key}));
|
45
42
|
}
|
46
43
|
|
47
|
-
bool CardinalityEstimator::SingleColumnFilter(FilterInfo
|
48
|
-
if (filter_info
|
44
|
+
bool CardinalityEstimator::SingleColumnFilter(FilterInfo &filter_info) {
|
45
|
+
if (filter_info.left_set && filter_info.right_set) {
|
49
46
|
// Both set
|
50
47
|
return false;
|
51
48
|
}
|
@@ -114,16 +111,16 @@ void CardinalityEstimator::AddColumnToRelationMap(idx_t table_index, idx_t colum
|
|
114
111
|
relation_attributes[table_index].columns.insert(column_index);
|
115
112
|
}
|
116
113
|
|
117
|
-
void CardinalityEstimator::InitEquivalentRelations(vector<unique_ptr<FilterInfo>>
|
114
|
+
void CardinalityEstimator::InitEquivalentRelations(vector<unique_ptr<FilterInfo>> &filter_infos) {
|
118
115
|
// For each filter, we fill keep track of the index of the equivalent relation set
|
119
116
|
// the left and right relation needs to be added to.
|
120
|
-
for (auto &filter :
|
121
|
-
if (SingleColumnFilter(filter
|
117
|
+
for (auto &filter : filter_infos) {
|
118
|
+
if (SingleColumnFilter(*filter)) {
|
122
119
|
// Filter on one relation, (i.e string or range filter on a column).
|
123
120
|
// Grab the first relation and add it to the equivalence_relations
|
124
|
-
AddRelationTdom(filter
|
121
|
+
AddRelationTdom(*filter);
|
125
122
|
continue;
|
126
|
-
} else if (EmptyFilter(filter
|
123
|
+
} else if (EmptyFilter(*filter)) {
|
127
124
|
continue;
|
128
125
|
}
|
129
126
|
D_ASSERT(filter->left_set->count >= 1);
|
@@ -134,14 +131,14 @@ void CardinalityEstimator::InitEquivalentRelations(vector<unique_ptr<FilterInfo>
|
|
134
131
|
}
|
135
132
|
}
|
136
133
|
|
137
|
-
void CardinalityEstimator::VerifySymmetry(JoinNode
|
138
|
-
if (result
|
134
|
+
void CardinalityEstimator::VerifySymmetry(JoinNode &result, JoinNode &entry) {
|
135
|
+
if (result.GetCardinality<double>() != entry.GetCardinality<double>()) {
|
139
136
|
// Currently it's possible that some entries are cartesian joins.
|
140
137
|
// When this is the case, you don't always have symmetry, but
|
141
138
|
// if the cost of the result is less, then just assure the cardinality
|
142
139
|
// is also less, then you have the same effect of symmetry.
|
143
|
-
D_ASSERT(ceil(result
|
144
|
-
floor(result
|
140
|
+
D_ASSERT(ceil(result.GetCardinality<double>()) <= ceil(entry.GetCardinality<double>()) ||
|
141
|
+
floor(result.GetCardinality<double>()) <= floor(entry.GetCardinality<double>()));
|
145
142
|
}
|
146
143
|
}
|
147
144
|
|
@@ -151,29 +148,29 @@ void CardinalityEstimator::InitTotalDomains() {
|
|
151
148
|
relations_to_tdoms.erase(remove_start, relations_to_tdoms.end());
|
152
149
|
}
|
153
150
|
|
154
|
-
double CardinalityEstimator::ComputeCost(JoinNode
|
155
|
-
return expected_cardinality + left
|
151
|
+
double CardinalityEstimator::ComputeCost(JoinNode &left, JoinNode &right, double expected_cardinality) {
|
152
|
+
return expected_cardinality + left.GetCost() + right.GetCost();
|
156
153
|
}
|
157
154
|
|
158
|
-
double CardinalityEstimator::EstimateCrossProduct(const JoinNode
|
155
|
+
double CardinalityEstimator::EstimateCrossProduct(const JoinNode &left, const JoinNode &right) {
|
159
156
|
// need to explicity use double here, otherwise auto converts it to an int, then
|
160
157
|
// there is an autocast in the return.
|
161
|
-
|
162
|
-
|
163
|
-
|
158
|
+
if (left.GetCardinality<double>() >= (NumericLimits<double>::Maximum() / right.GetCardinality<double>())) {
|
159
|
+
return NumericLimits<double>::Maximum();
|
160
|
+
}
|
161
|
+
return left.GetCardinality<double>() * right.GetCardinality<double>();
|
164
162
|
}
|
165
163
|
|
166
|
-
void CardinalityEstimator::AddRelationColumnMapping(LogicalGet
|
167
|
-
for (idx_t it = 0; it < get
|
164
|
+
void CardinalityEstimator::AddRelationColumnMapping(LogicalGet &get, idx_t relation_id) {
|
165
|
+
for (idx_t it = 0; it < get.column_ids.size(); it++) {
|
168
166
|
auto key = ColumnBinding(relation_id, it);
|
169
|
-
auto value = ColumnBinding(get
|
167
|
+
auto value = ColumnBinding(get.table_index, get.column_ids[it]);
|
170
168
|
AddRelationToColumnMapping(key, value);
|
171
169
|
}
|
172
170
|
}
|
173
171
|
|
174
|
-
void UpdateDenom(Subgraph2Denominator
|
175
|
-
relation_2_denom
|
176
|
-
relation_to_tdom->has_tdom_hll ? relation_to_tdom->tdom_hll : relation_to_tdom->tdom_no_hll;
|
172
|
+
void UpdateDenom(Subgraph2Denominator &relation_2_denom, RelationsToTDom &relation_to_tdom) {
|
173
|
+
relation_2_denom.denom *= relation_to_tdom.has_tdom_hll ? relation_to_tdom.tdom_hll : relation_to_tdom.tdom_no_hll;
|
177
174
|
}
|
178
175
|
|
179
176
|
void FindSubgraphMatchAndMerge(Subgraph2Denominator &merge_to, idx_t find_me,
|
@@ -191,12 +188,12 @@ void FindSubgraphMatchAndMerge(Subgraph2Denominator &merge_to, idx_t find_me,
|
|
191
188
|
}
|
192
189
|
}
|
193
190
|
|
194
|
-
double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet
|
191
|
+
double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set) {
|
195
192
|
double numerator = 1;
|
196
193
|
unordered_set<idx_t> actual_set;
|
197
|
-
for (idx_t i = 0; i < new_set
|
198
|
-
numerator *= relation_attributes[new_set
|
199
|
-
actual_set.insert(new_set
|
194
|
+
for (idx_t i = 0; i < new_set.count; i++) {
|
195
|
+
numerator *= relation_attributes[new_set.relations[i]].cardinality;
|
196
|
+
actual_set.insert(new_set.relations[i]);
|
200
197
|
}
|
201
198
|
vector<Subgraph2Denominator> subgraphs;
|
202
199
|
bool done = false;
|
@@ -253,7 +250,7 @@ double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet *new_set
|
|
253
250
|
// Now insert the right binding and update denominator with the
|
254
251
|
// tdom of the filter
|
255
252
|
it->relations.insert(find_table);
|
256
|
-
UpdateDenom(
|
253
|
+
UpdateDenom(*it, relation_2_tdom);
|
257
254
|
found_match = true;
|
258
255
|
break;
|
259
256
|
}
|
@@ -262,16 +259,16 @@ double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet *new_set
|
|
262
259
|
// a connection.
|
263
260
|
if (!found_match) {
|
264
261
|
subgraphs.emplace_back();
|
265
|
-
auto subgraph =
|
266
|
-
subgraph
|
267
|
-
subgraph
|
268
|
-
UpdateDenom(subgraph,
|
262
|
+
auto &subgraph = subgraphs.back();
|
263
|
+
subgraph.relations.insert(filter->left_binding.table_index);
|
264
|
+
subgraph.relations.insert(filter->right_binding.table_index);
|
265
|
+
UpdateDenom(subgraph, relation_2_tdom);
|
269
266
|
}
|
270
267
|
auto remove_start = std::remove_if(subgraphs.begin(), subgraphs.end(),
|
271
268
|
[](Subgraph2Denominator &s) { return s.relations.empty(); });
|
272
269
|
subgraphs.erase(remove_start, subgraphs.end());
|
273
270
|
|
274
|
-
if (subgraphs.size() == 1 && subgraphs.at(0).relations.size() == new_set
|
271
|
+
if (subgraphs.size() == 1 && subgraphs.at(0).relations.size() == new_set.count) {
|
275
272
|
// You have found enough filters to connect the relations. These are guaranteed
|
276
273
|
// to be the filters with the highest Tdoms.
|
277
274
|
done = true;
|
@@ -296,38 +293,38 @@ double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet *new_set
|
|
296
293
|
return numerator / denom;
|
297
294
|
}
|
298
295
|
|
299
|
-
static bool IsLogicalFilter(LogicalOperator
|
300
|
-
return op
|
296
|
+
static bool IsLogicalFilter(LogicalOperator &op) {
|
297
|
+
return op.type == LogicalOperatorType::LOGICAL_FILTER;
|
301
298
|
}
|
302
299
|
|
303
|
-
static LogicalGet
|
304
|
-
LogicalGet
|
305
|
-
switch (op
|
300
|
+
static optional_ptr<LogicalGet> GetLogicalGet(LogicalOperator &op, idx_t table_index = DConstants::INVALID_INDEX) {
|
301
|
+
optional_ptr<LogicalGet> get;
|
302
|
+
switch (op.type) {
|
306
303
|
case LogicalOperatorType::LOGICAL_GET:
|
307
|
-
get = (
|
304
|
+
get = &op.Cast<LogicalGet>();
|
308
305
|
break;
|
309
306
|
case LogicalOperatorType::LOGICAL_FILTER:
|
310
|
-
get = GetLogicalGet(op
|
307
|
+
get = GetLogicalGet(*op.children.at(0), table_index);
|
311
308
|
break;
|
312
309
|
case LogicalOperatorType::LOGICAL_PROJECTION:
|
313
|
-
get = GetLogicalGet(op
|
310
|
+
get = GetLogicalGet(*op.children.at(0), table_index);
|
314
311
|
break;
|
315
312
|
case LogicalOperatorType::LOGICAL_ASOF_JOIN:
|
316
313
|
case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
|
317
|
-
|
314
|
+
auto &join = op.Cast<LogicalComparisonJoin>();
|
318
315
|
// We should never be calling GetLogicalGet without a valid table_index.
|
319
316
|
// We are attempting to get the catalog table for a relation (for statistics/cardinality estimation)
|
320
317
|
// A logical join means there is a non-reorderable relation in the join plan. This means we need
|
321
318
|
// to know the exact table index to return.
|
322
319
|
D_ASSERT(table_index != DConstants::INVALID_INDEX);
|
323
320
|
if (join.join_type == JoinType::MARK || join.join_type == JoinType::LEFT) {
|
324
|
-
auto
|
325
|
-
get = GetLogicalGet(
|
321
|
+
auto &left_child = *join.children.at(0);
|
322
|
+
get = GetLogicalGet(left_child, table_index);
|
326
323
|
if (get && get->table_index == table_index) {
|
327
324
|
return get;
|
328
325
|
}
|
329
|
-
|
330
|
-
get = GetLogicalGet(
|
326
|
+
auto &right_child = *join.children.at(1);
|
327
|
+
get = GetLogicalGet(right_child, table_index);
|
331
328
|
if (get && get->table_index == table_index) {
|
332
329
|
return get;
|
333
330
|
}
|
@@ -369,27 +366,27 @@ bool SortTdoms(const RelationsToTDom &a, const RelationsToTDom &b) {
|
|
369
366
|
return a.tdom_no_hll > b.tdom_no_hll;
|
370
367
|
}
|
371
368
|
|
372
|
-
void CardinalityEstimator::InitCardinalityEstimatorProps(vector<NodeOp>
|
373
|
-
vector<unique_ptr<FilterInfo>>
|
369
|
+
void CardinalityEstimator::InitCardinalityEstimatorProps(vector<NodeOp> &node_ops,
|
370
|
+
vector<unique_ptr<FilterInfo>> &filter_infos) {
|
374
371
|
InitEquivalentRelations(filter_infos);
|
375
372
|
InitTotalDomains();
|
376
|
-
for (idx_t i = 0; i < node_ops
|
377
|
-
auto join_node =
|
378
|
-
auto op =
|
379
|
-
join_node
|
380
|
-
if (op
|
381
|
-
auto &join = op
|
373
|
+
for (idx_t i = 0; i < node_ops.size(); i++) {
|
374
|
+
auto &join_node = *node_ops[i].node;
|
375
|
+
auto &op = node_ops[i].op;
|
376
|
+
join_node.SetBaseTableCardinality(op.EstimateCardinality(context));
|
377
|
+
if (op.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
|
378
|
+
auto &join = op.Cast<LogicalComparisonJoin>();
|
382
379
|
if (join.join_type == JoinType::LEFT) {
|
383
380
|
// If a base op is a Logical Comparison join it is probably a left join,
|
384
381
|
// so the cost of the larger table is a fine estimate.
|
385
382
|
// TODO: provide better estimates for cost of mark joins
|
386
383
|
// MARK joins are used for anti and semi joins, so the cost can conceivably be
|
387
384
|
// less than the base table cardinality.
|
388
|
-
join_node
|
385
|
+
join_node.SetCost(join_node.GetBaseTableCardinality());
|
389
386
|
}
|
390
|
-
} else if (op
|
387
|
+
} else if (op.type == LogicalOperatorType::LOGICAL_ASOF_JOIN) {
|
391
388
|
// AsOf joins have the cardinality of the LHS
|
392
|
-
join_node
|
389
|
+
join_node.SetCost(join_node.GetBaseTableCardinality());
|
393
390
|
}
|
394
391
|
// Total domains can be affected by filters. So we update base table cardinality first
|
395
392
|
EstimateBaseTableCardinality(join_node, op);
|
@@ -401,15 +398,14 @@ void CardinalityEstimator::InitCardinalityEstimatorProps(vector<NodeOp> *node_op
|
|
401
398
|
std::sort(relations_to_tdoms.begin(), relations_to_tdoms.end(), SortTdoms);
|
402
399
|
}
|
403
400
|
|
404
|
-
void CardinalityEstimator::UpdateTotalDomains(JoinNode
|
405
|
-
auto relation_id = node
|
406
|
-
relation_attributes[relation_id].cardinality = node
|
401
|
+
void CardinalityEstimator::UpdateTotalDomains(JoinNode &node, LogicalOperator &op) {
|
402
|
+
auto relation_id = node.set.relations[0];
|
403
|
+
relation_attributes[relation_id].cardinality = node.GetCardinality<double>();
|
407
404
|
//! Initialize the distinct count for all columns used in joins with the current relation.
|
408
|
-
idx_t distinct_count = node
|
409
|
-
TableCatalogEntry
|
405
|
+
idx_t distinct_count = node.GetBaseTableCardinality();
|
406
|
+
optional_ptr<TableCatalogEntry> catalog_table;
|
410
407
|
|
411
|
-
|
412
|
-
LogicalGet *get = nullptr;
|
408
|
+
optional_ptr<LogicalGet> get;
|
413
409
|
bool get_updated = true;
|
414
410
|
for (auto &column : relation_attributes[relation_id].columns) {
|
415
411
|
//! for every column used in a filter in the relation, get the distinct count via HLL, or assume it to be
|
@@ -430,7 +426,11 @@ void CardinalityEstimator::UpdateTotalDomains(JoinNode *node, LogicalOperator *o
|
|
430
426
|
}
|
431
427
|
|
432
428
|
if (get_updated) {
|
433
|
-
|
429
|
+
if (get) {
|
430
|
+
catalog_table = GetCatalogTableEntry(*get);
|
431
|
+
} else {
|
432
|
+
catalog_table = nullptr;
|
433
|
+
}
|
434
434
|
}
|
435
435
|
|
436
436
|
if (catalog_table && actual_binding != relation_column_to_original_column.end()) {
|
@@ -440,26 +440,12 @@ void CardinalityEstimator::UpdateTotalDomains(JoinNode *node, LogicalOperator *o
|
|
440
440
|
distinct_count = base_stats->GetDistinctCount();
|
441
441
|
}
|
442
442
|
|
443
|
-
// means you have a direct filter on a column. The distinct_count/total domain for the column
|
444
|
-
// should be decreased to match the predicted total domain matching the filter.
|
445
|
-
// We decrease the total domain for all columns in the equivalence set because filter pushdown
|
446
|
-
// will mean all columns are affected.
|
447
|
-
if (direct_filter) {
|
448
|
-
distinct_count = node->GetCardinality<idx_t>();
|
449
|
-
}
|
450
|
-
|
451
443
|
// HLL has estimation error, distinct_count can't be greater than cardinality of the table before filters
|
452
|
-
if (distinct_count > node
|
453
|
-
distinct_count = node
|
444
|
+
if (distinct_count > node.GetBaseTableCardinality()) {
|
445
|
+
distinct_count = node.GetBaseTableCardinality();
|
454
446
|
}
|
455
447
|
} else {
|
456
|
-
|
457
|
-
// with filter effects. Otherwise assume the distinct count is still the cardinality
|
458
|
-
if (direct_filter) {
|
459
|
-
distinct_count = node->GetCardinality<idx_t>();
|
460
|
-
} else {
|
461
|
-
distinct_count = node->GetBaseTableCardinality();
|
462
|
-
}
|
448
|
+
distinct_count = node.GetBaseTableCardinality();
|
463
449
|
}
|
464
450
|
// Update the relation_to_tdom set with the estimated distinct count (or tdom) calculated above
|
465
451
|
for (auto &relation_to_tdom : relations_to_tdoms) {
|
@@ -489,7 +475,7 @@ void CardinalityEstimator::UpdateTotalDomains(JoinNode *node, LogicalOperator *o
|
|
489
475
|
}
|
490
476
|
}
|
491
477
|
|
492
|
-
TableFilterSet *CardinalityEstimator::GetTableFilters(LogicalOperator *op, idx_t table_index) {
|
478
|
+
optional_ptr<TableFilterSet> CardinalityEstimator::GetTableFilters(LogicalOperator &op, idx_t table_index) {
|
493
479
|
auto get = GetLogicalGet(op, table_index);
|
494
480
|
return get ? &get->table_filters : nullptr;
|
495
481
|
}
|
@@ -552,12 +538,12 @@ idx_t CardinalityEstimator::InspectConjunctionOR(idx_t cardinality, idx_t column
|
|
552
538
|
return cardinality_after_filters;
|
553
539
|
}
|
554
540
|
|
555
|
-
idx_t CardinalityEstimator::InspectTableFilters(idx_t cardinality, LogicalOperator *op, TableFilterSet *table_filters,
|
541
|
+
idx_t CardinalityEstimator::InspectTableFilters(idx_t cardinality, LogicalOperator &op, TableFilterSet &table_filters,
|
556
542
|
idx_t table_index) {
|
557
543
|
idx_t cardinality_after_filters = cardinality;
|
558
544
|
auto get = GetLogicalGet(op, table_index);
|
559
545
|
unique_ptr<BaseStatistics> column_statistics;
|
560
|
-
for (auto &it : table_filters->filters) {
|
546
|
+
for (auto &it : table_filters.filters) {
|
561
547
|
column_statistics = nullptr;
|
562
548
|
if (get->bind_data && get->function.name.compare("seq_scan") == 0) {
|
563
549
|
auto &table_scan_bind_data = get->bind_data->Cast<TableScanBindData>();
|
@@ -578,22 +564,22 @@ idx_t CardinalityEstimator::InspectTableFilters(idx_t cardinality, LogicalOperat
|
|
578
564
|
// if the above code didn't find an equality filter (i.e country_code = "[us]")
|
579
565
|
// and there are other table filters, use default selectivity.
|
580
566
|
bool has_equality_filter = (cardinality_after_filters != cardinality);
|
581
|
-
if (!has_equality_filter && !table_filters->filters.empty()) {
|
567
|
+
if (!has_equality_filter && !table_filters.filters.empty()) {
|
582
568
|
cardinality_after_filters = MaxValue<idx_t>(cardinality * DEFAULT_SELECTIVITY, 1);
|
583
569
|
}
|
584
570
|
return cardinality_after_filters;
|
585
571
|
}
|
586
572
|
|
587
|
-
void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode *node, LogicalOperator *op) {
|
573
|
+
void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode &node, LogicalOperator &op) {
|
588
574
|
auto has_logical_filter = IsLogicalFilter(op);
|
589
|
-
D_ASSERT(node->set->count == 1);
|
590
|
-
auto relation_id = node->set->relations[0];
|
575
|
+
D_ASSERT(node.set.count == 1);
|
576
|
+
auto relation_id = node.set.relations[0];
|
591
577
|
|
592
578
|
double lowest_card_found = NumericLimits<double>::Maximum();
|
593
579
|
for (auto &column : relation_attributes[relation_id].columns) {
|
594
|
-
auto card_after_filters = node->GetBaseTableCardinality();
|
580
|
+
auto card_after_filters = node.GetBaseTableCardinality();
|
595
581
|
ColumnBinding key = ColumnBinding(relation_id, column);
|
596
|
-
TableFilterSet *table_filters = nullptr;
|
582
|
+
optional_ptr<TableFilterSet> table_filters;
|
597
583
|
auto actual_binding = relation_column_to_original_column.find(key);
|
598
584
|
if (actual_binding != relation_column_to_original_column.end()) {
|
599
585
|
table_filters = GetTableFilters(op, actual_binding->second.table_index);
|
@@ -601,7 +587,7 @@ void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode *node, LogicalO
|
|
601
587
|
|
602
588
|
if (table_filters) {
|
603
589
|
double inspect_result =
|
604
|
-
(double)InspectTableFilters(card_after_filters, op, table_filters, actual_binding->second.table_index);
|
590
|
+
(double)InspectTableFilters(card_after_filters, op, *table_filters, actual_binding->second.table_index);
|
605
591
|
card_after_filters = MinValue(inspect_result, (double)card_after_filters);
|
606
592
|
}
|
607
593
|
if (has_logical_filter) {
|
@@ -609,7 +595,7 @@ void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode *node, LogicalO
|
|
609
595
|
}
|
610
596
|
lowest_card_found = MinValue(card_after_filters, lowest_card_found);
|
611
597
|
}
|
612
|
-
node->SetEstimatedCardinality(lowest_card_found);
|
598
|
+
node.SetEstimatedCardinality(lowest_card_found);
|
613
599
|
}
|
614
600
|
|
615
601
|
} // namespace duckdb
|
@@ -6,14 +6,14 @@
|
|
6
6
|
|
7
7
|
namespace duckdb {
|
8
8
|
|
9
|
-
JoinNode::JoinNode(JoinRelationSet *set, const double base_cardinality)
|
9
|
+
JoinNode::JoinNode(JoinRelationSet &set, const double base_cardinality)
|
10
10
|
: set(set), info(nullptr), has_filter(false), left(nullptr), right(nullptr), base_cardinality(base_cardinality) {
|
11
11
|
estimated_props = make_uniq<EstimatedProperties>(base_cardinality, 0);
|
12
12
|
}
|
13
13
|
|
14
|
-
JoinNode::JoinNode(JoinRelationSet *set, NeighborInfo *info, JoinNode *left, JoinNode *right,
|
14
|
+
JoinNode::JoinNode(JoinRelationSet &set, optional_ptr<NeighborInfo> info, JoinNode &left, JoinNode &right,
|
15
15
|
const double base_cardinality, double cost)
|
16
|
-
: set(set), info(info), has_filter(false), left(left), right(right), base_cardinality(base_cardinality) {
|
16
|
+
: set(set), info(info), has_filter(false), left(&left), right(&right), base_cardinality(base_cardinality) {
|
17
17
|
estimated_props = make_uniq<EstimatedProperties>(base_cardinality, cost);
|
18
18
|
}
|
19
19
|
|
@@ -31,7 +31,7 @@ void JoinNode::SetCost(double cost) {
|
|
31
31
|
}
|
32
32
|
|
33
33
|
double JoinNode::GetBaseTableCardinality() {
|
34
|
-
if (set->count > 1) {
|
34
|
+
if (set.count > 1) {
|
35
35
|
throw InvalidInputException("Cannot call get base table cardinality on intermediate join node");
|
36
36
|
}
|
37
37
|
return base_cardinality;
|
@@ -46,11 +46,8 @@ void JoinNode::SetEstimatedCardinality(double estimated_card) {
|
|
46
46
|
}
|
47
47
|
|
48
48
|
string JoinNode::ToString() {
|
49
|
-
if (!set) {
|
50
|
-
return "";
|
51
|
-
}
|
52
49
|
string result = "-------------------------------\n";
|
53
|
-
result += set->ToString() + "\n";
|
50
|
+
result += set.ToString() + "\n";
|
54
51
|
result += "card = " + to_string(GetCardinality<double>()) + "\n";
|
55
52
|
bool is_cartesian = false;
|
56
53
|
if (left && right) {
|