duckdb 0.8.2-dev145.0 → 0.8.2-dev1493.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +42 -5
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +2 -2
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +30 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +3 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
- package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +64 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +5 -0
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +8 -0
- package/src/statement.cpp +10 -3
- package/test/test_all_types.test.ts +233 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -5,6 +5,7 @@
|
|
5
5
|
#include "duckdb/planner/expression/list.hpp"
|
6
6
|
#include "duckdb/planner/expression_iterator.hpp"
|
7
7
|
#include "duckdb/planner/operator/list.hpp"
|
8
|
+
#include "duckdb/common/queue.hpp"
|
8
9
|
|
9
10
|
#include <algorithm>
|
10
11
|
#include <cmath>
|
@@ -323,6 +324,65 @@ void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode &node) {
|
|
323
324
|
}
|
324
325
|
}
|
325
326
|
|
327
|
+
static vector<unordered_set<idx_t>> AddSuperSets(const vector<unordered_set<idx_t>> &current,
|
328
|
+
const vector<idx_t> &all_neighbors) {
|
329
|
+
vector<unordered_set<idx_t>> ret;
|
330
|
+
|
331
|
+
for (const auto &neighbor_set : current) {
|
332
|
+
auto max_val = std::max_element(neighbor_set.begin(), neighbor_set.end());
|
333
|
+
for (const auto &neighbor : all_neighbors) {
|
334
|
+
if (*max_val >= neighbor) {
|
335
|
+
continue;
|
336
|
+
}
|
337
|
+
if (neighbor_set.count(neighbor) == 0) {
|
338
|
+
unordered_set<idx_t> new_set;
|
339
|
+
for (auto &n : neighbor_set) {
|
340
|
+
new_set.insert(n);
|
341
|
+
}
|
342
|
+
new_set.insert(neighbor);
|
343
|
+
ret.push_back(new_set);
|
344
|
+
}
|
345
|
+
}
|
346
|
+
}
|
347
|
+
|
348
|
+
return ret;
|
349
|
+
}
|
350
|
+
|
351
|
+
// works by first creating all sets with cardinality 1
|
352
|
+
// then iterates over each previously created group of subsets and will only add a neighbor if the neighbor
|
353
|
+
// is greater than all relations in the set.
|
354
|
+
static vector<unordered_set<idx_t>> GetAllNeighborSets(vector<idx_t> neighbors) {
|
355
|
+
vector<unordered_set<idx_t>> ret;
|
356
|
+
sort(neighbors.begin(), neighbors.end());
|
357
|
+
vector<unordered_set<idx_t>> added;
|
358
|
+
for (auto &neighbor : neighbors) {
|
359
|
+
added.push_back(unordered_set<idx_t>({neighbor}));
|
360
|
+
ret.push_back(unordered_set<idx_t>({neighbor}));
|
361
|
+
}
|
362
|
+
do {
|
363
|
+
added = AddSuperSets(added, neighbors);
|
364
|
+
for (auto &d : added) {
|
365
|
+
ret.push_back(d);
|
366
|
+
}
|
367
|
+
} while (!added.empty());
|
368
|
+
#if DEBUG
|
369
|
+
// drive by test to make sure we have an accurate amount of
|
370
|
+
// subsets, and that each neighbor is in a correct amount
|
371
|
+
// of those subsets.
|
372
|
+
D_ASSERT(ret.size() == pow(2, neighbors.size()) - 1);
|
373
|
+
for (auto &n : neighbors) {
|
374
|
+
idx_t count = 0;
|
375
|
+
for (auto &set : ret) {
|
376
|
+
if (set.count(n) >= 1) {
|
377
|
+
count += 1;
|
378
|
+
}
|
379
|
+
}
|
380
|
+
D_ASSERT(count == pow(2, neighbors.size() - 1));
|
381
|
+
}
|
382
|
+
#endif
|
383
|
+
return ret;
|
384
|
+
}
|
385
|
+
|
326
386
|
JoinNode &JoinOrderOptimizer::EmitPair(JoinRelationSet &left, JoinRelationSet &right,
|
327
387
|
const vector<reference<NeighborInfo>> &info) {
|
328
388
|
// get the left and right join plans
|
@@ -405,8 +465,19 @@ bool JoinOrderOptimizer::EmitCSG(JoinRelationSet &node) {
|
|
405
465
|
//! Neighbors should be reversed when iterating over them.
|
406
466
|
std::sort(neighbors.begin(), neighbors.end(), std::greater_equal<idx_t>());
|
407
467
|
for (idx_t i = 0; i < neighbors.size() - 1; i++) {
|
408
|
-
D_ASSERT(neighbors[i]
|
468
|
+
D_ASSERT(neighbors[i] > neighbors[i + 1]);
|
469
|
+
}
|
470
|
+
|
471
|
+
// Dphyp paper missing this.
|
472
|
+
// Because we are traversing in reverse order, we need to add neighbors whose number is smaller than the current
|
473
|
+
// node to exclusion_set
|
474
|
+
// This avoids duplicated enumeration
|
475
|
+
unordered_set<idx_t> new_exclusion_set = exclusion_set;
|
476
|
+
for (idx_t i = 0; i < neighbors.size(); ++i) {
|
477
|
+
D_ASSERT(new_exclusion_set.find(neighbors[i]) == new_exclusion_set.end());
|
478
|
+
new_exclusion_set.insert(neighbors[i]);
|
409
479
|
}
|
480
|
+
|
410
481
|
for (auto neighbor : neighbors) {
|
411
482
|
// since the GetNeighbors only returns the smallest element in a list, the entry might not be connected to
|
412
483
|
// (only!) this neighbor, hence we have to do a connectedness check before we can emit it
|
@@ -417,27 +488,35 @@ bool JoinOrderOptimizer::EmitCSG(JoinRelationSet &node) {
|
|
417
488
|
return false;
|
418
489
|
}
|
419
490
|
}
|
420
|
-
|
491
|
+
|
492
|
+
if (!EnumerateCmpRecursive(node, neighbor_relation, new_exclusion_set)) {
|
421
493
|
return false;
|
422
494
|
}
|
495
|
+
|
496
|
+
new_exclusion_set.erase(neighbor);
|
423
497
|
}
|
424
498
|
return true;
|
425
499
|
}
|
426
500
|
|
427
501
|
bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right,
|
428
|
-
unordered_set<idx_t> exclusion_set) {
|
502
|
+
unordered_set<idx_t> &exclusion_set) {
|
429
503
|
// get the neighbors of the second relation under the exclusion set
|
430
504
|
auto neighbors = query_graph.GetNeighbors(right, exclusion_set);
|
431
505
|
if (neighbors.empty()) {
|
432
506
|
return true;
|
433
507
|
}
|
508
|
+
|
509
|
+
auto all_subset = GetAllNeighborSets(neighbors);
|
434
510
|
vector<reference<JoinRelationSet>> union_sets;
|
435
|
-
union_sets.reserve(
|
436
|
-
for (
|
437
|
-
auto &neighbor = set_manager.GetJoinRelation(
|
511
|
+
union_sets.reserve(all_subset.size());
|
512
|
+
for (const auto &rel_set : all_subset) {
|
513
|
+
auto &neighbor = set_manager.GetJoinRelation(rel_set);
|
438
514
|
// emit the combinations of this node and its neighbors
|
439
515
|
auto &combined_set = set_manager.Union(right, neighbor);
|
440
|
-
|
516
|
+
// If combined_set.count == right.count, This means we found a neighbor that has been present before
|
517
|
+
// This means we didn't set exclusion_set correctly.
|
518
|
+
D_ASSERT(combined_set.count > right.count);
|
519
|
+
if (plans.find(&combined_set) != plans.end()) {
|
441
520
|
auto connections = query_graph.GetConnections(left, combined_set);
|
442
521
|
if (!connections.empty()) {
|
443
522
|
if (!TryEmitPair(left, combined_set, connections)) {
|
@@ -447,11 +526,15 @@ bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelati
|
|
447
526
|
}
|
448
527
|
union_sets.push_back(combined_set);
|
449
528
|
}
|
450
|
-
|
529
|
+
|
451
530
|
unordered_set<idx_t> new_exclusion_set = exclusion_set;
|
452
|
-
for (
|
531
|
+
for (const auto &neighbor : neighbors) {
|
532
|
+
new_exclusion_set.insert(neighbor);
|
533
|
+
}
|
534
|
+
|
535
|
+
// recursively enumerate the sets
|
536
|
+
for (idx_t i = 0; i < union_sets.size(); i++) {
|
453
537
|
// updated the set of excluded entries with this neighbor
|
454
|
-
new_exclusion_set.insert(neighbors[i]);
|
455
538
|
if (!EnumerateCmpRecursive(left, union_sets[i], new_exclusion_set)) {
|
456
539
|
return false;
|
457
540
|
}
|
@@ -465,26 +548,30 @@ bool JoinOrderOptimizer::EnumerateCSGRecursive(JoinRelationSet &node, unordered_
|
|
465
548
|
if (neighbors.empty()) {
|
466
549
|
return true;
|
467
550
|
}
|
551
|
+
|
552
|
+
auto all_subset = GetAllNeighborSets(neighbors);
|
468
553
|
vector<reference<JoinRelationSet>> union_sets;
|
469
|
-
union_sets.reserve(
|
470
|
-
for (
|
471
|
-
auto &neighbor = set_manager.GetJoinRelation(
|
554
|
+
union_sets.reserve(all_subset.size());
|
555
|
+
for (const auto &rel_set : all_subset) {
|
556
|
+
auto &neighbor = set_manager.GetJoinRelation(rel_set);
|
472
557
|
// emit the combinations of this node and its neighbors
|
473
558
|
auto &new_set = set_manager.Union(node, neighbor);
|
474
|
-
|
559
|
+
D_ASSERT(new_set.count > node.count);
|
560
|
+
if (plans.find(&new_set) != plans.end()) {
|
475
561
|
if (!EmitCSG(new_set)) {
|
476
562
|
return false;
|
477
563
|
}
|
478
564
|
}
|
479
565
|
union_sets.push_back(new_set);
|
480
566
|
}
|
481
|
-
|
567
|
+
|
482
568
|
unordered_set<idx_t> new_exclusion_set = exclusion_set;
|
483
|
-
for (
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
569
|
+
for (const auto &neighbor : neighbors) {
|
570
|
+
new_exclusion_set.insert(neighbor);
|
571
|
+
}
|
572
|
+
|
573
|
+
// recursively enumerate the sets
|
574
|
+
for (idx_t i = 0; i < union_sets.size(); i++) {
|
488
575
|
// updated the set of excluded entries with this neighbor
|
489
576
|
if (!EnumerateCSGRecursive(union_sets[i], new_exclusion_set)) {
|
490
577
|
return false;
|
@@ -505,7 +592,7 @@ bool JoinOrderOptimizer::SolveJoinOrderExactly() {
|
|
505
592
|
}
|
506
593
|
// initialize the set of exclusion_set as all the nodes with a number below this
|
507
594
|
unordered_set<idx_t> exclusion_set;
|
508
|
-
for (idx_t j = 0; j < i
|
595
|
+
for (idx_t j = 0; j < i; j++) {
|
509
596
|
exclusion_set.insert(j);
|
510
597
|
}
|
511
598
|
// then we recursively search for neighbors that do not belong to the banned entries
|
@@ -516,63 +603,6 @@ bool JoinOrderOptimizer::SolveJoinOrderExactly() {
|
|
516
603
|
return true;
|
517
604
|
}
|
518
605
|
|
519
|
-
static vector<unordered_set<idx_t>> AddSuperSets(vector<unordered_set<idx_t>> current,
|
520
|
-
const vector<idx_t> &all_neighbors) {
|
521
|
-
vector<unordered_set<idx_t>> ret;
|
522
|
-
for (auto &neighbor : all_neighbors) {
|
523
|
-
for (auto &neighbor_set : current) {
|
524
|
-
auto max_val = std::max_element(neighbor_set.begin(), neighbor_set.end());
|
525
|
-
if (*max_val >= neighbor) {
|
526
|
-
continue;
|
527
|
-
}
|
528
|
-
if (neighbor_set.count(neighbor) == 0) {
|
529
|
-
unordered_set<idx_t> new_set;
|
530
|
-
for (auto &n : neighbor_set) {
|
531
|
-
new_set.insert(n);
|
532
|
-
}
|
533
|
-
new_set.insert(neighbor);
|
534
|
-
ret.push_back(new_set);
|
535
|
-
}
|
536
|
-
}
|
537
|
-
}
|
538
|
-
return ret;
|
539
|
-
}
|
540
|
-
|
541
|
-
// works by first creating all sets with cardinality 1
|
542
|
-
// then iterates over each previously created group of subsets and will only add a neighbor if the neighbor
|
543
|
-
// is greater than all relations in the set.
|
544
|
-
static vector<unordered_set<idx_t>> GetAllNeighborSets(unordered_set<idx_t> &exclusion_set, vector<idx_t> neighbors) {
|
545
|
-
vector<unordered_set<idx_t>> ret;
|
546
|
-
sort(neighbors.begin(), neighbors.end());
|
547
|
-
vector<unordered_set<idx_t>> added;
|
548
|
-
for (auto &neighbor : neighbors) {
|
549
|
-
added.push_back(unordered_set<idx_t>({neighbor}));
|
550
|
-
ret.push_back(unordered_set<idx_t>({neighbor}));
|
551
|
-
}
|
552
|
-
do {
|
553
|
-
added = AddSuperSets(added, neighbors);
|
554
|
-
for (auto &d : added) {
|
555
|
-
ret.push_back(d);
|
556
|
-
}
|
557
|
-
} while (!added.empty());
|
558
|
-
#if DEBUG
|
559
|
-
// drive by test to make sure we have an accurate amount of
|
560
|
-
// subsets, and that each neighbor is in a correct amount
|
561
|
-
// of those subsets.
|
562
|
-
D_ASSERT(ret.size() == pow(2, neighbors.size()) - 1);
|
563
|
-
for (auto &n : neighbors) {
|
564
|
-
idx_t count = 0;
|
565
|
-
for (auto &set : ret) {
|
566
|
-
if (set.count(n) >= 1) {
|
567
|
-
count += 1;
|
568
|
-
}
|
569
|
-
}
|
570
|
-
D_ASSERT(count == pow(2, neighbors.size() - 1));
|
571
|
-
}
|
572
|
-
#endif
|
573
|
-
return ret;
|
574
|
-
}
|
575
|
-
|
576
606
|
void JoinOrderOptimizer::UpdateDPTree(JoinNode &new_plan) {
|
577
607
|
if (!NodeInFullPlan(new_plan)) {
|
578
608
|
// if the new node is not in the full plan, feel free to return
|
@@ -586,8 +616,8 @@ void JoinOrderOptimizer::UpdateDPTree(JoinNode &new_plan) {
|
|
586
616
|
exclusion_set.insert(new_set.relations[i]);
|
587
617
|
}
|
588
618
|
auto neighbors = query_graph.GetNeighbors(new_set, exclusion_set);
|
589
|
-
auto all_neighbors = GetAllNeighborSets(
|
590
|
-
for (auto neighbor : all_neighbors) {
|
619
|
+
auto all_neighbors = GetAllNeighborSets(neighbors);
|
620
|
+
for (const auto &neighbor : all_neighbors) {
|
591
621
|
auto &neighbor_relation = set_manager.GetJoinRelation(neighbor);
|
592
622
|
auto &combined_set = set_manager.Union(new_set, neighbor_relation);
|
593
623
|
|
@@ -820,8 +850,9 @@ GenerateJoinRelation JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<Logical
|
|
820
850
|
// FILTER on top of GET, add estimated properties to both
|
821
851
|
auto &filter_props = *result_operator->estimated_props;
|
822
852
|
auto &child_operator = *result_operator->children[0];
|
823
|
-
child_operator.estimated_props = make_uniq<EstimatedProperties>(
|
824
|
-
|
853
|
+
child_operator.estimated_props = make_uniq<EstimatedProperties>(filter_props.GetCardinality<double>() /
|
854
|
+
CardinalityEstimator::DEFAULT_SELECTIVITY,
|
855
|
+
filter_props.GetCost<double>());
|
825
856
|
child_operator.estimated_cardinality = child_operator.estimated_props->GetCardinality<idx_t>();
|
826
857
|
child_operator.has_estimated_cardinality = true;
|
827
858
|
}
|
@@ -65,7 +65,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) {
|
|
65
65
|
return GetJoinRelation(std::move(relations), count);
|
66
66
|
}
|
67
67
|
|
68
|
-
JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unordered_set<idx_t> &bindings) {
|
68
|
+
JoinRelationSet &JoinRelationSetManager::GetJoinRelation(const unordered_set<idx_t> &bindings) {
|
69
69
|
// create a sorted vector of the relations
|
70
70
|
unsafe_unique_array<idx_t> relations = bindings.empty() ? nullptr : make_unsafe_uniq_array<idx_t>(bindings.size());
|
71
71
|
idx_t count = 0;
|
@@ -94,16 +94,12 @@ JoinRelationSet &JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelati
|
|
94
94
|
relations[count++] = left.relations[i];
|
95
95
|
}
|
96
96
|
break;
|
97
|
-
} else if (left.relations[i] == right.relations[j]) {
|
98
|
-
// equivalent, add only one of the two pairs
|
99
|
-
relations[count++] = left.relations[i];
|
100
|
-
i++;
|
101
|
-
j++;
|
102
97
|
} else if (left.relations[i] < right.relations[j]) {
|
103
98
|
// left is smaller, progress left and add it to the set
|
104
99
|
relations[count++] = left.relations[i];
|
105
100
|
i++;
|
106
101
|
} else {
|
102
|
+
D_ASSERT(left.relations[i] > right.relations[j]);
|
107
103
|
// right is smaller, progress right and add it to the set
|
108
104
|
relations[count++] = right.relations[j];
|
109
105
|
j++;
|
@@ -76,22 +76,30 @@ void QueryGraph::CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optio
|
|
76
76
|
info.neighbors.push_back(std::move(n));
|
77
77
|
}
|
78
78
|
|
79
|
+
void QueryGraph::EnumerateNeighborsDFS(JoinRelationSet &node, reference<QueryEdge> info, idx_t index,
|
80
|
+
const std::function<bool(NeighborInfo &)> &callback) {
|
81
|
+
|
82
|
+
for (auto &neighbor : info.get().neighbors) {
|
83
|
+
if (callback(*neighbor)) {
|
84
|
+
return;
|
85
|
+
}
|
86
|
+
}
|
87
|
+
|
88
|
+
for (idx_t node_index = index; node_index < node.count; ++node_index) {
|
89
|
+
auto iter = info.get().children.find(node.relations[node_index]);
|
90
|
+
if (iter != info.get().children.end()) {
|
91
|
+
reference<QueryEdge> new_info = *iter->second;
|
92
|
+
EnumerateNeighborsDFS(node, new_info, node_index + 1, callback);
|
93
|
+
}
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
79
97
|
void QueryGraph::EnumerateNeighbors(JoinRelationSet &node, const std::function<bool(NeighborInfo &)> &callback) {
|
80
98
|
for (idx_t j = 0; j < node.count; j++) {
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
// node not found
|
86
|
-
break;
|
87
|
-
}
|
88
|
-
// check if any subset of the other set is in this sets neighbors
|
89
|
-
info = *entry->second;
|
90
|
-
for (auto &neighbor : info.get().neighbors) {
|
91
|
-
if (callback(*neighbor)) {
|
92
|
-
return;
|
93
|
-
}
|
94
|
-
}
|
99
|
+
auto iter = root.children.find(node.relations[j]);
|
100
|
+
if (iter != root.children.end()) {
|
101
|
+
reference<QueryEdge> new_info = *iter->second;
|
102
|
+
EnumerateNeighborsDFS(node, new_info, j + 1, callback);
|
95
103
|
}
|
96
104
|
}
|
97
105
|
}
|
@@ -1,27 +1,28 @@
|
|
1
1
|
#include "duckdb/optimizer/optimizer.hpp"
|
2
2
|
|
3
3
|
#include "duckdb/execution/column_binding_resolver.hpp"
|
4
|
-
#include "duckdb/execution/expression_executor.hpp"
|
5
4
|
#include "duckdb/main/client_context.hpp"
|
6
5
|
#include "duckdb/main/config.hpp"
|
7
6
|
#include "duckdb/main/query_profiler.hpp"
|
8
7
|
#include "duckdb/optimizer/column_lifetime_optimizer.hpp"
|
9
8
|
#include "duckdb/optimizer/common_aggregate_optimizer.hpp"
|
9
|
+
#include "duckdb/optimizer/compressed_materialization.hpp"
|
10
10
|
#include "duckdb/optimizer/cse_optimizer.hpp"
|
11
11
|
#include "duckdb/optimizer/deliminator.hpp"
|
12
|
-
#include "duckdb/optimizer/unnest_rewriter.hpp"
|
13
12
|
#include "duckdb/optimizer/expression_heuristics.hpp"
|
14
13
|
#include "duckdb/optimizer/filter_pullup.hpp"
|
15
14
|
#include "duckdb/optimizer/filter_pushdown.hpp"
|
16
15
|
#include "duckdb/optimizer/in_clause_rewriter.hpp"
|
17
16
|
#include "duckdb/optimizer/join_order/join_order_optimizer.hpp"
|
18
17
|
#include "duckdb/optimizer/regex_range_filter.hpp"
|
18
|
+
#include "duckdb/optimizer/remove_duplicate_groups.hpp"
|
19
19
|
#include "duckdb/optimizer/remove_unused_columns.hpp"
|
20
20
|
#include "duckdb/optimizer/rule/equal_or_null_simplification.hpp"
|
21
21
|
#include "duckdb/optimizer/rule/in_clause_simplification.hpp"
|
22
22
|
#include "duckdb/optimizer/rule/list.hpp"
|
23
23
|
#include "duckdb/optimizer/statistics_propagator.hpp"
|
24
24
|
#include "duckdb/optimizer/topn_optimizer.hpp"
|
25
|
+
#include "duckdb/optimizer/unnest_rewriter.hpp"
|
25
26
|
#include "duckdb/planner/binder.hpp"
|
26
27
|
#include "duckdb/planner/planner.hpp"
|
27
28
|
|
@@ -52,6 +53,10 @@ Optimizer::Optimizer(Binder &binder, ClientContext &context) : context(context),
|
|
52
53
|
#endif
|
53
54
|
}
|
54
55
|
|
56
|
+
ClientContext &Optimizer::GetContext() {
|
57
|
+
return context;
|
58
|
+
}
|
59
|
+
|
55
60
|
void Optimizer::RunOptimizer(OptimizerType type, const std::function<void()> &callback) {
|
56
61
|
auto &config = DBConfig::GetConfig(context);
|
57
62
|
if (config.options.disabled_optimizers.find(type) != config.options.disabled_optimizers.end()) {
|
@@ -73,6 +78,16 @@ void Optimizer::Verify(LogicalOperator &op) {
|
|
73
78
|
|
74
79
|
unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan_p) {
|
75
80
|
Verify(*plan_p);
|
81
|
+
|
82
|
+
switch (plan_p->type) {
|
83
|
+
case LogicalOperatorType::LOGICAL_TRANSACTION:
|
84
|
+
case LogicalOperatorType::LOGICAL_SET:
|
85
|
+
case LogicalOperatorType::LOGICAL_PRAGMA:
|
86
|
+
return plan_p;
|
87
|
+
default:
|
88
|
+
break;
|
89
|
+
}
|
90
|
+
|
76
91
|
this->plan = std::move(plan_p);
|
77
92
|
// first we perform expression rewrites using the ExpressionRewriter
|
78
93
|
// this does not change the logical plan structure, but only simplifies the expression trees
|
@@ -96,8 +111,14 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
|
|
96
111
|
});
|
97
112
|
|
98
113
|
RunOptimizer(OptimizerType::IN_CLAUSE, [&]() {
|
99
|
-
InClauseRewriter
|
100
|
-
plan =
|
114
|
+
InClauseRewriter ic_rewriter(context, *this);
|
115
|
+
plan = ic_rewriter.Rewrite(std::move(plan));
|
116
|
+
});
|
117
|
+
|
118
|
+
// removes any redundant DelimGets/DelimJoins
|
119
|
+
RunOptimizer(OptimizerType::DELIMINATOR, [&]() {
|
120
|
+
Deliminator deliminator;
|
121
|
+
plan = deliminator.Optimize(std::move(plan));
|
101
122
|
});
|
102
123
|
|
103
124
|
// then we perform the join ordering optimization
|
@@ -107,12 +128,6 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
|
|
107
128
|
plan = optimizer.Optimize(std::move(plan));
|
108
129
|
});
|
109
130
|
|
110
|
-
// removes any redundant DelimGets/DelimJoins
|
111
|
-
RunOptimizer(OptimizerType::DELIMINATOR, [&]() {
|
112
|
-
Deliminator deliminator(context);
|
113
|
-
plan = deliminator.Optimize(std::move(plan));
|
114
|
-
});
|
115
|
-
|
116
131
|
// rewrites UNNESTs in DelimJoins by moving them to the projection
|
117
132
|
RunOptimizer(OptimizerType::UNNEST_REWRITER, [&]() {
|
118
133
|
UnnestRewriter unnest_rewriter;
|
@@ -125,10 +140,10 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
|
|
125
140
|
unused.VisitOperator(*plan);
|
126
141
|
});
|
127
142
|
|
128
|
-
//
|
129
|
-
RunOptimizer(OptimizerType::
|
130
|
-
|
131
|
-
|
143
|
+
// Remove duplicate groups from aggregates
|
144
|
+
RunOptimizer(OptimizerType::DUPLICATE_GROUPS, [&]() {
|
145
|
+
RemoveDuplicateGroups remove;
|
146
|
+
remove.VisitOperator(*plan);
|
132
147
|
});
|
133
148
|
|
134
149
|
// then we extract common subexpressions inside the different operators
|
@@ -137,16 +152,38 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
|
|
137
152
|
cse_optimizer.VisitOperator(*plan);
|
138
153
|
});
|
139
154
|
|
155
|
+
// creates projection maps so unused columns are projected out early
|
156
|
+
RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() {
|
157
|
+
ColumnLifetimeAnalyzer column_lifetime(true);
|
158
|
+
column_lifetime.VisitOperator(*plan);
|
159
|
+
});
|
160
|
+
|
161
|
+
// perform statistics propagation
|
162
|
+
column_binding_map_t<unique_ptr<BaseStatistics>> statistics_map;
|
163
|
+
RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() {
|
164
|
+
StatisticsPropagator propagator(*this);
|
165
|
+
propagator.PropagateStatistics(plan);
|
166
|
+
statistics_map = propagator.GetStatisticsMap();
|
167
|
+
});
|
168
|
+
|
169
|
+
// remove duplicate aggregates
|
140
170
|
RunOptimizer(OptimizerType::COMMON_AGGREGATE, [&]() {
|
141
171
|
CommonAggregateOptimizer common_aggregate;
|
142
172
|
common_aggregate.VisitOperator(*plan);
|
143
173
|
});
|
144
174
|
|
175
|
+
// creates projection maps so unused columns are projected out early
|
145
176
|
RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() {
|
146
177
|
ColumnLifetimeAnalyzer column_lifetime(true);
|
147
178
|
column_lifetime.VisitOperator(*plan);
|
148
179
|
});
|
149
180
|
|
181
|
+
// compress data based on statistics for materializing operators
|
182
|
+
RunOptimizer(OptimizerType::COMPRESSED_MATERIALIZATION, [&]() {
|
183
|
+
CompressedMaterialization compressed_materialization(context, binder, std::move(statistics_map));
|
184
|
+
compressed_materialization.Compress(plan);
|
185
|
+
});
|
186
|
+
|
150
187
|
// transform ORDER BY + LIMIT to TopN
|
151
188
|
RunOptimizer(OptimizerType::TOP_N, [&]() {
|
152
189
|
TopN topn;
|
@@ -42,13 +42,13 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownCrossProduct(unique_ptr<Logi
|
|
42
42
|
vector<JoinCondition> conditions;
|
43
43
|
vector<unique_ptr<Expression>> arbitrary_expressions;
|
44
44
|
auto join_type = JoinType::INNER;
|
45
|
-
LogicalComparisonJoin::ExtractJoinConditions(join_type, op->children[0], op->children[1],
|
46
|
-
right_bindings, join_expressions, conditions,
|
45
|
+
LogicalComparisonJoin::ExtractJoinConditions(GetContext(), join_type, op->children[0], op->children[1],
|
46
|
+
left_bindings, right_bindings, join_expressions, conditions,
|
47
47
|
arbitrary_expressions);
|
48
48
|
// create the join from the join conditions
|
49
|
-
return LogicalComparisonJoin::CreateJoin(JoinType::INNER, JoinRefType::REGULAR,
|
50
|
-
std::move(op->children[
|
51
|
-
std::move(arbitrary_expressions));
|
49
|
+
return LogicalComparisonJoin::CreateJoin(GetContext(), JoinType::INNER, JoinRefType::REGULAR,
|
50
|
+
std::move(op->children[0]), std::move(op->children[1]),
|
51
|
+
std::move(conditions), std::move(arbitrary_expressions));
|
52
52
|
} else {
|
53
53
|
// no join conditions found: keep as cross product
|
54
54
|
return op;
|
@@ -0,0 +1,127 @@
|
|
1
|
+
#include "duckdb/optimizer/remove_duplicate_groups.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/pair.hpp"
|
4
|
+
#include "duckdb/planner/expression/bound_columnref_expression.hpp"
|
5
|
+
#include "duckdb/planner/operator/logical_aggregate.hpp"
|
6
|
+
|
7
|
+
namespace duckdb {
|
8
|
+
|
9
|
+
void RemoveDuplicateGroups::VisitOperator(LogicalOperator &op) {
|
10
|
+
switch (op.type) {
|
11
|
+
case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY:
|
12
|
+
VisitAggregate(op.Cast<LogicalAggregate>());
|
13
|
+
break;
|
14
|
+
default:
|
15
|
+
break;
|
16
|
+
}
|
17
|
+
LogicalOperatorVisitor::VisitOperatorExpressions(op);
|
18
|
+
LogicalOperatorVisitor::VisitOperatorChildren(op);
|
19
|
+
}
|
20
|
+
|
21
|
+
void RemoveDuplicateGroups::VisitAggregate(LogicalAggregate &aggr) {
|
22
|
+
if (!aggr.grouping_functions.empty()) {
|
23
|
+
return;
|
24
|
+
}
|
25
|
+
|
26
|
+
auto &groups = aggr.groups;
|
27
|
+
|
28
|
+
column_binding_map_t<idx_t> duplicate_map;
|
29
|
+
vector<pair<idx_t, idx_t>> duplicates;
|
30
|
+
for (idx_t group_idx = 0; group_idx < groups.size(); group_idx++) {
|
31
|
+
const auto &group = groups[group_idx];
|
32
|
+
if (group->type != ExpressionType::BOUND_COLUMN_REF) {
|
33
|
+
continue;
|
34
|
+
}
|
35
|
+
const auto &colref = group->Cast<BoundColumnRefExpression>();
|
36
|
+
const auto &binding = colref.binding;
|
37
|
+
const auto it = duplicate_map.find(binding);
|
38
|
+
if (it == duplicate_map.end()) {
|
39
|
+
duplicate_map.emplace(binding, group_idx);
|
40
|
+
} else {
|
41
|
+
duplicates.emplace_back(it->second, group_idx);
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
if (duplicates.empty()) {
|
46
|
+
return;
|
47
|
+
}
|
48
|
+
|
49
|
+
// Sort duplicates by max duplicate group idx, because we want to remove groups from the back
|
50
|
+
sort(duplicates.begin(), duplicates.end(),
|
51
|
+
[](const pair<idx_t, idx_t> &lhs, const pair<idx_t, idx_t> &rhs) { return lhs.second > rhs.second; });
|
52
|
+
|
53
|
+
// Now we want to remove the duplicates, but this alters the column bindings coming out of the aggregate,
|
54
|
+
// so we keep track of how they shift and do another round of column binding replacements
|
55
|
+
column_binding_map_t<ColumnBinding> group_binding_map;
|
56
|
+
for (idx_t group_idx = 0; group_idx < groups.size(); group_idx++) {
|
57
|
+
group_binding_map.emplace(ColumnBinding(aggr.group_index, group_idx),
|
58
|
+
ColumnBinding(aggr.group_index, group_idx));
|
59
|
+
}
|
60
|
+
|
61
|
+
for (idx_t duplicate_idx = 0; duplicate_idx < duplicates.size(); duplicate_idx++) {
|
62
|
+
const auto &duplicate = duplicates[duplicate_idx];
|
63
|
+
const auto &remaining_idx = duplicate.first;
|
64
|
+
const auto &removed_idx = duplicate.second;
|
65
|
+
|
66
|
+
// Store expression and remove it from groups
|
67
|
+
stored_expressions.emplace_back(std::move(groups[removed_idx]));
|
68
|
+
groups.erase(groups.begin() + removed_idx);
|
69
|
+
|
70
|
+
// This optimizer should run before statistics propagation, so this should be empty
|
71
|
+
// If it runs after, then group_stats should be updated too
|
72
|
+
D_ASSERT(aggr.group_stats.empty());
|
73
|
+
|
74
|
+
// Remove from grouping sets too
|
75
|
+
for (auto &grouping_set : aggr.grouping_sets) {
|
76
|
+
// Replace removed group with duplicate remaining group
|
77
|
+
if (grouping_set.erase(removed_idx) != 0) {
|
78
|
+
grouping_set.insert(remaining_idx);
|
79
|
+
}
|
80
|
+
|
81
|
+
// Indices shifted: Reinsert groups in the set with group_idx - 1
|
82
|
+
vector<idx_t> group_indices_to_reinsert;
|
83
|
+
for (auto &entry : grouping_set) {
|
84
|
+
if (entry > removed_idx) {
|
85
|
+
group_indices_to_reinsert.emplace_back(entry);
|
86
|
+
}
|
87
|
+
}
|
88
|
+
for (const auto group_idx : group_indices_to_reinsert) {
|
89
|
+
grouping_set.erase(group_idx);
|
90
|
+
}
|
91
|
+
for (const auto group_idx : group_indices_to_reinsert) {
|
92
|
+
grouping_set.insert(group_idx - 1);
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
// Update mapping
|
97
|
+
auto it = group_binding_map.find(ColumnBinding(aggr.group_index, removed_idx));
|
98
|
+
D_ASSERT(it != group_binding_map.end());
|
99
|
+
it->second.column_index = remaining_idx;
|
100
|
+
|
101
|
+
for (auto &map_entry : group_binding_map) {
|
102
|
+
auto &new_binding = map_entry.second;
|
103
|
+
if (new_binding.column_index > removed_idx) {
|
104
|
+
new_binding.column_index--;
|
105
|
+
}
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
// Replace all references to the old group binding with the new group binding
|
110
|
+
for (const auto &map_entry : group_binding_map) {
|
111
|
+
auto it = column_references.find(map_entry.first);
|
112
|
+
if (it != column_references.end()) {
|
113
|
+
for (auto expr : it->second) {
|
114
|
+
expr.get().binding = map_entry.second;
|
115
|
+
}
|
116
|
+
}
|
117
|
+
}
|
118
|
+
}
|
119
|
+
|
120
|
+
unique_ptr<Expression> RemoveDuplicateGroups::VisitReplace(BoundColumnRefExpression &expr,
|
121
|
+
unique_ptr<Expression> *expr_ptr) {
|
122
|
+
// add a column reference
|
123
|
+
column_references[expr.binding].push_back(expr);
|
124
|
+
return nullptr;
|
125
|
+
}
|
126
|
+
|
127
|
+
} // namespace duckdb
|
@@ -302,6 +302,10 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
|
|
302
302
|
everything_referenced = true;
|
303
303
|
break;
|
304
304
|
}
|
305
|
+
case LogicalOperatorType::LOGICAL_MATERIALIZED_CTE: {
|
306
|
+
everything_referenced = true;
|
307
|
+
break;
|
308
|
+
}
|
305
309
|
case LogicalOperatorType::LOGICAL_CTE_REF: {
|
306
310
|
everything_referenced = true;
|
307
311
|
break;
|