duckdb 0.8.2-dev150.0 → 0.8.2-dev1559.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +4619 -4446
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/extra_type_info.cpp +506 -0
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +14 -14
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types.cpp +8 -655
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +13 -22
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +4 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
- package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +79 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +44 -31
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
- package/src/statement.cpp +10 -3
- package/test/test_all_types.test.ts +233 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -0,0 +1,478 @@
|
|
1
|
+
#include "duckdb/optimizer/compressed_materialization.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/execution/expression_executor.hpp"
|
4
|
+
#include "duckdb/function/scalar/operators.hpp"
|
5
|
+
#include "duckdb/optimizer/column_binding_replacer.hpp"
|
6
|
+
#include "duckdb/optimizer/topn_optimizer.hpp"
|
7
|
+
#include "duckdb/planner/binder.hpp"
|
8
|
+
#include "duckdb/planner/expression/bound_constant_expression.hpp"
|
9
|
+
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
10
|
+
#include "duckdb/planner/expression_iterator.hpp"
|
11
|
+
#include "duckdb/planner/operator/logical_comparison_join.hpp"
|
12
|
+
#include "duckdb/planner/operator/logical_delim_join.hpp"
|
13
|
+
#include "duckdb/planner/operator/logical_projection.hpp"
|
14
|
+
|
15
|
+
namespace duckdb {
|
16
|
+
|
17
|
+
// Snapshots the column bindings and types of a child operator and decides, per column, whether it may
// be compressed. Every column starts out compressible; a column whose binding occurs in
// referenced_bindings is marked as not compressible.
CMChildInfo::CMChildInfo(LogicalOperator &op, const column_binding_set_t &referenced_bindings)
    : bindings_before(op.GetColumnBindings()), types(op.types), can_compress(bindings_before.size(), true) {
	// One set lookup per child column, instead of comparing every referenced binding against every column
	for (idx_t binding_idx = 0; binding_idx < bindings_before.size(); binding_idx++) {
		if (referenced_bindings.find(bindings_before[binding_idx]) != referenced_bindings.end()) {
			can_compress[binding_idx] = false;
		}
	}
}
|
27
|
+
|
28
|
+
// Stores the binding and type of a column. A freshly created entry is flagged as not needing
// decompression; no statistics are attached here.
CMBindingInfo::CMBindingInfo(ColumnBinding binding_p, const LogicalType &type_p)
    : binding(binding_p), type(type_p), needs_decompression(false) {
}
|
31
|
+
|
32
|
+
// Remembers which children of op are considered (child_idxs) and builds one CMChildInfo per listed
// child, in the same order as child_idxs.
CompressedMaterializationInfo::CompressedMaterializationInfo(LogicalOperator &op, vector<idx_t> &&child_idxs_p,
                                                             const column_binding_set_t &referenced_bindings)
    : child_idxs(child_idxs_p) {
	const auto child_count = child_idxs.size();
	child_info.reserve(child_count);
	for (idx_t i = 0; i < child_count; i++) {
		auto &child_op = *op.children[child_idxs[i]];
		child_info.emplace_back(child_op, referenced_bindings);
	}
}
|
40
|
+
|
41
|
+
// Takes ownership of an expression together with the statistics that describe its result.
CompressExpression::CompressExpression(unique_ptr<Expression> expression_p, unique_ptr<BaseStatistics> stats_p)
    : expression(std::move(expression_p)), stats(std::move(stats_p)) {
}
|
44
|
+
|
45
|
+
// Keeps references to the client context and binder, and takes over the statistics map that the
// compression decisions in this class are based on.
CompressedMaterialization::CompressedMaterialization(ClientContext &context_p, Binder &binder_p,
                                                     statistics_map_t &&statistics_map_p)
    : context(context_p), binder(binder_p), statistics_map(std::move(statistics_map_p)) {
}
|
49
|
+
|
50
|
+
void CompressedMaterialization::GetReferencedBindings(const Expression &expression,
|
51
|
+
column_binding_set_t &referenced_bindings) {
|
52
|
+
if (expression.GetExpressionType() == ExpressionType::BOUND_COLUMN_REF) {
|
53
|
+
const auto &col_ref = expression.Cast<BoundColumnRefExpression>();
|
54
|
+
referenced_bindings.insert(col_ref.binding);
|
55
|
+
} else {
|
56
|
+
ExpressionIterator::EnumerateChildren(
|
57
|
+
expression, [&](const Expression &child) { GetReferencedBindings(child, referenced_bindings); });
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
// Updates the binding_map entry for the given binding, if there is one: sets its needs_decompression
// flag and, when the statistics map holds statistics for the binding, stores a copy of them on the entry.
// Bindings that are not present in info.binding_map are ignored.
void CompressedMaterialization::UpdateBindingInfo(CompressedMaterializationInfo &info, const ColumnBinding &binding,
                                                  bool needs_decompression) {
	auto &binding_map = info.binding_map;
	auto binding_it = binding_map.find(binding);
	if (binding_it == binding_map.end()) {
		return;
	}

	auto &binding_info = binding_it->second;
	binding_info.needs_decompression = needs_decompression;

	// Reuse the iterator instead of looking the binding up a second time, and skip entries that hold no
	// statistics object (GetCompressExpression guards against such entries in the same way)
	auto stats_it = statistics_map.find(binding);
	if (stats_it != statistics_map.end() && stats_it->second) {
		binding_info.stats = stats_it->second->ToUnique();
	}
}
|
76
|
+
|
77
|
+
// Entry point: remembers the plan root, resolves its operator types, and then runs the recursive
// compression pass starting at op.
void CompressedMaterialization::Compress(unique_ptr<LogicalOperator> &op) {
	// The root is kept so that later binding replacements can visit the plan from the top
	root = op.get();
	root->ResolveOperatorTypes();

	CompressInternal(op);
}
|
83
|
+
|
84
|
+
// Recursive pass over the plan. Children are handled before the operator itself. Aggregates, distincts
// and order-by operators are handed to their dedicated compress routine; every other operator type is
// left untouched.
void CompressedMaterialization::CompressInternal(unique_ptr<LogicalOperator> &op) {
	if (TopN::CanOptimize(*op)) {
		// Let's not mess with the TopN optimizer: continue two levels further down
		CompressInternal(op->children[0]->children[0]);
		return;
	}

	for (idx_t child_idx = 0; child_idx < op->children.size(); child_idx++) {
		CompressInternal(op->children[child_idx]);
	}

	const auto op_type = op->type;
	if (op_type == LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY) {
		CompressAggregate(op);
	} else if (op_type == LogicalOperatorType::LOGICAL_DISTINCT) {
		CompressDistinct(op);
	} else if (op_type == LogicalOperatorType::LOGICAL_ORDER_BY) {
		CompressOrder(op);
	}
}
|
108
|
+
|
109
|
+
// For every child listed in info, tries to build compress expressions and, on success, places a
// compress projection on top of that child. If at least one child was compressed, a decompress
// projection is placed on top of op.
void CompressedMaterialization::CreateProjections(unique_ptr<LogicalOperator> &op,
                                                  CompressedMaterializationInfo &info) {
	auto &materializing_op = *op;

	idx_t compressed_children = 0;
	const auto considered_children = info.child_idxs.size();
	for (idx_t i = 0; i < considered_children; i++) {
		auto &current_child_info = info.child_info[i];
		vector<unique_ptr<CompressExpression>> child_exprs;
		if (!TryCompressChild(info, current_child_info, child_exprs)) {
			continue;
		}
		// We can compress: create a projection on top of the child operator
		auto &child_op = materializing_op.children[info.child_idxs[i]];
		CreateCompressProjection(child_op, std::move(child_exprs), info, current_child_info);
		compressed_children++;
	}

	if (compressed_children != 0) {
		CreateDecompressProjection(op, info);
	}
}
|
129
|
+
|
130
|
+
// Builds one CompressExpression per column of the child, appended to compress_exprs in column order.
// A column that can be compressed gets its compress expression; any other column gets a plain column
// reference carrying a copy of its statistics (or no statistics if none are available).
// The binding info of every column is updated with whether it now needs decompression.
// Returns true if any column was compressed here, or if any entry of info.binding_map is already
// flagged as needing decompression.
bool CompressedMaterialization::TryCompressChild(CompressedMaterializationInfo &info, const CMChildInfo &child_info,
                                                 vector<unique_ptr<CompressExpression>> &compress_exprs) {
	// Try to compress each of the column bindings of the child
	bool compressed_anything = false;
	for (idx_t child_i = 0; child_i < child_info.bindings_before.size(); child_i++) {
		const auto child_binding = child_info.bindings_before[child_i];
		const auto &child_type = child_info.types[child_i];
		const auto &can_compress = child_info.can_compress[child_i];
		auto compress_expr = GetCompressExpression(child_binding, child_type, can_compress);
		bool compressed = false;
		if (compress_expr) { // We compressed, mark the outgoing binding in need of decompression
			compress_exprs.emplace_back(std::move(compress_expr));
			compressed = true;
		} else { // We did not compress, just push a colref
			auto colref_expr = make_uniq<BoundColumnRefExpression>(child_type, child_binding);
			auto it = statistics_map.find(colref_expr->binding);
			// The map may hold an entry without a statistics object; treat it like a missing entry
			unique_ptr<BaseStatistics> colref_stats;
			if (it != statistics_map.end() && it->second) {
				colref_stats = it->second->ToUnique();
			}
			compress_exprs.emplace_back(make_uniq<CompressExpression>(std::move(colref_expr), std::move(colref_stats)));
		}
		UpdateBindingInfo(info, child_binding, compressed);
		compressed_anything = compressed_anything || compressed;
	}
	if (!compressed_anything) {
		// If we compressed anything non-generically, we still need to decompress
		for (const auto &entry : info.binding_map) {
			if (entry.second.needs_decompression) {
				compressed_anything = true;
				break;
			}
		}
	}
	return compressed_anything;
}
|
160
|
+
|
161
|
+
// Wraps child_op in a new projection that evaluates the given compress expressions, then makes the
// rest of the plan, info.binding_map and the statistics map refer to the projection's bindings instead
// of the child's original bindings.
void CompressedMaterialization::CreateCompressProjection(unique_ptr<LogicalOperator> &child_op,
                                                         vector<unique_ptr<CompressExpression>> &&compress_exprs,
                                                         CompressedMaterializationInfo &info, CMChildInfo &child_info) {
	// Move the expressions into the projection list; the statistics stay in compress_exprs for later
	vector<unique_ptr<Expression>> select_list;
	select_list.reserve(compress_exprs.size());
	for (idx_t expr_idx = 0; expr_idx < compress_exprs.size(); expr_idx++) {
		select_list.emplace_back(std::move(compress_exprs[expr_idx]->expression));
	}

	// Create the projection under a fresh table index and register that index as a compression index
	const auto projection_index = binder.GenerateTableIndex();
	auto projection = make_uniq<LogicalProjection>(projection_index, std::move(select_list));
	compression_table_indices.insert(projection_index);
	projection->ResolveOperatorTypes();

	// Put the projection in the place of the child, with the child below it
	projection->children.emplace_back(std::move(child_op));
	child_op = std::move(projection);

	// Bindings and types as they are produced by the projection
	child_info.bindings_after = child_op->GetColumnBindings();
	const auto &types_after = child_op->types;

	// Collect old -> new replacements, dropping the statistics of every old binding along the way
	ColumnBindingReplacer replacer;
	auto &replacements = replacer.replacement_bindings;
	idx_t col_idx = 0;
	for (const auto &binding_before : child_info.bindings_before) {
		replacements.emplace_back(binding_before, child_info.bindings_after[col_idx], types_after[col_idx]);
		statistics_map.erase(binding_before);
		col_idx++;
	}

	// Make sure we skip the compress operator when replacing bindings, then make the plan consistent again
	replacer.stop_operator = child_op.get();
	replacer.VisitOperator(*root);

	// Apply the same replacements to the binding map: both the stored binding and the map key
	auto &binding_map = info.binding_map;
	for (auto &replacement : replacements) {
		auto entry = binding_map.find(replacement.old_binding);
		if (entry == binding_map.end()) {
			continue;
		}
		auto &binding_info = entry->second;
		if (binding_info.binding == replacement.old_binding) {
			binding_info.binding = replacement.new_binding;
		}

		if (entry->first == replacement.old_binding) {
			// Re-key the entry: take the value out, drop the old key, insert under the new key
			auto moved_info = std::move(binding_info);
			binding_map.erase(entry);
			binding_map.emplace(replacement.new_binding, std::move(moved_info));
		}
	}

	// Register the statistics of the projected expressions under the new bindings
	const auto projected_count = child_info.bindings_after.size();
	for (idx_t out_idx = 0; out_idx < projected_count; out_idx++) {
		statistics_map.emplace(child_info.bindings_after[out_idx], std::move(compress_exprs[out_idx]->stats));
	}
}
|
227
|
+
|
228
|
+
// Places a projection on top of op that outputs every column of op, wrapping the columns that are
// flagged in info.binding_map as needing decompression in a decompress expression. Unless the
// projection ends up on top of the plan root, the rest of the plan is rewritten to reference the
// projection's bindings, and statistics are registered for them.
void CompressedMaterialization::CreateDecompressProjection(unique_ptr<LogicalOperator> &op,
                                                           CompressedMaterializationInfo &info) {
	const auto old_bindings = op->GetColumnBindings();
	op->ResolveOperatorTypes();
	const auto &old_types = op->types;
	const auto column_count = old_bindings.size();

	// Create decompress expressions for everything we compressed, plain column references otherwise
	vector<unique_ptr<Expression>> decompress_exprs;
	vector<optional_ptr<BaseStatistics>> statistics;
	for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
		const auto &out_binding = old_bindings[col_idx];
		auto out_expr = make_uniq_base<Expression, BoundColumnRefExpression>(old_types[col_idx], out_binding);
		optional_ptr<BaseStatistics> out_stats;
		for (auto &entry : info.binding_map) {
			auto &binding_info = entry.second;
			if (binding_info.binding == out_binding) {
				out_stats = binding_info.stats.get();
				if (binding_info.needs_decompression) {
					out_expr = GetDecompressExpression(std::move(out_expr), binding_info.type, *out_stats);
				}
			}
		}
		statistics.push_back(out_stats);
		decompress_exprs.emplace_back(std::move(out_expr));
	}

	// Replace op with a projection under a fresh table index, registered as a decompression index
	const auto projection_index = binder.GenerateTableIndex();
	auto projection = make_uniq<LogicalProjection>(projection_index, std::move(decompress_exprs));
	decompression_table_indices.insert(projection_index);

	projection->children.emplace_back(std::move(op));
	op = std::move(projection);

	// If the projection now sits on top of the root, it becomes the new root and we are done
	if (op->children[0].get() == root.get()) {
		root = op.get();
		return;
	}

	// Bindings and types as they are produced by the projection
	auto projected_bindings = op->GetColumnBindings();
	op->ResolveOperatorTypes();
	auto &projected_types = op->types;

	// Collect old -> new replacements and carry the statistics over to the new bindings
	ColumnBindingReplacer replacer;
	auto &replacements = replacer.replacement_bindings;
	for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
		const auto &projected_binding = projected_bindings[col_idx];
		replacements.emplace_back(old_bindings[col_idx], projected_binding, projected_types[col_idx]);

		if (statistics[col_idx]) {
			statistics_map[projected_binding] = statistics[col_idx]->ToUnique();
		}
	}

	// Make sure we skip the decompress operator when replacing bindings, then make the plan consistent again
	replacer.stop_operator = op.get();
	replacer.VisitOperator(*root);
}
|
295
|
+
|
296
|
+
// Builds a compress expression for the column with the given binding and type.
// Returns nullptr if the column may not be compressed, if the statistics map has no statistics for the
// binding, or if the statistics-based overload finds no suitable compression.
unique_ptr<CompressExpression> CompressedMaterialization::GetCompressExpression(const ColumnBinding &binding,
                                                                                const LogicalType &type,
                                                                                const bool &can_compress) {
	auto stats_entry = statistics_map.find(binding);
	const bool usable = can_compress && stats_entry != statistics_map.end() && stats_entry->second;
	if (!usable) {
		return nullptr;
	}
	auto column_ref = make_uniq<BoundColumnRefExpression>(type, binding);
	const auto &column_stats = *stats_entry->second;
	return GetCompressExpression(std::move(column_ref), column_stats);
}
|
307
|
+
|
308
|
+
// Dispatches on the type of the input expression: integral types go to GetIntegralCompress, VARCHAR
// goes to GetStringCompress. Returns nullptr for any other type, and when the type of the statistics
// does not match the type of the input.
unique_ptr<CompressExpression> CompressedMaterialization::GetCompressExpression(unique_ptr<Expression> input,
                                                                                const BaseStatistics &stats) {
	const auto &input_type = input->return_type;
	if (input_type != stats.GetType()) { // LCOV_EXCL_START
		return nullptr;
	} // LCOV_EXCL_STOP

	if (input_type.IsIntegral()) {
		return GetIntegralCompress(std::move(input), stats);
	}
	if (input_type.id() == LogicalTypeId::VARCHAR) {
		return GetStringCompress(std::move(input), stats);
	}
	return nullptr;
}
|
321
|
+
|
322
|
+
// Evaluates (max - min) of the numeric statistics as a scalar expression of the given type and returns
// the result. If the expression cannot be evaluated, the maximum HUGEINT value is returned instead.
static Value GetIntegralRangeValue(ClientContext &context, const LogicalType &type, const BaseStatistics &stats) {
	auto stats_min = NumericStats::Min(stats);
	auto stats_max = NumericStats::Max(stats);

	// Build the expression "max - min"
	vector<unique_ptr<Expression>> operands;
	operands.emplace_back(make_uniq<BoundConstantExpression>(stats_max));
	operands.emplace_back(make_uniq<BoundConstantExpression>(stats_min));
	BoundFunctionExpression subtraction(type, SubtractFun::GetFunction(type, type), std::move(operands), nullptr);

	Value range;
	if (!ExpressionExecutor::TryEvaluateScalar(context, subtraction, range)) {
		// Couldn't evaluate: Return max hugeint as range so GetIntegralCompress will return nullptr
		return Value::HUGEINT(NumericLimits<hugeint_t>::Maximum());
	}
	return range;
}
|
339
|
+
|
340
|
+
// Builds a compress expression for an integral input: picks the smallest unsigned type that can hold
// the value range (max - min) taken from the statistics and, if that type is smaller than the input
// type, returns a compress function call on (input, min) together with statistics for the result.
// Returns nullptr if the input type is one byte wide, if the statistics have no min/max, if the range
// cannot be cast to UBIGINT, or if the chosen type is as wide as the input type.
unique_ptr<CompressExpression> CompressedMaterialization::GetIntegralCompress(unique_ptr<Expression> input,
                                                                              const BaseStatistics &stats) {
	const auto &input_type = input->return_type;
	const auto input_size = GetTypeIdSize(input_type.InternalType());
	if (input_size == 1 || !NumericStats::HasMinMax(stats)) {
		return nullptr;
	}

	// Get range and cast to UBIGINT (might fail for HUGEINT, in which case we just return)
	Value range_value = GetIntegralRangeValue(context, input_type, stats);
	if (!range_value.DefaultTryCastAs(LogicalType::UBIGINT)) {
		return nullptr;
	}

	// Get the smallest type that the range can fit into, checked from widest to narrowest
	const auto range = UBigIntValue::Get(range_value);
	LogicalType cast_type;
	if (range > NumericLimits<uint32_t>::Maximum()) {
		D_ASSERT(range <= NumericLimits<uint64_t>::Maximum());
		cast_type = LogicalType::UBIGINT;
	} else if (range > NumericLimits<uint16_t>::Maximum()) {
		cast_type = LogicalType::UINTEGER;
	} else if (range > NumericLimits<uint8_t>::Maximum()) {
		cast_type = LogicalType::USMALLINT;
	} else {
		cast_type = LogicalType::UTINYINT;
	}

	// Check if type that fits the range is smaller than the input type
	const auto cast_size = GetTypeIdSize(cast_type.InternalType());
	if (cast_size == input_size) {
		return nullptr;
	}
	D_ASSERT(cast_size < input_size);

	// Compressing will yield a benefit: call the compress function with the input and the minimum
	auto compress_function = CMIntegralCompressFun::GetFunction(input_type, cast_type);
	vector<unique_ptr<Expression>> function_args;
	function_args.emplace_back(std::move(input));
	function_args.emplace_back(make_uniq<BoundConstantExpression>(NumericStats::Min(stats)));
	auto compress_expr =
	    make_uniq<BoundFunctionExpression>(cast_type, compress_function, std::move(function_args), nullptr);

	// Statistics of the compressed column: base copied from the input, values span [0, range]
	auto compress_stats = BaseStatistics::CreateEmpty(cast_type);
	compress_stats.CopyBase(stats);
	NumericStats::SetMin(compress_stats, Value(0).DefaultCastAs(cast_type));
	NumericStats::SetMax(compress_stats, range_value.DefaultCastAs(cast_type));

	return make_uniq<CompressExpression>(std::move(compress_expr), compress_stats.ToUnique());
}
|
388
|
+
|
389
|
+
// Builds a compress expression for a string input: picks the first type from
// CompressedMaterializationFunctions::StringTypes() whose size is strictly larger than the maximum
// string length in the statistics. Returns nullptr if the statistics have no maximum string length or
// no type is large enough. When the picked type is USMALLINT, numeric min/max statistics are derived
// from the first byte of the min/max strings, and the type is narrowed to UTINYINT if that fits.
unique_ptr<CompressExpression> CompressedMaterialization::GetStringCompress(unique_ptr<Expression> input,
                                                                            const BaseStatistics &stats) {
	if (!StringStats::HasMaxStringLength(stats)) {
		return nullptr;
	}

	const auto max_string_length = StringStats::MaxStringLength(stats);
	LogicalType cast_type = LogicalType::INVALID;
	for (const auto &candidate_type : CompressedMaterializationFunctions::StringTypes()) {
		if (max_string_length >= GetTypeIdSize(candidate_type.InternalType())) {
			continue;
		}
		cast_type = candidate_type;
		break;
	}
	if (cast_type == LogicalType::INVALID) {
		return nullptr;
	}

	auto compress_stats = BaseStatistics::CreateEmpty(cast_type);
	compress_stats.CopyBase(stats);
	if (cast_type.id() == LogicalTypeId::USMALLINT) {
		auto min_string = StringStats::Min(stats);
		auto max_string = StringStats::Max(stats);

		// First byte of the min/max string, or 0 when there is no byte to read
		const bool has_min_byte = max_string_length != 0 && min_string.length() != 0;
		const bool has_max_byte = max_string_length != 0 && max_string.length() != 0;
		const uint8_t min_numeric = has_min_byte ? static_cast<uint8_t>(*min_string.c_str()) : 0;
		const uint8_t max_numeric = has_max_byte ? static_cast<uint8_t>(*max_string.c_str()) : 0;

		if (max_numeric < NumericLimits<uint8_t>::Maximum()) {
			// max_numeric + 1 still fits into one byte: narrow the result to UTINYINT
			cast_type = LogicalType::UTINYINT;
			compress_stats = BaseStatistics::CreateEmpty(cast_type);
			compress_stats.CopyBase(stats);
			NumericStats::SetMin(compress_stats, Value::UTINYINT(min_numeric));
			NumericStats::SetMax(compress_stats, Value::UTINYINT(max_numeric + 1));
		} else {
			NumericStats::SetMin(compress_stats, Value::USMALLINT(min_numeric));
			NumericStats::SetMax(compress_stats, Value::USMALLINT(max_numeric + 1));
		}
	}

	auto compress_function = CMStringCompressFun::GetFunction(cast_type);
	vector<unique_ptr<Expression>> function_args;
	function_args.emplace_back(std::move(input));
	auto compress_expr =
	    make_uniq<BoundFunctionExpression>(cast_type, compress_function, std::move(function_args), nullptr);
	return make_uniq<CompressExpression>(std::move(compress_expr), compress_stats.ToUnique());
}
|
443
|
+
|
444
|
+
// Dispatches on the result type: types with an integral internal type go to GetIntegralDecompress,
// VARCHAR goes to GetStringDecompress. Any other result type raises an InternalException.
unique_ptr<Expression> CompressedMaterialization::GetDecompressExpression(unique_ptr<Expression> input,
                                                                          const LogicalType &result_type,
                                                                          const BaseStatistics &stats) {
	if (TypeIsIntegral(result_type.InternalType())) {
		return GetIntegralDecompress(std::move(input), result_type, stats);
	}
	if (result_type.id() == LogicalTypeId::VARCHAR) {
		return GetStringDecompress(std::move(input), stats);
	}
	throw InternalException("Type other than integral/string marked for decompression!");
}
|
456
|
+
|
457
|
+
// Builds the call to the integral decompress function on (input, min), where min is the minimum from
// the given statistics. The resulting expression has type result_type.
unique_ptr<Expression> CompressedMaterialization::GetIntegralDecompress(unique_ptr<Expression> input,
                                                                        const LogicalType &result_type,
                                                                        const BaseStatistics &stats) {
	D_ASSERT(NumericStats::HasMinMax(stats));
	// The function is selected by the compressed (input) type and the decompressed (result) type;
	// it has to be fetched before the input is moved into the argument list
	auto decompress_function = CMIntegralDecompressFun::GetFunction(input->return_type, result_type);

	vector<unique_ptr<Expression>> function_args;
	function_args.emplace_back(std::move(input));
	function_args.emplace_back(make_uniq<BoundConstantExpression>(NumericStats::Min(stats)));
	return make_uniq<BoundFunctionExpression>(result_type, decompress_function, std::move(function_args), nullptr);
}
|
467
|
+
|
468
|
+
// Builds the call to the string decompress function on the input. The resulting expression has the
// return type of the selected decompress function.
unique_ptr<Expression> CompressedMaterialization::GetStringDecompress(unique_ptr<Expression> input,
                                                                      const BaseStatistics &stats) {
	D_ASSERT(StringStats::HasMaxStringLength(stats));
	// The function is selected by the compressed (input) type; it has to be fetched before the input
	// is moved into the argument list
	auto decompress_function = CMStringDecompressFun::GetFunction(input->return_type);

	vector<unique_ptr<Expression>> function_args;
	function_args.emplace_back(std::move(input));
	return make_uniq<BoundFunctionExpression>(decompress_function.return_type, decompress_function,
	                                          std::move(function_args), nullptr);
}
|
477
|
+
|
478
|
+
} // namespace duckdb
|