duckdb 0.8.2-dev145.0 → 0.8.2-dev1493.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +42 -5
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +2 -2
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +30 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +3 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
- package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +64 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +5 -0
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +8 -0
- package/src/statement.cpp +10 -3
- package/test/test_all_types.test.ts +233 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -0,0 +1,212 @@
|
|
1
|
+
#include "duckdb/function/function_set.hpp"
|
2
|
+
#include "duckdb/function/scalar/compressed_materialization_functions.hpp"
|
3
|
+
|
4
|
+
namespace duckdb {
|
5
|
+
|
6
|
+
//! Returns the name of the integral compress function for the given result type:
//! "__internal_compress_integral_" followed by the lower-cased name of the result type id.
static string IntegralCompressFunctionName(const LogicalType &result_type) {
	const auto type_suffix = StringUtil::Lower(LogicalTypeIdToString(result_type.id()));
	return StringUtil::Format("__internal_compress_integral_%s", type_suffix);
}
|
10
|
+
|
11
|
+
//! Generic integral compress kernel: the compressed value is the distance of the input from min_val.
template <class INPUT_TYPE, class RESULT_TYPE>
struct TemplatedIntegralCompress {
	//! Returns (input - min_val) converted to RESULT_TYPE; min_val must not exceed input
	static inline RESULT_TYPE Operation(const INPUT_TYPE &input, const INPUT_TYPE &min_val) {
		D_ASSERT(min_val <= input);
		const auto distance = input - min_val;
		return distance;
	}
};
|
18
|
+
|
19
|
+
template <class RESULT_TYPE>
|
20
|
+
struct TemplatedIntegralCompress<hugeint_t, RESULT_TYPE> {
|
21
|
+
static inline RESULT_TYPE Operation(const hugeint_t &input, const hugeint_t &min_val) {
|
22
|
+
D_ASSERT(min_val <= input);
|
23
|
+
return (input - min_val).lower;
|
24
|
+
}
|
25
|
+
};
|
26
|
+
|
27
|
+
template <class INPUT_TYPE, class RESULT_TYPE>
|
28
|
+
static void IntegralCompressFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
29
|
+
D_ASSERT(args.ColumnCount() == 2);
|
30
|
+
D_ASSERT(args.data[1].GetVectorType() == VectorType::CONSTANT_VECTOR);
|
31
|
+
const auto min_val = ConstantVector::GetData<INPUT_TYPE>(args.data[1])[0];
|
32
|
+
UnaryExecutor::Execute<INPUT_TYPE, RESULT_TYPE>(args.data[0], result, args.size(), [&](const INPUT_TYPE &input) {
|
33
|
+
return TemplatedIntegralCompress<INPUT_TYPE, RESULT_TYPE>::Operation(input, min_val);
|
34
|
+
});
|
35
|
+
}
|
36
|
+
|
37
|
+
template <class INPUT_TYPE, class RESULT_TYPE>
|
38
|
+
static scalar_function_t GetIntegralCompressFunction(const LogicalType &input_type, const LogicalType &result_type) {
|
39
|
+
return IntegralCompressFunction<INPUT_TYPE, RESULT_TYPE>;
|
40
|
+
}
|
41
|
+
|
42
|
+
template <class INPUT_TYPE>
|
43
|
+
static scalar_function_t GetIntegralCompressFunctionResultSwitch(const LogicalType &input_type,
|
44
|
+
const LogicalType &result_type) {
|
45
|
+
switch (result_type.id()) {
|
46
|
+
case LogicalTypeId::UTINYINT:
|
47
|
+
return GetIntegralCompressFunction<INPUT_TYPE, uint8_t>(input_type, result_type);
|
48
|
+
case LogicalTypeId::USMALLINT:
|
49
|
+
return GetIntegralCompressFunction<INPUT_TYPE, uint16_t>(input_type, result_type);
|
50
|
+
case LogicalTypeId::UINTEGER:
|
51
|
+
return GetIntegralCompressFunction<INPUT_TYPE, uint32_t>(input_type, result_type);
|
52
|
+
case LogicalTypeId::UBIGINT:
|
53
|
+
return GetIntegralCompressFunction<INPUT_TYPE, uint64_t>(input_type, result_type);
|
54
|
+
default:
|
55
|
+
throw InternalException("Unexpected result type in GetIntegralCompressFunctionResultSwitch");
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
//! Picks the compress kernel based on the id of the input type, then dispatches on the result type.
//! Supported input types are SMALLINT, INTEGER, BIGINT, HUGEINT, USMALLINT, UINTEGER and UBIGINT;
//! anything else throws an InternalException.
static scalar_function_t GetIntegralCompressFunctionInputSwitch(const LogicalType &input_type,
                                                                const LogicalType &result_type) {
	const auto input_id = input_type.id();
	if (input_id == LogicalTypeId::SMALLINT) {
		return GetIntegralCompressFunctionResultSwitch<int16_t>(input_type, result_type);
	}
	if (input_id == LogicalTypeId::INTEGER) {
		return GetIntegralCompressFunctionResultSwitch<int32_t>(input_type, result_type);
	}
	if (input_id == LogicalTypeId::BIGINT) {
		return GetIntegralCompressFunctionResultSwitch<int64_t>(input_type, result_type);
	}
	if (input_id == LogicalTypeId::HUGEINT) {
		return GetIntegralCompressFunctionResultSwitch<hugeint_t>(input_type, result_type);
	}
	if (input_id == LogicalTypeId::USMALLINT) {
		return GetIntegralCompressFunctionResultSwitch<uint16_t>(input_type, result_type);
	}
	if (input_id == LogicalTypeId::UINTEGER) {
		return GetIntegralCompressFunctionResultSwitch<uint32_t>(input_type, result_type);
	}
	if (input_id == LogicalTypeId::UBIGINT) {
		return GetIntegralCompressFunctionResultSwitch<uint64_t>(input_type, result_type);
	}
	throw InternalException("Unexpected input type in GetIntegralCompressFunctionInputSwitch");
}
|
80
|
+
|
81
|
+
//! Returns the name of the integral decompress function for the given result type:
//! "__internal_decompress_integral_" followed by the lower-cased name of the result type id.
static string IntegralDecompressFunctionName(const LogicalType &result_type) {
	const auto type_suffix = StringUtil::Lower(LogicalTypeIdToString(result_type.id()));
	return StringUtil::Format("__internal_decompress_integral_%s", type_suffix);
}
|
85
|
+
|
86
|
+
//! Integral decompress kernel: adds the compressed input back onto min_val and returns it as RESULT_TYPE.
template <class INPUT_TYPE, class RESULT_TYPE>
static inline RESULT_TYPE TemplatedIntegralDecompress(const INPUT_TYPE &input, const RESULT_TYPE &min_val) {
	const RESULT_TYPE restored = min_val + input;
	return restored;
}
|
90
|
+
|
91
|
+
template <class INPUT_TYPE, class RESULT_TYPE>
|
92
|
+
static void IntegralDecompressFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
93
|
+
D_ASSERT(args.ColumnCount() == 2);
|
94
|
+
D_ASSERT(args.data[1].GetVectorType() == VectorType::CONSTANT_VECTOR);
|
95
|
+
D_ASSERT(args.data[1].GetType() == result.GetType());
|
96
|
+
const auto min_val = ConstantVector::GetData<RESULT_TYPE>(args.data[1])[0];
|
97
|
+
UnaryExecutor::Execute<INPUT_TYPE, RESULT_TYPE>(args.data[0], result, args.size(), [&](const INPUT_TYPE &input) {
|
98
|
+
return TemplatedIntegralDecompress<INPUT_TYPE, RESULT_TYPE>(input, min_val);
|
99
|
+
});
|
100
|
+
}
|
101
|
+
|
102
|
+
template <class INPUT_TYPE, class RESULT_TYPE>
|
103
|
+
static scalar_function_t GetIntegralDecompressFunction(const LogicalType &input_type, const LogicalType &result_type) {
|
104
|
+
return IntegralDecompressFunction<INPUT_TYPE, RESULT_TYPE>;
|
105
|
+
}
|
106
|
+
|
107
|
+
template <class INPUT_TYPE>
|
108
|
+
static scalar_function_t GetIntegralDecompressFunctionResultSwitch(const LogicalType &input_type,
|
109
|
+
const LogicalType &result_type) {
|
110
|
+
switch (result_type.id()) {
|
111
|
+
case LogicalTypeId::SMALLINT:
|
112
|
+
return GetIntegralDecompressFunction<INPUT_TYPE, int16_t>(input_type, result_type);
|
113
|
+
case LogicalTypeId::INTEGER:
|
114
|
+
return GetIntegralDecompressFunction<INPUT_TYPE, int32_t>(input_type, result_type);
|
115
|
+
case LogicalTypeId::BIGINT:
|
116
|
+
return GetIntegralDecompressFunction<INPUT_TYPE, int64_t>(input_type, result_type);
|
117
|
+
case LogicalTypeId::HUGEINT:
|
118
|
+
return GetIntegralDecompressFunction<INPUT_TYPE, hugeint_t>(input_type, result_type);
|
119
|
+
case LogicalTypeId::USMALLINT:
|
120
|
+
return GetIntegralDecompressFunction<INPUT_TYPE, uint16_t>(input_type, result_type);
|
121
|
+
case LogicalTypeId::UINTEGER:
|
122
|
+
return GetIntegralDecompressFunction<INPUT_TYPE, uint32_t>(input_type, result_type);
|
123
|
+
case LogicalTypeId::UBIGINT:
|
124
|
+
return GetIntegralDecompressFunction<INPUT_TYPE, uint64_t>(input_type, result_type);
|
125
|
+
default:
|
126
|
+
throw InternalException("Unexpected input type in GetIntegralDecompressFunctionSetSwitch");
|
127
|
+
}
|
128
|
+
}
|
129
|
+
|
130
|
+
//! Picks the decompress kernel based on the id of the (compressed) input type, then dispatches on the result type.
//! Supported input types are UTINYINT, USMALLINT, UINTEGER and UBIGINT.
//! Throws an InternalException for any other input type.
static scalar_function_t GetIntegralDecompressFunctionInputSwitch(const LogicalType &input_type,
                                                                  const LogicalType &result_type) {
	switch (input_type.id()) {
	case LogicalTypeId::UTINYINT:
		return GetIntegralDecompressFunctionResultSwitch<uint8_t>(input_type, result_type);
	case LogicalTypeId::USMALLINT:
		return GetIntegralDecompressFunctionResultSwitch<uint16_t>(input_type, result_type);
	case LogicalTypeId::UINTEGER:
		return GetIntegralDecompressFunctionResultSwitch<uint32_t>(input_type, result_type);
	case LogicalTypeId::UBIGINT:
		return GetIntegralDecompressFunctionResultSwitch<uint64_t>(input_type, result_type);
	default:
		// this switch is on the INPUT type, so the message must say so
		throw InternalException("Unexpected input type in GetIntegralDecompressFunctionInputSwitch");
	}
}
|
145
|
+
|
146
|
+
//! Serializes an integral (de)compress function: first its argument types, then its return type.
//! The bind data is not written.
static void CMIntegralSerialize(FieldWriter &writer, const FunctionData *bind_data_p, const ScalarFunction &function) {
	const auto &argument_types = function.arguments;
	const auto &return_type = function.return_type;
	writer.WriteRegularSerializableList(argument_types);
	writer.WriteSerializable(return_type);
}
|
150
|
+
|
151
|
+
template <scalar_function_t (*GET_FUNCTION)(const LogicalType &, const LogicalType &)>
|
152
|
+
unique_ptr<FunctionData> CMIntegralDeserialize(PlanDeserializationState &state, FieldReader &reader,
|
153
|
+
ScalarFunction &function) {
|
154
|
+
function.arguments = reader.ReadRequiredSerializableList<LogicalType, LogicalType>();
|
155
|
+
function.function =
|
156
|
+
GET_FUNCTION(function.arguments[0], reader.ReadRequiredSerializable<LogicalType, LogicalType>());
|
157
|
+
return nullptr;
|
158
|
+
}
|
159
|
+
|
160
|
+
//! Creates the integral compress function for the given input/result type pair.
//! The function takes two arguments of the input type and returns the result type.
ScalarFunction CMIntegralCompressFun::GetFunction(const LogicalType &input_type, const LogicalType &result_type) {
	const auto function_name = IntegralCompressFunctionName(result_type);
	const vector<LogicalType> argument_types {input_type, input_type};
	ScalarFunction compress_function(function_name, argument_types, result_type,
	                                 GetIntegralCompressFunctionInputSwitch(input_type, result_type),
	                                 CompressedMaterializationFunctions::Bind);
	compress_function.serialize = CMIntegralSerialize;
	compress_function.deserialize = CMIntegralDeserialize<GetIntegralCompressFunctionInputSwitch>;
	return compress_function;
}
|
168
|
+
|
169
|
+
//! Builds the set of compress overloads that produce the given result type.
//! An overload is added for every integral input type whose physical size is larger than that of the result type.
static ScalarFunctionSet GetIntegralCompressFunctionSet(const LogicalType &result_type) {
	ScalarFunctionSet compress_set(IntegralCompressFunctionName(result_type));
	const auto result_width = GetTypeIdSize(result_type.InternalType());
	for (const auto &input_type : LogicalType::Integral()) {
		const auto input_width = GetTypeIdSize(input_type.InternalType());
		if (input_width <= result_width) {
			// nothing to gain: the input is not wider than the result
			continue;
		}
		compress_set.AddFunction(CMIntegralCompressFun::GetFunction(input_type, result_type));
	}
	return compress_set;
}
|
178
|
+
|
179
|
+
//! Registers one compress function set per type returned by CompressedMaterializationFunctions::IntegralTypes().
void CMIntegralCompressFun::RegisterFunction(BuiltinFunctions &set) {
	const auto result_types = CompressedMaterializationFunctions::IntegralTypes();
	for (const auto &compressed_type : result_types) {
		auto compress_set = GetIntegralCompressFunctionSet(compressed_type);
		set.AddFunction(compress_set);
	}
}
|
184
|
+
|
185
|
+
//! Creates the integral decompress function for the given input/result type pair.
//! The function takes (input type, result type) as arguments and returns the result type.
ScalarFunction CMIntegralDecompressFun::GetFunction(const LogicalType &input_type, const LogicalType &result_type) {
	const auto function_name = IntegralDecompressFunctionName(result_type);
	const vector<LogicalType> argument_types {input_type, result_type};
	ScalarFunction decompress_function(function_name, argument_types, result_type,
	                                   GetIntegralDecompressFunctionInputSwitch(input_type, result_type),
	                                   CompressedMaterializationFunctions::Bind);
	decompress_function.serialize = CMIntegralSerialize;
	decompress_function.deserialize = CMIntegralDeserialize<GetIntegralDecompressFunctionInputSwitch>;
	return decompress_function;
}
|
193
|
+
|
194
|
+
//! Builds the set of decompress overloads that produce the given result type.
//! An overload is added for every compressed input type whose physical size is smaller than that of the result type.
static ScalarFunctionSet GetIntegralDecompressFunctionSet(const LogicalType &result_type) {
	ScalarFunctionSet decompress_set(IntegralDecompressFunctionName(result_type));
	const auto result_width = GetTypeIdSize(result_type.InternalType());
	for (const auto &input_type : CompressedMaterializationFunctions::IntegralTypes()) {
		const auto input_width = GetTypeIdSize(input_type.InternalType());
		if (result_width <= input_width) {
			// the compressed type must be strictly narrower than the result type
			continue;
		}
		decompress_set.AddFunction(CMIntegralDecompressFun::GetFunction(input_type, result_type));
	}
	return decompress_set;
}
|
203
|
+
|
204
|
+
//! Registers one decompress function set per integral type whose physical size is larger than one byte.
void CMIntegralDecompressFun::RegisterFunction(BuiltinFunctions &set) {
	for (const auto &result_type : LogicalType::Integral()) {
		const auto result_width = GetTypeIdSize(result_type.InternalType());
		if (result_width <= 1) {
			// one-byte types are skipped
			continue;
		}
		set.AddFunction(GetIntegralDecompressFunctionSet(result_type));
	}
}
|
211
|
+
|
212
|
+
} // namespace duckdb
|
@@ -0,0 +1,249 @@
|
|
1
|
+
#include "duckdb/common/bswap.hpp"
|
2
|
+
#include "duckdb/function/scalar/compressed_materialization_functions.hpp"
|
3
|
+
|
4
|
+
namespace duckdb {
|
5
|
+
|
6
|
+
//! Returns the name of the string compress function for the given result type:
//! "__internal_compress_string_" followed by the lower-cased name of the result type id.
static string StringCompressFunctionName(const LogicalType &result_type) {
	const auto type_suffix = StringUtil::Lower(LogicalTypeIdToString(result_type.id()));
	return StringUtil::Format("__internal_compress_string_%s", type_suffix);
}
|
10
|
+
|
11
|
+
template <idx_t LENGTH>
|
12
|
+
static inline void TemplatedReverseMemCpy(const data_ptr_t __restrict &dest, const const_data_ptr_t __restrict &src) {
|
13
|
+
for (idx_t i = 0; i < LENGTH; i++) {
|
14
|
+
dest[i] = src[LENGTH - 1 - i];
|
15
|
+
}
|
16
|
+
}
|
17
|
+
|
18
|
+
static inline void ReverseMemCpy(const data_ptr_t __restrict &dest, const const_data_ptr_t __restrict &src,
|
19
|
+
const idx_t &length) {
|
20
|
+
for (idx_t i = 0; i < length; i++) {
|
21
|
+
dest[i] = src[length - 1 - i];
|
22
|
+
}
|
23
|
+
}
|
24
|
+
|
25
|
+
template <class RESULT_TYPE>
|
26
|
+
static inline RESULT_TYPE StringCompressInternal(const string_t &input) {
|
27
|
+
RESULT_TYPE result;
|
28
|
+
const auto result_ptr = data_ptr_cast(&result);
|
29
|
+
if (sizeof(RESULT_TYPE) <= string_t::INLINE_LENGTH) {
|
30
|
+
TemplatedReverseMemCpy<sizeof(RESULT_TYPE)>(result_ptr, const_data_ptr_cast(input.GetPrefix()));
|
31
|
+
} else if (input.IsInlined()) {
|
32
|
+
static constexpr auto REMAINDER = sizeof(RESULT_TYPE) - string_t::INLINE_LENGTH;
|
33
|
+
TemplatedReverseMemCpy<string_t::INLINE_LENGTH>(result_ptr + REMAINDER, const_data_ptr_cast(input.GetPrefix()));
|
34
|
+
memset(result_ptr, '\0', REMAINDER);
|
35
|
+
} else {
|
36
|
+
const auto remainder = sizeof(RESULT_TYPE) - input.GetSize();
|
37
|
+
ReverseMemCpy(result_ptr + remainder, data_ptr_cast(input.GetPointer()), input.GetSize());
|
38
|
+
memset(result_ptr, '\0', remainder);
|
39
|
+
}
|
40
|
+
result_ptr[0] = input.GetSize();
|
41
|
+
return result;
|
42
|
+
}
|
43
|
+
|
44
|
+
template <class RESULT_TYPE>
|
45
|
+
static inline RESULT_TYPE StringCompress(const string_t &input) {
|
46
|
+
D_ASSERT(input.GetSize() < sizeof(RESULT_TYPE));
|
47
|
+
return StringCompressInternal<RESULT_TYPE>(input);
|
48
|
+
}
|
49
|
+
|
50
|
+
template <class RESULT_TYPE>
|
51
|
+
static inline RESULT_TYPE MiniStringCompress(const string_t &input) {
|
52
|
+
if (sizeof(RESULT_TYPE) <= string_t::INLINE_LENGTH) {
|
53
|
+
return input.GetSize() + *const_data_ptr_cast(input.GetPrefix());
|
54
|
+
} else if (input.GetSize() == 0) {
|
55
|
+
return 0;
|
56
|
+
} else {
|
57
|
+
return input.GetSize() + *const_data_ptr_cast(input.GetPointer());
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
template <>
|
62
|
+
inline uint8_t StringCompress(const string_t &input) {
|
63
|
+
D_ASSERT(input.GetSize() <= sizeof(uint8_t));
|
64
|
+
return MiniStringCompress<uint8_t>(input);
|
65
|
+
}
|
66
|
+
|
67
|
+
template <class RESULT_TYPE>
|
68
|
+
static void StringCompressFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
69
|
+
UnaryExecutor::Execute<string_t, RESULT_TYPE>(args.data[0], result, args.size(), StringCompress<RESULT_TYPE>);
|
70
|
+
}
|
71
|
+
|
72
|
+
template <class RESULT_TYPE>
|
73
|
+
static scalar_function_t GetStringCompressFunction(const LogicalType &result_type) {
|
74
|
+
return StringCompressFunction<RESULT_TYPE>;
|
75
|
+
}
|
76
|
+
|
77
|
+
//! Picks the string compress kernel based on the id of the result type.
//! Supported result types are UTINYINT, USMALLINT, UINTEGER, UBIGINT and HUGEINT;
//! anything else throws an InternalException.
static scalar_function_t GetStringCompressFunctionSwitch(const LogicalType &result_type) {
	const auto result_id = result_type.id();
	if (result_id == LogicalTypeId::UTINYINT) {
		return GetStringCompressFunction<uint8_t>(result_type);
	}
	if (result_id == LogicalTypeId::USMALLINT) {
		return GetStringCompressFunction<uint16_t>(result_type);
	}
	if (result_id == LogicalTypeId::UINTEGER) {
		return GetStringCompressFunction<uint32_t>(result_type);
	}
	if (result_id == LogicalTypeId::UBIGINT) {
		return GetStringCompressFunction<uint64_t>(result_type);
	}
	if (result_id == LogicalTypeId::HUGEINT) {
		return GetStringCompressFunction<hugeint_t>(result_type);
	}
	throw InternalException("Unexpected type in GetStringCompressFunctionSwitch");
}
|
93
|
+
|
94
|
+
//! Returns the (single) name shared by all string decompress overloads.
static string StringDecompressFunctionName() {
	const string function_name = "__internal_decompress_string";
	return function_name;
}
|
97
|
+
|
98
|
+
struct StringDecompressLocalState : public FunctionLocalState {
|
99
|
+
public:
|
100
|
+
explicit StringDecompressLocalState(ClientContext &context) : allocator(Allocator::Get(context)) {
|
101
|
+
}
|
102
|
+
|
103
|
+
static unique_ptr<FunctionLocalState> Init(ExpressionState &state, const BoundFunctionExpression &expr,
|
104
|
+
FunctionData *bind_data) {
|
105
|
+
return make_uniq<StringDecompressLocalState>(state.GetContext());
|
106
|
+
}
|
107
|
+
|
108
|
+
public:
|
109
|
+
ArenaAllocator allocator;
|
110
|
+
};
|
111
|
+
|
112
|
+
template <class INPUT_TYPE>
|
113
|
+
static inline string_t StringDecompress(const INPUT_TYPE &input, ArenaAllocator &allocator) {
|
114
|
+
const auto input_ptr = const_data_ptr_cast(&input);
|
115
|
+
string_t result(input_ptr[0]);
|
116
|
+
if (sizeof(INPUT_TYPE) <= string_t::INLINE_LENGTH) {
|
117
|
+
const auto result_ptr = data_ptr_cast(result.GetPrefixWriteable());
|
118
|
+
TemplatedReverseMemCpy<sizeof(INPUT_TYPE)>(result_ptr, input_ptr);
|
119
|
+
memset(result_ptr + sizeof(INPUT_TYPE) - 1, '\0', string_t::INLINE_LENGTH - sizeof(INPUT_TYPE) + 1);
|
120
|
+
} else if (result.GetSize() <= string_t::INLINE_LENGTH) {
|
121
|
+
static constexpr auto REMAINDER = sizeof(INPUT_TYPE) - string_t::INLINE_LENGTH;
|
122
|
+
const auto result_ptr = data_ptr_cast(result.GetPrefixWriteable());
|
123
|
+
TemplatedReverseMemCpy<string_t::INLINE_LENGTH>(result_ptr, input_ptr + REMAINDER);
|
124
|
+
} else {
|
125
|
+
result.SetPointer(char_ptr_cast(allocator.Allocate(sizeof(INPUT_TYPE))));
|
126
|
+
TemplatedReverseMemCpy<sizeof(INPUT_TYPE)>(data_ptr_cast(result.GetPointer()), input_ptr);
|
127
|
+
memcpy(result.GetPrefixWriteable(), result.GetPointer(), string_t::PREFIX_LENGTH);
|
128
|
+
}
|
129
|
+
return result;
|
130
|
+
}
|
131
|
+
|
132
|
+
template <class INPUT_TYPE>
|
133
|
+
static inline string_t MiniStringDecompress(const INPUT_TYPE &input, ArenaAllocator &allocator) {
|
134
|
+
if (input == 0) {
|
135
|
+
string_t result(uint32_t(0));
|
136
|
+
memset(result.GetPrefixWriteable(), '\0', string_t::INLINE_BYTES);
|
137
|
+
return result;
|
138
|
+
}
|
139
|
+
|
140
|
+
string_t result(1);
|
141
|
+
if (sizeof(INPUT_TYPE) <= string_t::INLINE_LENGTH) {
|
142
|
+
memset(result.GetPrefixWriteable(), '\0', string_t::INLINE_BYTES);
|
143
|
+
*data_ptr_cast(result.GetPrefixWriteable()) = input - 1;
|
144
|
+
} else {
|
145
|
+
result.SetPointer(char_ptr_cast(allocator.Allocate(1)));
|
146
|
+
*data_ptr_cast(result.GetPointer()) = input - 1;
|
147
|
+
memset(result.GetPrefixWriteable(), '\0', string_t::PREFIX_LENGTH);
|
148
|
+
*result.GetPrefixWriteable() = *result.GetPointer();
|
149
|
+
}
|
150
|
+
return result;
|
151
|
+
}
|
152
|
+
|
153
|
+
template <>
|
154
|
+
inline string_t StringDecompress(const uint8_t &input, ArenaAllocator &allocator) {
|
155
|
+
return MiniStringDecompress<uint8_t>(input, allocator);
|
156
|
+
}
|
157
|
+
|
158
|
+
template <class INPUT_TYPE>
|
159
|
+
static void StringDecompressFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
160
|
+
auto &allocator = ExecuteFunctionState::GetFunctionState(state)->Cast<StringDecompressLocalState>().allocator;
|
161
|
+
allocator.Reset();
|
162
|
+
UnaryExecutor::Execute<INPUT_TYPE, string_t>(args.data[0], result, args.size(), [&](const INPUT_TYPE &input) {
|
163
|
+
return StringDecompress<INPUT_TYPE>(input, allocator);
|
164
|
+
});
|
165
|
+
}
|
166
|
+
|
167
|
+
template <class INPUT_TYPE>
|
168
|
+
static scalar_function_t GetStringDecompressFunction(const LogicalType &input_type) {
|
169
|
+
return StringDecompressFunction<INPUT_TYPE>;
|
170
|
+
}
|
171
|
+
|
172
|
+
//! Picks the string decompress kernel based on the id of the (compressed) input type.
//! Supported input types are UTINYINT, USMALLINT, UINTEGER, UBIGINT and HUGEINT;
//! anything else throws an InternalException.
static scalar_function_t GetStringDecompressFunctionSwitch(const LogicalType &input_type) {
	const auto input_id = input_type.id();
	if (input_id == LogicalTypeId::UTINYINT) {
		return GetStringDecompressFunction<uint8_t>(input_type);
	}
	if (input_id == LogicalTypeId::USMALLINT) {
		return GetStringDecompressFunction<uint16_t>(input_type);
	}
	if (input_id == LogicalTypeId::UINTEGER) {
		return GetStringDecompressFunction<uint32_t>(input_type);
	}
	if (input_id == LogicalTypeId::UBIGINT) {
		return GetStringDecompressFunction<uint64_t>(input_type);
	}
	if (input_id == LogicalTypeId::HUGEINT) {
		return GetStringDecompressFunction<hugeint_t>(input_type);
	}
	throw InternalException("Unexpected type in GetStringDecompressFunctionSwitch");
}
|
188
|
+
|
189
|
+
//! Serializes a string compress function: first its argument types, then its return type.
//! The bind data is not written.
static void CMStringCompressSerialize(FieldWriter &writer, const FunctionData *bind_data_p,
                                      const ScalarFunction &function) {
	const auto &argument_types = function.arguments;
	const auto &return_type = function.return_type;
	writer.WriteRegularSerializableList(argument_types);
	writer.WriteSerializable(return_type);
}
|
194
|
+
|
195
|
+
//! Deserializes a string compress function: reads the argument types and then the return type,
//! and re-creates the function pointer from the return type. No bind data is produced.
unique_ptr<FunctionData> CMStringCompressDeserialize(PlanDeserializationState &state, FieldReader &reader,
                                                     ScalarFunction &function) {
	// the read order must be: argument list first, return type second
	function.arguments = reader.ReadRequiredSerializableList<LogicalType, LogicalType>();
	const auto return_type = reader.ReadRequiredSerializable<LogicalType, LogicalType>();
	function.function = GetStringCompressFunctionSwitch(return_type);
	return nullptr;
}
|
201
|
+
|
202
|
+
//! Creates the string compress function for the given result type: VARCHAR -> result type.
ScalarFunction CMStringCompressFun::GetFunction(const LogicalType &result_type) {
	const auto function_name = StringCompressFunctionName(result_type);
	ScalarFunction compress_function(function_name, {LogicalType::VARCHAR}, result_type,
	                                 GetStringCompressFunctionSwitch(result_type),
	                                 CompressedMaterializationFunctions::Bind);
	compress_function.serialize = CMStringCompressSerialize;
	compress_function.deserialize = CMStringCompressDeserialize;
	return compress_function;
}
|
209
|
+
|
210
|
+
//! Registers one string compress function per type returned by CompressedMaterializationFunctions::StringTypes().
void CMStringCompressFun::RegisterFunction(BuiltinFunctions &set) {
	const auto result_types = CompressedMaterializationFunctions::StringTypes();
	for (const auto &compressed_type : result_types) {
		set.AddFunction(CMStringCompressFun::GetFunction(compressed_type));
	}
}
|
215
|
+
|
216
|
+
//! Serializes a string decompress function: only the argument types are written.
static void CMStringDecompressSerialize(FieldWriter &writer, const FunctionData *bind_data_p,
                                        const ScalarFunction &function) {
	const auto &argument_types = function.arguments;
	writer.WriteRegularSerializableList(argument_types);
}
|
220
|
+
|
221
|
+
//! Deserializes a string decompress function: reads the argument types and re-creates the
//! function pointer from the first argument type. No bind data is produced.
unique_ptr<FunctionData> CMStringDecompressDeserialize(PlanDeserializationState &state, FieldReader &reader,
                                                       ScalarFunction &function) {
	function.arguments = reader.ReadRequiredSerializableList<LogicalType, LogicalType>();
	const auto &compressed_type = function.arguments[0];
	function.function = GetStringDecompressFunctionSwitch(compressed_type);
	return nullptr;
}
|
227
|
+
|
228
|
+
//! Creates the string decompress function for the given input type: input type -> VARCHAR.
//! The function is given StringDecompressLocalState::Init as its local-state initializer.
ScalarFunction CMStringDecompressFun::GetFunction(const LogicalType &input_type) {
	const auto function_name = StringDecompressFunctionName();
	ScalarFunction decompress_function(function_name, {input_type}, LogicalType::VARCHAR,
	                                   GetStringDecompressFunctionSwitch(input_type),
	                                   CompressedMaterializationFunctions::Bind, nullptr, nullptr,
	                                   StringDecompressLocalState::Init);
	decompress_function.serialize = CMStringDecompressSerialize;
	decompress_function.deserialize = CMStringDecompressDeserialize;
	return decompress_function;
}
|
236
|
+
|
237
|
+
//! Builds the string decompress function set: one overload per type returned by
//! CompressedMaterializationFunctions::StringTypes().
static ScalarFunctionSet GetStringDecompressFunctionSet() {
	ScalarFunctionSet decompress_set(StringDecompressFunctionName());
	const auto compressed_types = CompressedMaterializationFunctions::StringTypes();
	for (const auto &compressed_type : compressed_types) {
		decompress_set.AddFunction(CMStringDecompressFun::GetFunction(compressed_type));
	}
	return decompress_set;
}
|
244
|
+
|
245
|
+
//! Registers the string decompress function set.
void CMStringDecompressFun::RegisterFunction(BuiltinFunctions &set) {
	auto decompress_set = GetStringDecompressFunctionSet();
	set.AddFunction(decompress_set);
}
|
248
|
+
|
249
|
+
} // namespace duckdb
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#include "duckdb/function/scalar/compressed_materialization_functions.hpp"
|
2
|
+
|
3
|
+
namespace duckdb {
|
4
|
+
|
5
|
+
//! The types integral values can be compressed to: the four unsigned integer types, narrowest first.
const vector<LogicalType> CompressedMaterializationFunctions::IntegralTypes() {
	const vector<LogicalType> compressed_types {LogicalType::UTINYINT, LogicalType::USMALLINT, LogicalType::UINTEGER,
	                                            LogicalType::UBIGINT};
	return compressed_types;
}
|
8
|
+
|
9
|
+
//! The types strings can be compressed to: the four unsigned integer types followed by HUGEINT.
const vector<LogicalType> CompressedMaterializationFunctions::StringTypes() {
	const vector<LogicalType> compressed_types {LogicalType::UTINYINT, LogicalType::USMALLINT, LogicalType::UINTEGER,
	                                            LogicalType::UBIGINT, LogicalType::HUGEINT};
	return compressed_types;
}
|
13
|
+
|
14
|
+
// LCOV_EXCL_START
|
15
|
+
//! Bind callback shared by all compressed materialization functions.
//! It unconditionally throws a BinderException and never returns bind data.
unique_ptr<FunctionData> CompressedMaterializationFunctions::Bind(ClientContext &context,
                                                                  ScalarFunction &bound_function,
                                                                  vector<unique_ptr<Expression>> &arguments) {
	const string message = "Compressed materialization functions are for internal use only!";
	throw BinderException(message);
}
|
20
|
+
// LCOV_EXCL_STOP
|
21
|
+
|
22
|
+
//! Registers all compressed materialization functions with the builtin function catalog.
void BuiltinFunctions::RegisterCompressedMaterializationFunctions() {
	// integral compress / decompress
	Register<CMIntegralCompressFun>();
	Register<CMIntegralDecompressFun>();
	// string compress / decompress
	Register<CMStringCompressFun>();
	Register<CMStringDecompressFun>();
}
|
28
|
+
|
29
|
+
} // namespace duckdb
|
@@ -0,0 +1,162 @@
|
|
1
|
+
#include "duckdb/common/types/data_chunk.hpp"
|
2
|
+
#include "duckdb/function/scalar/nested_functions.hpp"
|
3
|
+
#include "duckdb/function/scalar_function.hpp"
|
4
|
+
#include "duckdb/function/built_in_functions.hpp"
|
5
|
+
|
6
|
+
namespace duckdb {
|
7
|
+
|
8
|
+
//! Executes list_resize(list, new_size[, default]).
//! args.data[0] holds the lists, args.data[1] the new sizes (UBIGINT) and the optional
//! args.data[2] the value used to fill up lists that grow. For every row the result list holds the
//! first min(old length, new size) elements of the input list; remaining slots are filled with the
//! default value if one is given and valid, and with NULL otherwise.
//! A NULL list yields a NULL result; a NULL new size is treated as size 0.
void ListResizeFunction(DataChunk &args, ExpressionState &state, Vector &result) {
	D_ASSERT(args.data[1].GetType().id() == LogicalTypeId::UBIGINT);
	if (result.GetType().id() == LogicalTypeId::SQLNULL) {
		// the bind step set the return type to SQLNULL: the result is NULL for EVERY row,
		// so emit a constant NULL vector instead of only nulling row 0 of a flat vector
		result.SetVectorType(VectorType::CONSTANT_VECTOR);
		ConstantVector::SetNull(result, true);
		return;
	}
	D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
	auto count = args.size();

	result.SetVectorType(VectorType::FLAT_VECTOR);

	auto &lists = args.data[0];
	auto &child = ListVector::GetEntry(args.data[0]);
	auto &new_sizes = args.data[1];

	UnifiedVectorFormat list_data;
	lists.ToUnifiedFormat(count, list_data);
	auto list_entries = UnifiedVectorFormat::GetData<list_entry_t>(list_data);

	UnifiedVectorFormat new_size_data;
	new_sizes.ToUnifiedFormat(count, new_size_data);
	// the size column is UBIGINT (asserted above), so read it as unsigned 64-bit
	auto new_size_entries = UnifiedVectorFormat::GetData<uint64_t>(new_size_data);

	UnifiedVectorFormat child_data;
	child.ToUnifiedFormat(count, child_data);

	// Find the new size of the result child vector
	idx_t new_child_size = 0;
	for (idx_t i = 0; i < count; i++) {
		auto index = new_size_data.sel->get_index(i);
		if (new_size_data.validity.RowIsValid(index)) {
			new_child_size += new_size_entries[index];
		}
	}

	// Create the default vector if it exists
	UnifiedVectorFormat default_data;
	optional_ptr<Vector> default_vector;
	if (args.ColumnCount() == 3) {
		default_vector = &args.data[2];
		default_vector->ToUnifiedFormat(count, default_data);
		default_vector->SetVectorType(VectorType::CONSTANT_VECTOR);
	}

	ListVector::Reserve(result, new_child_size);
	ListVector::SetListSize(result, new_child_size);

	auto result_entries = FlatVector::GetData<list_entry_t>(result);
	auto &result_child = ListVector::GetEntry(result);

	// for each lists in the args
	idx_t result_child_offset = 0;
	for (idx_t args_index = 0; args_index < count; args_index++) {
		auto l_index = list_data.sel->get_index(args_index);
		auto new_index = new_size_data.sel->get_index(args_index);

		// set null if lists is null
		if (!list_data.validity.RowIsValid(l_index)) {
			FlatVector::SetNull(result, args_index, true);
			continue;
		}

		// a NULL new size leaves the entry at 0, i.e. produces an empty list
		idx_t new_size_entry = 0;
		if (new_size_data.validity.RowIsValid(new_index)) {
			new_size_entry = new_size_entries[new_index];
		}

		// find the smallest size between lists and new_sizes
		auto values_to_copy = MinValue<idx_t>(list_entries[l_index].length, new_size_entry);

		// set the result entry
		result_entries[args_index].offset = result_child_offset;
		result_entries[args_index].length = new_size_entry;

		// copy the values from the child vector
		VectorOperations::Copy(child, result_child, list_entries[l_index].offset + values_to_copy,
		                       list_entries[l_index].offset, result_child_offset);
		result_child_offset += values_to_copy;

		// set default value if it exists
		idx_t def_index = 0;
		if (args.ColumnCount() == 3) {
			def_index = default_data.sel->get_index(args_index);
		}

		// if the new size is larger than the old size, fill in the default value
		if (values_to_copy < new_size_entry) {
			if (default_vector && default_data.validity.RowIsValid(def_index)) {
				VectorOperations::Copy(*default_vector, result_child, new_size_entry - values_to_copy, def_index,
				                       result_child_offset);
				result_child_offset += new_size_entry - values_to_copy;
			} else {
				// no (valid) default: pad the remaining slots with NULL
				for (idx_t j = values_to_copy; j < new_size_entry; j++) {
					FlatVector::SetNull(result_child, result_child_offset, true);
					result_child_offset++;
				}
			}
		}
	}

	if (args.AllConstant()) {
		result.SetVectorType(VectorType::CONSTANT_VECTOR);
	}
}
|
112
|
+
|
113
|
+
//! Bind callback of list_resize.
//! Forces the size argument to UBIGINT and derives the return type from the list argument:
//! a SQLNULL list gives a SQLNULL function, otherwise the return type is the type of the list argument.
//! For a known list type, a third (default) argument whose type differs from the list child type
//! (and is not SQLNULL) is retyped to the list child type.
static unique_ptr<FunctionData> ListResizeBind(ClientContext &context, ScalarFunction &bound_function,
                                               vector<unique_ptr<Expression>> &arguments) {
	D_ASSERT(bound_function.arguments.size() == 2 || arguments.size() == 3);
	bound_function.arguments[1] = LogicalType::UBIGINT;

	const auto &list_type = arguments[0]->return_type;

	// first argument is constant NULL
	if (list_type == LogicalType::SQLNULL) {
		bound_function.arguments[0] = LogicalType::SQLNULL;
		bound_function.return_type = LogicalType::SQLNULL;
		return make_uniq<VariableReturnBindData>(bound_function.return_type);
	}

	// an UNKNOWN list type (prepared statements) skips the default-type adjustment
	const bool is_prepared_parameter = list_type == LogicalType::UNKNOWN;
	if (!is_prepared_parameter && bound_function.arguments.size() == 3) {
		// default type does not match list type
		const auto &default_type = arguments[2]->return_type;
		if (ListType::GetChildType(list_type) != default_type && default_type != LogicalTypeId::SQLNULL) {
			bound_function.arguments[2] = ListType::GetChildType(list_type);
		}
	}

	bound_function.return_type = list_type;
	return make_uniq<VariableReturnBindData>(bound_function.return_type);
}
|
141
|
+
|
142
|
+
//! Registers list_resize and its alias array_resize, each with a two-argument overload
//! (list, size) and a three-argument overload (list, size, default).
void ListResizeFun::RegisterFunction(BuiltinFunctions &set) {
	ScalarFunction resize_without_default({LogicalType::LIST(LogicalTypeId::ANY), LogicalTypeId::ANY},
	                                      LogicalType::LIST(LogicalTypeId::ANY), ListResizeFunction, ListResizeBind);
	resize_without_default.null_handling = FunctionNullHandling::SPECIAL_HANDLING;

	ScalarFunction resize_with_default(
	    {LogicalType::LIST(LogicalTypeId::ANY), LogicalTypeId::ANY, LogicalTypeId::ANY},
	    LogicalType::LIST(LogicalTypeId::ANY), ListResizeFunction, ListResizeBind);
	resize_with_default.null_handling = FunctionNullHandling::SPECIAL_HANDLING;

	// both names get the same pair of overloads
	ScalarFunctionSet list_resize_set("list_resize");
	list_resize_set.AddFunction(resize_without_default);
	list_resize_set.AddFunction(resize_with_default);
	set.AddFunction(list_resize_set);

	ScalarFunctionSet array_resize_set("array_resize");
	array_resize_set.AddFunction(resize_without_default);
	array_resize_set.AddFunction(resize_with_default);
	set.AddFunction(array_resize_set);
}
|
161
|
+
|
162
|
+
} // namespace duckdb
|