duckdb 0.8.2-dev161.0 → 0.8.2-dev1764.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +91 -38
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +194 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +79 -12
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +44 -19
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +4619 -4446
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/extra_type_info.cpp +506 -0
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +14 -14
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +70 -50
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types/value.cpp +11 -6
- package/src/duckdb/src/common/types.cpp +9 -656
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/arrow.cpp +19 -0
- package/src/duckdb/src/function/table/arrow_conversion.cpp +35 -1
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/system/test_all_types.cpp +7 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +23 -8
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +22 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +4 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +93 -88
- package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +79 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +44 -31
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
- package/src/statement.cpp +10 -3
- package/test/columns.test.ts +24 -1
- package/test/test_all_types.test.ts +234 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -1,9 +1,11 @@
|
|
1
1
|
#include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/allocator.hpp"
|
4
|
+
#include "duckdb/common/types/batched_data_collection.hpp"
|
5
|
+
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
2
6
|
#include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
|
3
7
|
#include "duckdb/parallel/base_pipeline_event.hpp"
|
4
|
-
|
5
|
-
#include "duckdb/common/types/batched_data_collection.hpp"
|
6
|
-
#include "duckdb/common/allocator.hpp"
|
8
|
+
|
7
9
|
#include <algorithm>
|
8
10
|
|
9
11
|
namespace duckdb {
|
@@ -67,7 +69,7 @@ public:
|
|
67
69
|
optional_idx batch_index;
|
68
70
|
|
69
71
|
void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
|
70
|
-
collection = make_uniq<ColumnDataCollection>(
|
72
|
+
collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
|
71
73
|
collection->InitializeAppend(append_state);
|
72
74
|
}
|
73
75
|
};
|
@@ -116,7 +116,7 @@ public:
|
|
116
116
|
optional_idx batch_index;
|
117
117
|
|
118
118
|
void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
|
119
|
-
collection = make_uniq<ColumnDataCollection>(
|
119
|
+
collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
|
120
120
|
collection->InitializeAppend(append_state);
|
121
121
|
}
|
122
122
|
};
|
@@ -353,7 +353,7 @@ void PhysicalFixedBatchCopy::RepartitionBatches(ClientContext &context, GlobalSi
|
|
353
353
|
} else {
|
354
354
|
// the collection is too large for a batch - we need to repartition
|
355
355
|
// create an empty collection
|
356
|
-
current_collection = make_uniq<ColumnDataCollection>(
|
356
|
+
current_collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), children[0]->types);
|
357
357
|
}
|
358
358
|
if (current_collection) {
|
359
359
|
current_collection->InitializeAppend(append_state);
|
@@ -373,7 +373,7 @@ void PhysicalFixedBatchCopy::RepartitionBatches(ClientContext &context, GlobalSi
|
|
373
373
|
}
|
374
374
|
// the collection is full - move it to the result and create a new one
|
375
375
|
gstate.AddTask(make_uniq<PrepareBatchTask>(gstate.scheduled_batch_index++, std::move(current_collection)));
|
376
|
-
current_collection = make_uniq<ColumnDataCollection>(
|
376
|
+
current_collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), children[0]->types);
|
377
377
|
current_collection->InitializeAppend(append_state);
|
378
378
|
}
|
379
379
|
}
|
@@ -41,7 +41,7 @@ PhysicalInsert::PhysicalInsert(vector<LogicalType> types_p, TableCatalogEntry &t
|
|
41
41
|
return;
|
42
42
|
}
|
43
43
|
|
44
|
-
D_ASSERT(set_expressions.size() == set_columns.size());
|
44
|
+
D_ASSERT(this->set_expressions.size() == this->set_columns.size());
|
45
45
|
|
46
46
|
// One or more columns are referenced from the existing table,
|
47
47
|
// we use the 'insert_types' to figure out which types these columns have
|
@@ -16,6 +16,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
|
|
16
16
|
pivot_map[bound_pivot.pivot_values[p]] = bound_pivot.group_count + p;
|
17
17
|
}
|
18
18
|
// extract the empty aggregate expressions
|
19
|
+
ArenaAllocator allocator(Allocator::DefaultAllocator());
|
19
20
|
for (auto &aggr_expr : bound_pivot.aggregates) {
|
20
21
|
auto &aggr = aggr_expr->Cast<BoundAggregateExpression>();
|
21
22
|
// for each aggregate, initialize an empty aggregate state and finalize it immediately
|
@@ -23,7 +24,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
|
|
23
24
|
aggr.function.initialize(state.get());
|
24
25
|
Vector state_vector(Value::POINTER(CastPointerToValue(state.get())));
|
25
26
|
Vector result_vector(aggr_expr->return_type);
|
26
|
-
AggregateInputData aggr_input_data(aggr.bind_info.get(),
|
27
|
+
AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
|
27
28
|
aggr.function.finalize(state_vector, aggr_input_data, result_vector, 1, 0);
|
28
29
|
empty_aggregates.push_back(result_vector.GetValue(0));
|
29
30
|
}
|
@@ -64,6 +64,9 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline ¤t, MetaPipeline &met
|
|
64
64
|
state.SetPipelineSource(current, delim_join.distinct->Cast<PhysicalOperator>());
|
65
65
|
return;
|
66
66
|
}
|
67
|
+
case PhysicalOperatorType::CTE_SCAN: {
|
68
|
+
break;
|
69
|
+
}
|
67
70
|
case PhysicalOperatorType::RECURSIVE_CTE_SCAN:
|
68
71
|
if (!meta_pipeline.HasRecursiveCTE()) {
|
69
72
|
throw InternalException("Recursive CTE scan found without recursive CTE node");
|
@@ -76,4 +79,20 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline ¤t, MetaPipeline &met
|
|
76
79
|
state.SetPipelineSource(current, *this);
|
77
80
|
}
|
78
81
|
|
82
|
+
string PhysicalColumnDataScan::ParamsToString() const {
|
83
|
+
string result = "";
|
84
|
+
switch (type) {
|
85
|
+
case PhysicalOperatorType::CTE_SCAN:
|
86
|
+
case PhysicalOperatorType::RECURSIVE_CTE_SCAN: {
|
87
|
+
result += "\n[INFOSEPARATOR]\n";
|
88
|
+
result += StringUtil::Format("idx: %llu", cte_index);
|
89
|
+
break;
|
90
|
+
}
|
91
|
+
default:
|
92
|
+
break;
|
93
|
+
}
|
94
|
+
|
95
|
+
return result;
|
96
|
+
}
|
97
|
+
|
79
98
|
} // namespace duckdb
|
@@ -16,17 +16,18 @@ PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction fu
|
|
16
16
|
: PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, std::move(types), estimated_cardinality),
|
17
17
|
function(std::move(function_p)), bind_data(std::move(bind_data_p)), column_ids(std::move(column_ids_p)),
|
18
18
|
names(std::move(names_p)), table_filters(std::move(table_filters_p)) {
|
19
|
+
extra_info.file_filters = "";
|
19
20
|
}
|
20
21
|
|
21
22
|
PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction function_p,
|
22
23
|
unique_ptr<FunctionData> bind_data_p, vector<LogicalType> returned_types_p,
|
23
24
|
vector<column_t> column_ids_p, vector<idx_t> projection_ids_p,
|
24
25
|
vector<string> names_p, unique_ptr<TableFilterSet> table_filters_p,
|
25
|
-
idx_t estimated_cardinality)
|
26
|
+
idx_t estimated_cardinality, ExtraOperatorInfo extra_info)
|
26
27
|
: PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, std::move(types), estimated_cardinality),
|
27
28
|
function(std::move(function_p)), bind_data(std::move(bind_data_p)), returned_types(std::move(returned_types_p)),
|
28
29
|
column_ids(std::move(column_ids_p)), projection_ids(std::move(projection_ids_p)), names(std::move(names_p)),
|
29
|
-
table_filters(std::move(table_filters_p)) {
|
30
|
+
table_filters(std::move(table_filters_p)), extra_info(extra_info) {
|
30
31
|
}
|
31
32
|
|
32
33
|
class TableScanGlobalSourceState : public GlobalSourceState {
|
@@ -149,6 +150,10 @@ string PhysicalTableScan::ParamsToString() const {
|
|
149
150
|
}
|
150
151
|
}
|
151
152
|
}
|
153
|
+
if (!extra_info.file_filters.empty()) {
|
154
|
+
result += "\n[INFOSEPARATOR]\n";
|
155
|
+
result += "File Filters: " + extra_info.file_filters;
|
156
|
+
}
|
152
157
|
result += "\n[INFOSEPARATOR]\n";
|
153
158
|
result += StringUtil::Format("EC: %llu", estimated_props->GetCardinality<idx_t>());
|
154
159
|
return result;
|
@@ -0,0 +1,160 @@
|
|
1
|
+
#include "duckdb/execution/operator/set/physical_cte.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/types/column/column_data_collection.hpp"
|
4
|
+
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
5
|
+
#include "duckdb/execution/aggregate_hashtable.hpp"
|
6
|
+
#include "duckdb/execution/executor.hpp"
|
7
|
+
#include "duckdb/parallel/event.hpp"
|
8
|
+
#include "duckdb/parallel/meta_pipeline.hpp"
|
9
|
+
#include "duckdb/parallel/pipeline.hpp"
|
10
|
+
#include "duckdb/parallel/task_scheduler.hpp"
|
11
|
+
#include "duckdb/storage/buffer_manager.hpp"
|
12
|
+
|
13
|
+
namespace duckdb {
|
14
|
+
|
15
|
+
PhysicalCTE::PhysicalCTE(string ctename, idx_t table_index, vector<LogicalType> types, unique_ptr<PhysicalOperator> top,
|
16
|
+
unique_ptr<PhysicalOperator> bottom, idx_t estimated_cardinality)
|
17
|
+
: PhysicalOperator(PhysicalOperatorType::CTE, std::move(types), estimated_cardinality), table_index(table_index),
|
18
|
+
ctename(std::move(ctename)) {
|
19
|
+
children.push_back(std::move(top));
|
20
|
+
children.push_back(std::move(bottom));
|
21
|
+
}
|
22
|
+
|
23
|
+
PhysicalCTE::~PhysicalCTE() {
|
24
|
+
}
|
25
|
+
|
26
|
+
//===--------------------------------------------------------------------===//
|
27
|
+
// Sink
|
28
|
+
//===--------------------------------------------------------------------===//
|
29
|
+
class CTEState : public GlobalSinkState {
|
30
|
+
public:
|
31
|
+
explicit CTEState(ClientContext &context, const PhysicalCTE &op)
|
32
|
+
: intermediate_table(context, op.children[1]->GetTypes()) {
|
33
|
+
}
|
34
|
+
ColumnDataCollection intermediate_table;
|
35
|
+
ColumnDataScanState scan_state;
|
36
|
+
bool initialized = false;
|
37
|
+
bool finished_scan = false;
|
38
|
+
};
|
39
|
+
|
40
|
+
unique_ptr<GlobalSinkState> PhysicalCTE::GetGlobalSinkState(ClientContext &context) const {
|
41
|
+
working_table->Reset();
|
42
|
+
return make_uniq<CTEState>(context, *this);
|
43
|
+
}
|
44
|
+
|
45
|
+
SinkResultType PhysicalCTE::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
|
46
|
+
auto &gstate = input.global_state.Cast<CTEState>();
|
47
|
+
if (!gstate.finished_scan) {
|
48
|
+
working_table->Append(chunk);
|
49
|
+
} else {
|
50
|
+
gstate.intermediate_table.Append(chunk);
|
51
|
+
}
|
52
|
+
return SinkResultType::NEED_MORE_INPUT;
|
53
|
+
}
|
54
|
+
|
55
|
+
//===--------------------------------------------------------------------===//
|
56
|
+
// Source
|
57
|
+
//===--------------------------------------------------------------------===//
|
58
|
+
SourceResultType PhysicalCTE::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const {
|
59
|
+
auto &gstate = sink_state->Cast<CTEState>();
|
60
|
+
if (!gstate.initialized) {
|
61
|
+
gstate.intermediate_table.InitializeScan(gstate.scan_state);
|
62
|
+
gstate.finished_scan = false;
|
63
|
+
gstate.initialized = true;
|
64
|
+
}
|
65
|
+
if (!gstate.finished_scan) {
|
66
|
+
gstate.finished_scan = true;
|
67
|
+
ExecuteRecursivePipelines(context);
|
68
|
+
}
|
69
|
+
|
70
|
+
gstate.intermediate_table.Scan(gstate.scan_state, chunk);
|
71
|
+
|
72
|
+
return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
|
73
|
+
}
|
74
|
+
|
75
|
+
void PhysicalCTE::ExecuteRecursivePipelines(ExecutionContext &context) const {
|
76
|
+
if (!recursive_meta_pipeline) {
|
77
|
+
throw InternalException("Missing meta pipeline for recursive CTE");
|
78
|
+
}
|
79
|
+
|
80
|
+
// get and reset pipelines
|
81
|
+
vector<shared_ptr<Pipeline>> pipelines;
|
82
|
+
recursive_meta_pipeline->GetPipelines(pipelines, true);
|
83
|
+
for (auto &pipeline : pipelines) {
|
84
|
+
auto sink = pipeline->GetSink();
|
85
|
+
if (sink.get() != this) {
|
86
|
+
sink->sink_state.reset();
|
87
|
+
}
|
88
|
+
for (auto &op_ref : pipeline->GetOperators()) {
|
89
|
+
auto &op = op_ref.get();
|
90
|
+
op.op_state.reset();
|
91
|
+
}
|
92
|
+
pipeline->ClearSource();
|
93
|
+
}
|
94
|
+
|
95
|
+
// get the MetaPipelines in the recursive_meta_pipeline and reschedule them
|
96
|
+
vector<shared_ptr<MetaPipeline>> meta_pipelines;
|
97
|
+
recursive_meta_pipeline->GetMetaPipelines(meta_pipelines, true, false);
|
98
|
+
auto &executor = recursive_meta_pipeline->GetExecutor();
|
99
|
+
vector<shared_ptr<Event>> events;
|
100
|
+
executor.ReschedulePipelines(meta_pipelines, events);
|
101
|
+
|
102
|
+
while (true) {
|
103
|
+
executor.WorkOnTasks();
|
104
|
+
if (executor.HasError()) {
|
105
|
+
executor.ThrowException();
|
106
|
+
}
|
107
|
+
bool finished = true;
|
108
|
+
for (auto &event : events) {
|
109
|
+
if (!event->IsFinished()) {
|
110
|
+
finished = false;
|
111
|
+
break;
|
112
|
+
}
|
113
|
+
}
|
114
|
+
if (finished) {
|
115
|
+
// all pipelines finished: done!
|
116
|
+
break;
|
117
|
+
}
|
118
|
+
}
|
119
|
+
}
|
120
|
+
|
121
|
+
//===--------------------------------------------------------------------===//
|
122
|
+
// Pipeline Construction
|
123
|
+
//===--------------------------------------------------------------------===//
|
124
|
+
void PhysicalCTE::BuildPipelines(Pipeline ¤t, MetaPipeline &meta_pipeline) {
|
125
|
+
D_ASSERT(children.size() == 2);
|
126
|
+
op_state.reset();
|
127
|
+
sink_state.reset();
|
128
|
+
recursive_meta_pipeline.reset();
|
129
|
+
|
130
|
+
auto &state = meta_pipeline.GetState();
|
131
|
+
state.SetPipelineSource(current, *this);
|
132
|
+
|
133
|
+
auto &executor = meta_pipeline.GetExecutor();
|
134
|
+
executor.AddMaterializedCTE(*this);
|
135
|
+
|
136
|
+
auto &child_meta_pipeline = meta_pipeline.CreateChildMetaPipeline(current, *this);
|
137
|
+
child_meta_pipeline.Build(*children[0]);
|
138
|
+
|
139
|
+
// the RHS is the recursive pipeline
|
140
|
+
recursive_meta_pipeline = make_shared<MetaPipeline>(executor, state, this);
|
141
|
+
if (meta_pipeline.HasRecursiveCTE()) {
|
142
|
+
recursive_meta_pipeline->SetRecursiveCTE();
|
143
|
+
}
|
144
|
+
recursive_meta_pipeline->Build(*children[1]);
|
145
|
+
}
|
146
|
+
|
147
|
+
vector<const_reference<PhysicalOperator>> PhysicalCTE::GetSources() const {
|
148
|
+
return {*this};
|
149
|
+
}
|
150
|
+
|
151
|
+
string PhysicalCTE::ParamsToString() const {
|
152
|
+
string result = "";
|
153
|
+
result += "\n[INFOSEPARATOR]\n";
|
154
|
+
result += ctename;
|
155
|
+
result += "\n[INFOSEPARATOR]\n";
|
156
|
+
result += StringUtil::Format("idx: %llu", table_index);
|
157
|
+
return result;
|
158
|
+
}
|
159
|
+
|
160
|
+
} // namespace duckdb
|
@@ -12,10 +12,11 @@
|
|
12
12
|
|
13
13
|
namespace duckdb {
|
14
14
|
|
15
|
-
PhysicalRecursiveCTE::PhysicalRecursiveCTE(vector<LogicalType> types, bool union_all,
|
16
|
-
unique_ptr<PhysicalOperator>
|
15
|
+
PhysicalRecursiveCTE::PhysicalRecursiveCTE(string ctename, idx_t table_index, vector<LogicalType> types, bool union_all,
|
16
|
+
unique_ptr<PhysicalOperator> top, unique_ptr<PhysicalOperator> bottom,
|
17
|
+
idx_t estimated_cardinality)
|
17
18
|
: PhysicalOperator(PhysicalOperatorType::RECURSIVE_CTE, std::move(types), estimated_cardinality),
|
18
|
-
union_all(union_all) {
|
19
|
+
ctename(std::move(ctename)), table_index(table_index), union_all(union_all) {
|
19
20
|
children.push_back(std::move(top));
|
20
21
|
children.push_back(std::move(bottom));
|
21
22
|
}
|
@@ -30,8 +31,8 @@ class RecursiveCTEState : public GlobalSinkState {
|
|
30
31
|
public:
|
31
32
|
explicit RecursiveCTEState(ClientContext &context, const PhysicalRecursiveCTE &op)
|
32
33
|
: intermediate_table(context, op.GetTypes()), new_groups(STANDARD_VECTOR_SIZE) {
|
33
|
-
ht = make_uniq<GroupedAggregateHashTable>(context,
|
34
|
-
vector<BoundAggregateExpression *>());
|
34
|
+
ht = make_uniq<GroupedAggregateHashTable>(context, BufferAllocator::Get(context), op.types,
|
35
|
+
vector<LogicalType>(), vector<BoundAggregateExpression *>());
|
35
36
|
}
|
36
37
|
|
37
38
|
unique_ptr<GroupedAggregateHashTable> ht;
|
@@ -195,4 +196,13 @@ vector<const_reference<PhysicalOperator>> PhysicalRecursiveCTE::GetSources() con
|
|
195
196
|
return {*this};
|
196
197
|
}
|
197
198
|
|
199
|
+
string PhysicalRecursiveCTE::ParamsToString() const {
|
200
|
+
string result = "";
|
201
|
+
result += "\n[INFOSEPARATOR]\n";
|
202
|
+
result += ctename;
|
203
|
+
result += "\n[INFOSEPARATOR]\n";
|
204
|
+
result += StringUtil::Format("idx: %llu", table_index);
|
205
|
+
return result;
|
206
|
+
}
|
207
|
+
|
198
208
|
} // namespace duckdb
|
@@ -9,10 +9,10 @@ RadixPartitionInfo::RadixPartitionInfo(const idx_t n_partitions_upper_bound)
|
|
9
9
|
radix_bits(RadixPartitioning::RadixBits(n_partitions)), radix_mask(RadixPartitioning::Mask(radix_bits)),
|
10
10
|
radix_shift(RadixPartitioning::Shift(radix_bits)) {
|
11
11
|
|
12
|
+
D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
|
12
13
|
D_ASSERT(n_partitions > 0);
|
13
|
-
D_ASSERT(n_partitions
|
14
|
+
D_ASSERT(n_partitions == RadixPartitioning::NumberOfPartitions(radix_bits));
|
14
15
|
D_ASSERT(IsPowerOfTwo(n_partitions));
|
15
|
-
D_ASSERT(radix_bits <= 8);
|
16
16
|
}
|
17
17
|
|
18
18
|
PartitionableHashTable::PartitionableHashTable(ClientContext &context, Allocator &allocator,
|
@@ -47,11 +47,21 @@ HtEntryType PartitionableHashTable::GetHTEntrySize() {
|
|
47
47
|
return HtEntryType::HT_WIDTH_32;
|
48
48
|
}
|
49
49
|
|
50
|
+
bool OverMemoryLimit(ClientContext &context, const bool is_partitioned, const RadixPartitionInfo &partition_info,
|
51
|
+
const GroupedAggregateHashTable &ht) {
|
52
|
+
const auto n_partitions = is_partitioned ? partition_info.n_partitions : 1;
|
53
|
+
const auto max_memory = BufferManager::GetBufferManager(context).GetMaxMemory();
|
54
|
+
const auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
55
|
+
const auto memory_per_partition = 0.6 * max_memory / num_threads / n_partitions;
|
56
|
+
return ht.TotalSize() > memory_per_partition;
|
57
|
+
}
|
58
|
+
|
50
59
|
idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes,
|
51
60
|
DataChunk &payload, const unsafe_vector<idx_t> &filter) {
|
52
61
|
// If this is false, a single AddChunk would overflow the max capacity
|
53
62
|
D_ASSERT(list.empty() || groups.size() <= list.back()->MaxCapacity());
|
54
|
-
if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity()
|
63
|
+
if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity() ||
|
64
|
+
OverMemoryLimit(context, is_partitioned, partition_info, *list.back())) {
|
55
65
|
idx_t new_capacity = GroupedAggregateHashTable::InitialCapacity();
|
56
66
|
if (!list.empty()) {
|
57
67
|
new_capacity = list.back()->Capacity();
|
@@ -70,7 +80,7 @@ idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bo
|
|
70
80
|
|
71
81
|
// we partition when we are asked to or when the unpartitioned ht runs out of space
|
72
82
|
if (!IsPartitioned() && do_partition) {
|
73
|
-
Partition();
|
83
|
+
Partition(false);
|
74
84
|
}
|
75
85
|
|
76
86
|
if (!IsPartitioned()) {
|
@@ -117,7 +127,7 @@ idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bo
|
|
117
127
|
return group_count;
|
118
128
|
}
|
119
129
|
|
120
|
-
void PartitionableHashTable::Partition() {
|
130
|
+
void PartitionableHashTable::Partition(bool sink_done) {
|
121
131
|
D_ASSERT(!IsPartitioned());
|
122
132
|
D_ASSERT(radix_partitioned_hts.empty());
|
123
133
|
D_ASSERT(partition_info.n_partitions > 1);
|
@@ -130,7 +140,7 @@ void PartitionableHashTable::Partition() {
|
|
130
140
|
context, allocator, group_types, payload_types, bindings, GetHTEntrySize()));
|
131
141
|
partition_hts[r] = radix_partitioned_hts[r].back().get();
|
132
142
|
}
|
133
|
-
unpartitioned_ht->Partition(partition_hts, partition_info.radix_bits);
|
143
|
+
unpartitioned_ht->Partition(partition_hts, partition_info.radix_bits, sink_done);
|
134
144
|
unpartitioned_ht.reset();
|
135
145
|
}
|
136
146
|
unpartitioned_hts.clear();
|
@@ -153,6 +163,22 @@ HashTableList PartitionableHashTable::GetUnpartitioned() {
|
|
153
163
|
return std::move(unpartitioned_hts);
|
154
164
|
}
|
155
165
|
|
166
|
+
idx_t PartitionableHashTable::GetPartitionCount(idx_t partition) const {
|
167
|
+
idx_t total_size = 0;
|
168
|
+
for (const auto &ht : radix_partitioned_hts[partition]) {
|
169
|
+
total_size += ht->Count();
|
170
|
+
}
|
171
|
+
return total_size;
|
172
|
+
}
|
173
|
+
|
174
|
+
idx_t PartitionableHashTable::GetPartitionSize(idx_t partition) const {
|
175
|
+
idx_t total_size = 0;
|
176
|
+
for (const auto &ht : radix_partitioned_hts[partition]) {
|
177
|
+
total_size += ht->DataSize();
|
178
|
+
}
|
179
|
+
return total_size;
|
180
|
+
}
|
181
|
+
|
156
182
|
void PartitionableHashTable::Finalize() {
|
157
183
|
if (IsPartitioned()) {
|
158
184
|
for (auto &ht_list : radix_partitioned_hts) {
|
@@ -169,4 +195,13 @@ void PartitionableHashTable::Finalize() {
|
|
169
195
|
}
|
170
196
|
}
|
171
197
|
|
198
|
+
void PartitionableHashTable::Append(GroupedAggregateHashTable &ht) {
|
199
|
+
if (unpartitioned_hts.empty()) {
|
200
|
+
unpartitioned_hts.push_back(make_uniq<GroupedAggregateHashTable>(context, allocator, group_types, payload_types,
|
201
|
+
bindings, GetHTEntrySize(),
|
202
|
+
GroupedAggregateHashTable::InitialCapacity()));
|
203
|
+
}
|
204
|
+
unpartitioned_hts.back()->Append(ht);
|
205
|
+
}
|
206
|
+
|
172
207
|
} // namespace duckdb
|
@@ -1,6 +1,7 @@
|
|
1
1
|
#include "duckdb/execution/perfect_aggregate_hashtable.hpp"
|
2
|
-
|
2
|
+
|
3
3
|
#include "duckdb/common/row_operations/row_operations.hpp"
|
4
|
+
#include "duckdb/execution/expression_executor.hpp"
|
4
5
|
|
5
6
|
namespace duckdb {
|
6
7
|
|
@@ -93,6 +94,18 @@ static void ComputeGroupLocation(Vector &group, Value &min, uintptr_t *address_d
|
|
93
94
|
case PhysicalType::INT64:
|
94
95
|
ComputeGroupLocationTemplated<int64_t>(vdata, min, address_data, current_shift, count);
|
95
96
|
break;
|
97
|
+
case PhysicalType::UINT8:
|
98
|
+
ComputeGroupLocationTemplated<uint8_t>(vdata, min, address_data, current_shift, count);
|
99
|
+
break;
|
100
|
+
case PhysicalType::UINT16:
|
101
|
+
ComputeGroupLocationTemplated<uint16_t>(vdata, min, address_data, current_shift, count);
|
102
|
+
break;
|
103
|
+
case PhysicalType::UINT32:
|
104
|
+
ComputeGroupLocationTemplated<uint32_t>(vdata, min, address_data, current_shift, count);
|
105
|
+
break;
|
106
|
+
case PhysicalType::UINT64:
|
107
|
+
ComputeGroupLocationTemplated<uint64_t>(vdata, min, address_data, current_shift, count);
|
108
|
+
break;
|
96
109
|
default:
|
97
110
|
throw InternalException("Unsupported group type for perfect aggregate hash table");
|
98
111
|
}
|
@@ -123,7 +136,7 @@ void PerfectAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload)
|
|
123
136
|
// after finding the group location we update the aggregates
|
124
137
|
idx_t payload_idx = 0;
|
125
138
|
auto &aggregates = layout.GetAggregates();
|
126
|
-
RowOperationsState row_state(aggregate_allocator
|
139
|
+
RowOperationsState row_state(aggregate_allocator);
|
127
140
|
for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) {
|
128
141
|
auto &aggregate = aggregates[aggr_idx];
|
129
142
|
auto input_count = (idx_t)aggregate.child_count;
|
@@ -152,7 +165,7 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) {
|
|
152
165
|
data_ptr_t source_ptr = other.data;
|
153
166
|
data_ptr_t target_ptr = data;
|
154
167
|
idx_t combine_count = 0;
|
155
|
-
RowOperationsState row_state(aggregate_allocator
|
168
|
+
RowOperationsState row_state(aggregate_allocator);
|
156
169
|
for (idx_t i = 0; i < total_groups; i++) {
|
157
170
|
auto has_entry_source = other.group_is_set[i];
|
158
171
|
// we only have any work to do if the source has an entry for this group
|
@@ -208,6 +221,18 @@ static void ReconstructGroupVector(uint32_t group_values[], Value &min, idx_t re
|
|
208
221
|
case PhysicalType::INT64:
|
209
222
|
ReconstructGroupVectorTemplated<int64_t>(group_values, min, mask, shift, entry_count, result);
|
210
223
|
break;
|
224
|
+
case PhysicalType::UINT8:
|
225
|
+
ReconstructGroupVectorTemplated<uint8_t>(group_values, min, mask, shift, entry_count, result);
|
226
|
+
break;
|
227
|
+
case PhysicalType::UINT16:
|
228
|
+
ReconstructGroupVectorTemplated<uint16_t>(group_values, min, mask, shift, entry_count, result);
|
229
|
+
break;
|
230
|
+
case PhysicalType::UINT32:
|
231
|
+
ReconstructGroupVectorTemplated<uint32_t>(group_values, min, mask, shift, entry_count, result);
|
232
|
+
break;
|
233
|
+
case PhysicalType::UINT64:
|
234
|
+
ReconstructGroupVectorTemplated<uint64_t>(group_values, min, mask, shift, entry_count, result);
|
235
|
+
break;
|
211
236
|
default:
|
212
237
|
throw InternalException("Invalid type for perfect aggregate HT group");
|
213
238
|
}
|
@@ -243,7 +268,7 @@ void PerfectAggregateHashTable::Scan(idx_t &scan_position, DataChunk &result) {
|
|
243
268
|
}
|
244
269
|
// then construct the payloads
|
245
270
|
result.SetCardinality(entry_count);
|
246
|
-
RowOperationsState row_state(aggregate_allocator
|
271
|
+
RowOperationsState row_state(aggregate_allocator);
|
247
272
|
RowOperations::FinalizeStates(row_state, layout, addresses, result, grouping_columns);
|
248
273
|
}
|
249
274
|
|
@@ -264,7 +289,7 @@ void PerfectAggregateHashTable::Destroy() {
|
|
264
289
|
idx_t count = 0;
|
265
290
|
|
266
291
|
// iterate over all initialised slots of the hash table
|
267
|
-
RowOperationsState row_state(aggregate_allocator
|
292
|
+
RowOperationsState row_state(aggregate_allocator);
|
268
293
|
data_ptr_t payload_ptr = data;
|
269
294
|
for (idx_t i = 0; i < total_groups; i++) {
|
270
295
|
if (group_is_set[i]) {
|
@@ -5,12 +5,12 @@
|
|
5
5
|
#include "duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp"
|
6
6
|
#include "duckdb/execution/operator/projection/physical_projection.hpp"
|
7
7
|
#include "duckdb/execution/physical_plan_generator.hpp"
|
8
|
+
#include "duckdb/function/function_binder.hpp"
|
8
9
|
#include "duckdb/main/client_context.hpp"
|
9
10
|
#include "duckdb/parser/expression/comparison_expression.hpp"
|
10
11
|
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
|
11
|
-
#include "duckdb/planner/operator/logical_aggregate.hpp"
|
12
|
-
#include "duckdb/function/function_binder.hpp"
|
13
12
|
#include "duckdb/planner/expression/bound_reference_expression.hpp"
|
13
|
+
#include "duckdb/planner/operator/logical_aggregate.hpp"
|
14
14
|
|
15
15
|
namespace duckdb {
|
16
16
|
|
@@ -23,6 +23,11 @@ static uint32_t RequiredBitsForValue(uint32_t n) {
|
|
23
23
|
return required_bits;
|
24
24
|
}
|
25
25
|
|
26
|
+
template <class T>
|
27
|
+
hugeint_t GetRangeHugeint(const BaseStatistics &nstats) {
|
28
|
+
return Hugeint::Convert(NumericStats::GetMax<T>(nstats)) - Hugeint::Convert(NumericStats::GetMin<T>(nstats));
|
29
|
+
}
|
30
|
+
|
26
31
|
static bool CanUsePerfectHashAggregate(ClientContext &context, LogicalAggregate &op, vector<idx_t> &bits_per_group) {
|
27
32
|
if (op.grouping_sets.size() > 1 || !op.grouping_functions.empty()) {
|
28
33
|
return false;
|
@@ -40,6 +45,10 @@ static bool CanUsePerfectHashAggregate(ClientContext &context, LogicalAggregate
|
|
40
45
|
case PhysicalType::INT16:
|
41
46
|
case PhysicalType::INT32:
|
42
47
|
case PhysicalType::INT64:
|
48
|
+
case PhysicalType::UINT8:
|
49
|
+
case PhysicalType::UINT16:
|
50
|
+
case PhysicalType::UINT32:
|
51
|
+
case PhysicalType::UINT64:
|
43
52
|
break;
|
44
53
|
default:
|
45
54
|
// we only support simple integer types for perfect hashing
|
@@ -53,6 +62,8 @@ static bool CanUsePerfectHashAggregate(ClientContext &context, LogicalAggregate
|
|
53
62
|
switch (group_type.InternalType()) {
|
54
63
|
case PhysicalType::INT8:
|
55
64
|
case PhysicalType::INT16:
|
65
|
+
case PhysicalType::UINT8:
|
66
|
+
case PhysicalType::UINT16:
|
56
67
|
break;
|
57
68
|
default:
|
58
69
|
// type is too large and there are no stats: skip perfect hashing
|
@@ -68,33 +79,55 @@ static bool CanUsePerfectHashAggregate(ClientContext &context, LogicalAggregate
|
|
68
79
|
if (!NumericStats::HasMinMax(nstats)) {
|
69
80
|
return false;
|
70
81
|
}
|
82
|
+
|
83
|
+
if (NumericStats::Max(*stats) < NumericStats::Min(*stats)) {
|
84
|
+
// May result in underflow
|
85
|
+
return false;
|
86
|
+
}
|
87
|
+
|
71
88
|
// we have a min and a max value for the stats: use that to figure out how many bits we have
|
72
89
|
// we add two here, one for the NULL value, and one to make the computation one-indexed
|
73
90
|
// (e.g. if min and max are the same, we still need one entry in total)
|
74
|
-
|
91
|
+
hugeint_t range_h;
|
75
92
|
switch (group_type.InternalType()) {
|
76
93
|
case PhysicalType::INT8:
|
77
|
-
|
94
|
+
range_h = GetRangeHugeint<int8_t>(nstats);
|
78
95
|
break;
|
79
96
|
case PhysicalType::INT16:
|
80
|
-
|
97
|
+
range_h = GetRangeHugeint<int16_t>(nstats);
|
81
98
|
break;
|
82
99
|
case PhysicalType::INT32:
|
83
|
-
|
100
|
+
range_h = GetRangeHugeint<int32_t>(nstats);
|
84
101
|
break;
|
85
102
|
case PhysicalType::INT64:
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
103
|
+
range_h = GetRangeHugeint<int64_t>(nstats);
|
104
|
+
break;
|
105
|
+
case PhysicalType::UINT8:
|
106
|
+
range_h = GetRangeHugeint<uint8_t>(nstats);
|
107
|
+
break;
|
108
|
+
case PhysicalType::UINT16:
|
109
|
+
range_h = GetRangeHugeint<uint16_t>(nstats);
|
110
|
+
break;
|
111
|
+
case PhysicalType::UINT32:
|
112
|
+
range_h = GetRangeHugeint<uint32_t>(nstats);
|
113
|
+
break;
|
114
|
+
case PhysicalType::UINT64:
|
115
|
+
range_h = GetRangeHugeint<uint64_t>(nstats);
|
90
116
|
break;
|
91
117
|
default:
|
92
118
|
throw InternalException("Unsupported type for perfect hash (should be caught before)");
|
93
119
|
}
|
120
|
+
|
121
|
+
uint64_t range;
|
122
|
+
if (!Hugeint::TryCast(range_h, range)) {
|
123
|
+
return false;
|
124
|
+
}
|
125
|
+
|
94
126
|
// bail out on any range bigger than 2^32
|
95
127
|
if (range >= NumericLimits<int32_t>::Maximum()) {
|
96
128
|
return false;
|
97
129
|
}
|
130
|
+
|
98
131
|
range += 2;
|
99
132
|
// figure out how many bits we need
|
100
133
|
idx_t required_bits = RequiredBitsForValue(range);
|