duckdb 0.8.2-dev150.0 → 0.8.2-dev1559.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +4619 -4446
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/extra_type_info.cpp +506 -0
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +14 -14
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types.cpp +8 -655
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +13 -22
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +4 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
- package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +79 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +44 -31
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
- package/src/statement.cpp +10 -3
- package/test/test_all_types.test.ts +233 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -1,9 +1,11 @@
|
|
1
1
|
#include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/allocator.hpp"
|
4
|
+
#include "duckdb/common/types/batched_data_collection.hpp"
|
5
|
+
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
2
6
|
#include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
|
3
7
|
#include "duckdb/parallel/base_pipeline_event.hpp"
|
4
|
-
|
5
|
-
#include "duckdb/common/types/batched_data_collection.hpp"
|
6
|
-
#include "duckdb/common/allocator.hpp"
|
8
|
+
|
7
9
|
#include <algorithm>
|
8
10
|
|
9
11
|
namespace duckdb {
|
@@ -67,7 +69,7 @@ public:
|
|
67
69
|
optional_idx batch_index;
|
68
70
|
|
69
71
|
void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
|
70
|
-
collection = make_uniq<ColumnDataCollection>(
|
72
|
+
collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
|
71
73
|
collection->InitializeAppend(append_state);
|
72
74
|
}
|
73
75
|
};
|
@@ -116,7 +116,7 @@ public:
|
|
116
116
|
optional_idx batch_index;
|
117
117
|
|
118
118
|
void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
|
119
|
-
collection = make_uniq<ColumnDataCollection>(
|
119
|
+
collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
|
120
120
|
collection->InitializeAppend(append_state);
|
121
121
|
}
|
122
122
|
};
|
@@ -353,7 +353,7 @@ void PhysicalFixedBatchCopy::RepartitionBatches(ClientContext &context, GlobalSi
|
|
353
353
|
} else {
|
354
354
|
// the collection is too large for a batch - we need to repartition
|
355
355
|
// create an empty collection
|
356
|
-
current_collection = make_uniq<ColumnDataCollection>(
|
356
|
+
current_collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), children[0]->types);
|
357
357
|
}
|
358
358
|
if (current_collection) {
|
359
359
|
current_collection->InitializeAppend(append_state);
|
@@ -373,7 +373,7 @@ void PhysicalFixedBatchCopy::RepartitionBatches(ClientContext &context, GlobalSi
|
|
373
373
|
}
|
374
374
|
// the collection is full - move it to the result and create a new one
|
375
375
|
gstate.AddTask(make_uniq<PrepareBatchTask>(gstate.scheduled_batch_index++, std::move(current_collection)));
|
376
|
-
current_collection = make_uniq<ColumnDataCollection>(
|
376
|
+
current_collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), children[0]->types);
|
377
377
|
current_collection->InitializeAppend(append_state);
|
378
378
|
}
|
379
379
|
}
|
@@ -41,7 +41,7 @@ PhysicalInsert::PhysicalInsert(vector<LogicalType> types_p, TableCatalogEntry &t
|
|
41
41
|
return;
|
42
42
|
}
|
43
43
|
|
44
|
-
D_ASSERT(set_expressions.size() == set_columns.size());
|
44
|
+
D_ASSERT(this->set_expressions.size() == this->set_columns.size());
|
45
45
|
|
46
46
|
// One or more columns are referenced from the existing table,
|
47
47
|
// we use the 'insert_types' to figure out which types these columns have
|
@@ -16,6 +16,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
|
|
16
16
|
pivot_map[bound_pivot.pivot_values[p]] = bound_pivot.group_count + p;
|
17
17
|
}
|
18
18
|
// extract the empty aggregate expressions
|
19
|
+
ArenaAllocator allocator(Allocator::DefaultAllocator());
|
19
20
|
for (auto &aggr_expr : bound_pivot.aggregates) {
|
20
21
|
auto &aggr = aggr_expr->Cast<BoundAggregateExpression>();
|
21
22
|
// for each aggregate, initialize an empty aggregate state and finalize it immediately
|
@@ -23,7 +24,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
|
|
23
24
|
aggr.function.initialize(state.get());
|
24
25
|
Vector state_vector(Value::POINTER(CastPointerToValue(state.get())));
|
25
26
|
Vector result_vector(aggr_expr->return_type);
|
26
|
-
AggregateInputData aggr_input_data(aggr.bind_info.get(),
|
27
|
+
AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
|
27
28
|
aggr.function.finalize(state_vector, aggr_input_data, result_vector, 1, 0);
|
28
29
|
empty_aggregates.push_back(result_vector.GetValue(0));
|
29
30
|
}
|
@@ -64,6 +64,9 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline &current, MetaPipeline &met
|
|
64
64
|
state.SetPipelineSource(current, delim_join.distinct->Cast<PhysicalOperator>());
|
65
65
|
return;
|
66
66
|
}
|
67
|
+
case PhysicalOperatorType::CTE_SCAN: {
|
68
|
+
break;
|
69
|
+
}
|
67
70
|
case PhysicalOperatorType::RECURSIVE_CTE_SCAN:
|
68
71
|
if (!meta_pipeline.HasRecursiveCTE()) {
|
69
72
|
throw InternalException("Recursive CTE scan found without recursive CTE node");
|
@@ -76,4 +79,20 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline &current, MetaPipeline &met
|
|
76
79
|
state.SetPipelineSource(current, *this);
|
77
80
|
}
|
78
81
|
|
82
|
+
string PhysicalColumnDataScan::ParamsToString() const {
|
83
|
+
string result = "";
|
84
|
+
switch (type) {
|
85
|
+
case PhysicalOperatorType::CTE_SCAN:
|
86
|
+
case PhysicalOperatorType::RECURSIVE_CTE_SCAN: {
|
87
|
+
result += "\n[INFOSEPARATOR]\n";
|
88
|
+
result += StringUtil::Format("idx: %llu", cte_index);
|
89
|
+
break;
|
90
|
+
}
|
91
|
+
default:
|
92
|
+
break;
|
93
|
+
}
|
94
|
+
|
95
|
+
return result;
|
96
|
+
}
|
97
|
+
|
79
98
|
} // namespace duckdb
|
@@ -16,17 +16,18 @@ PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction fu
|
|
16
16
|
: PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, std::move(types), estimated_cardinality),
|
17
17
|
function(std::move(function_p)), bind_data(std::move(bind_data_p)), column_ids(std::move(column_ids_p)),
|
18
18
|
names(std::move(names_p)), table_filters(std::move(table_filters_p)) {
|
19
|
+
extra_info.file_filters = "";
|
19
20
|
}
|
20
21
|
|
21
22
|
PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction function_p,
|
22
23
|
unique_ptr<FunctionData> bind_data_p, vector<LogicalType> returned_types_p,
|
23
24
|
vector<column_t> column_ids_p, vector<idx_t> projection_ids_p,
|
24
25
|
vector<string> names_p, unique_ptr<TableFilterSet> table_filters_p,
|
25
|
-
idx_t estimated_cardinality)
|
26
|
+
idx_t estimated_cardinality, ExtraOperatorInfo extra_info)
|
26
27
|
: PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, std::move(types), estimated_cardinality),
|
27
28
|
function(std::move(function_p)), bind_data(std::move(bind_data_p)), returned_types(std::move(returned_types_p)),
|
28
29
|
column_ids(std::move(column_ids_p)), projection_ids(std::move(projection_ids_p)), names(std::move(names_p)),
|
29
|
-
table_filters(std::move(table_filters_p)) {
|
30
|
+
table_filters(std::move(table_filters_p)), extra_info(extra_info) {
|
30
31
|
}
|
31
32
|
|
32
33
|
class TableScanGlobalSourceState : public GlobalSourceState {
|
@@ -149,6 +150,10 @@ string PhysicalTableScan::ParamsToString() const {
|
|
149
150
|
}
|
150
151
|
}
|
151
152
|
}
|
153
|
+
if (!extra_info.file_filters.empty()) {
|
154
|
+
result += "\n[INFOSEPARATOR]\n";
|
155
|
+
result += "File Filters: " + extra_info.file_filters;
|
156
|
+
}
|
152
157
|
result += "\n[INFOSEPARATOR]\n";
|
153
158
|
result += StringUtil::Format("EC: %llu", estimated_props->GetCardinality<idx_t>());
|
154
159
|
return result;
|
@@ -0,0 +1,160 @@
|
|
1
|
+
#include "duckdb/execution/operator/set/physical_cte.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/types/column/column_data_collection.hpp"
|
4
|
+
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
5
|
+
#include "duckdb/execution/aggregate_hashtable.hpp"
|
6
|
+
#include "duckdb/execution/executor.hpp"
|
7
|
+
#include "duckdb/parallel/event.hpp"
|
8
|
+
#include "duckdb/parallel/meta_pipeline.hpp"
|
9
|
+
#include "duckdb/parallel/pipeline.hpp"
|
10
|
+
#include "duckdb/parallel/task_scheduler.hpp"
|
11
|
+
#include "duckdb/storage/buffer_manager.hpp"
|
12
|
+
|
13
|
+
namespace duckdb {
|
14
|
+
|
15
|
+
PhysicalCTE::PhysicalCTE(string ctename, idx_t table_index, vector<LogicalType> types, unique_ptr<PhysicalOperator> top,
|
16
|
+
unique_ptr<PhysicalOperator> bottom, idx_t estimated_cardinality)
|
17
|
+
: PhysicalOperator(PhysicalOperatorType::CTE, std::move(types), estimated_cardinality), table_index(table_index),
|
18
|
+
ctename(std::move(ctename)) {
|
19
|
+
children.push_back(std::move(top));
|
20
|
+
children.push_back(std::move(bottom));
|
21
|
+
}
|
22
|
+
|
23
|
+
PhysicalCTE::~PhysicalCTE() {
|
24
|
+
}
|
25
|
+
|
26
|
+
//===--------------------------------------------------------------------===//
|
27
|
+
// Sink
|
28
|
+
//===--------------------------------------------------------------------===//
|
29
|
+
class CTEState : public GlobalSinkState {
|
30
|
+
public:
|
31
|
+
explicit CTEState(ClientContext &context, const PhysicalCTE &op)
|
32
|
+
: intermediate_table(context, op.children[1]->GetTypes()) {
|
33
|
+
}
|
34
|
+
ColumnDataCollection intermediate_table;
|
35
|
+
ColumnDataScanState scan_state;
|
36
|
+
bool initialized = false;
|
37
|
+
bool finished_scan = false;
|
38
|
+
};
|
39
|
+
|
40
|
+
unique_ptr<GlobalSinkState> PhysicalCTE::GetGlobalSinkState(ClientContext &context) const {
|
41
|
+
working_table->Reset();
|
42
|
+
return make_uniq<CTEState>(context, *this);
|
43
|
+
}
|
44
|
+
|
45
|
+
SinkResultType PhysicalCTE::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
|
46
|
+
auto &gstate = input.global_state.Cast<CTEState>();
|
47
|
+
if (!gstate.finished_scan) {
|
48
|
+
working_table->Append(chunk);
|
49
|
+
} else {
|
50
|
+
gstate.intermediate_table.Append(chunk);
|
51
|
+
}
|
52
|
+
return SinkResultType::NEED_MORE_INPUT;
|
53
|
+
}
|
54
|
+
|
55
|
+
//===--------------------------------------------------------------------===//
|
56
|
+
// Source
|
57
|
+
//===--------------------------------------------------------------------===//
|
58
|
+
SourceResultType PhysicalCTE::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const {
|
59
|
+
auto &gstate = sink_state->Cast<CTEState>();
|
60
|
+
if (!gstate.initialized) {
|
61
|
+
gstate.intermediate_table.InitializeScan(gstate.scan_state);
|
62
|
+
gstate.finished_scan = false;
|
63
|
+
gstate.initialized = true;
|
64
|
+
}
|
65
|
+
if (!gstate.finished_scan) {
|
66
|
+
gstate.finished_scan = true;
|
67
|
+
ExecuteRecursivePipelines(context);
|
68
|
+
}
|
69
|
+
|
70
|
+
gstate.intermediate_table.Scan(gstate.scan_state, chunk);
|
71
|
+
|
72
|
+
return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
|
73
|
+
}
|
74
|
+
|
75
|
+
void PhysicalCTE::ExecuteRecursivePipelines(ExecutionContext &context) const {
|
76
|
+
if (!recursive_meta_pipeline) {
|
77
|
+
throw InternalException("Missing meta pipeline for recursive CTE");
|
78
|
+
}
|
79
|
+
|
80
|
+
// get and reset pipelines
|
81
|
+
vector<shared_ptr<Pipeline>> pipelines;
|
82
|
+
recursive_meta_pipeline->GetPipelines(pipelines, true);
|
83
|
+
for (auto &pipeline : pipelines) {
|
84
|
+
auto sink = pipeline->GetSink();
|
85
|
+
if (sink.get() != this) {
|
86
|
+
sink->sink_state.reset();
|
87
|
+
}
|
88
|
+
for (auto &op_ref : pipeline->GetOperators()) {
|
89
|
+
auto &op = op_ref.get();
|
90
|
+
op.op_state.reset();
|
91
|
+
}
|
92
|
+
pipeline->ClearSource();
|
93
|
+
}
|
94
|
+
|
95
|
+
// get the MetaPipelines in the recursive_meta_pipeline and reschedule them
|
96
|
+
vector<shared_ptr<MetaPipeline>> meta_pipelines;
|
97
|
+
recursive_meta_pipeline->GetMetaPipelines(meta_pipelines, true, false);
|
98
|
+
auto &executor = recursive_meta_pipeline->GetExecutor();
|
99
|
+
vector<shared_ptr<Event>> events;
|
100
|
+
executor.ReschedulePipelines(meta_pipelines, events);
|
101
|
+
|
102
|
+
while (true) {
|
103
|
+
executor.WorkOnTasks();
|
104
|
+
if (executor.HasError()) {
|
105
|
+
executor.ThrowException();
|
106
|
+
}
|
107
|
+
bool finished = true;
|
108
|
+
for (auto &event : events) {
|
109
|
+
if (!event->IsFinished()) {
|
110
|
+
finished = false;
|
111
|
+
break;
|
112
|
+
}
|
113
|
+
}
|
114
|
+
if (finished) {
|
115
|
+
// all pipelines finished: done!
|
116
|
+
break;
|
117
|
+
}
|
118
|
+
}
|
119
|
+
}
|
120
|
+
|
121
|
+
//===--------------------------------------------------------------------===//
|
122
|
+
// Pipeline Construction
|
123
|
+
//===--------------------------------------------------------------------===//
|
124
|
+
void PhysicalCTE::BuildPipelines(Pipeline &current, MetaPipeline &meta_pipeline) {
|
125
|
+
D_ASSERT(children.size() == 2);
|
126
|
+
op_state.reset();
|
127
|
+
sink_state.reset();
|
128
|
+
recursive_meta_pipeline.reset();
|
129
|
+
|
130
|
+
auto &state = meta_pipeline.GetState();
|
131
|
+
state.SetPipelineSource(current, *this);
|
132
|
+
|
133
|
+
auto &executor = meta_pipeline.GetExecutor();
|
134
|
+
executor.AddMaterializedCTE(*this);
|
135
|
+
|
136
|
+
auto &child_meta_pipeline = meta_pipeline.CreateChildMetaPipeline(current, *this);
|
137
|
+
child_meta_pipeline.Build(*children[0]);
|
138
|
+
|
139
|
+
// the RHS is the recursive pipeline
|
140
|
+
recursive_meta_pipeline = make_shared<MetaPipeline>(executor, state, this);
|
141
|
+
if (meta_pipeline.HasRecursiveCTE()) {
|
142
|
+
recursive_meta_pipeline->SetRecursiveCTE();
|
143
|
+
}
|
144
|
+
recursive_meta_pipeline->Build(*children[1]);
|
145
|
+
}
|
146
|
+
|
147
|
+
vector<const_reference<PhysicalOperator>> PhysicalCTE::GetSources() const {
|
148
|
+
return {*this};
|
149
|
+
}
|
150
|
+
|
151
|
+
string PhysicalCTE::ParamsToString() const {
|
152
|
+
string result = "";
|
153
|
+
result += "\n[INFOSEPARATOR]\n";
|
154
|
+
result += ctename;
|
155
|
+
result += "\n[INFOSEPARATOR]\n";
|
156
|
+
result += StringUtil::Format("idx: %llu", table_index);
|
157
|
+
return result;
|
158
|
+
}
|
159
|
+
|
160
|
+
} // namespace duckdb
|
@@ -12,10 +12,11 @@
|
|
12
12
|
|
13
13
|
namespace duckdb {
|
14
14
|
|
15
|
-
PhysicalRecursiveCTE::PhysicalRecursiveCTE(vector<LogicalType> types, bool union_all,
|
16
|
-
unique_ptr<PhysicalOperator>
|
15
|
+
PhysicalRecursiveCTE::PhysicalRecursiveCTE(string ctename, idx_t table_index, vector<LogicalType> types, bool union_all,
|
16
|
+
unique_ptr<PhysicalOperator> top, unique_ptr<PhysicalOperator> bottom,
|
17
|
+
idx_t estimated_cardinality)
|
17
18
|
: PhysicalOperator(PhysicalOperatorType::RECURSIVE_CTE, std::move(types), estimated_cardinality),
|
18
|
-
union_all(union_all) {
|
19
|
+
ctename(std::move(ctename)), table_index(table_index), union_all(union_all) {
|
19
20
|
children.push_back(std::move(top));
|
20
21
|
children.push_back(std::move(bottom));
|
21
22
|
}
|
@@ -30,8 +31,8 @@ class RecursiveCTEState : public GlobalSinkState {
|
|
30
31
|
public:
|
31
32
|
explicit RecursiveCTEState(ClientContext &context, const PhysicalRecursiveCTE &op)
|
32
33
|
: intermediate_table(context, op.GetTypes()), new_groups(STANDARD_VECTOR_SIZE) {
|
33
|
-
ht = make_uniq<GroupedAggregateHashTable>(context,
|
34
|
-
vector<BoundAggregateExpression *>());
|
34
|
+
ht = make_uniq<GroupedAggregateHashTable>(context, BufferAllocator::Get(context), op.types,
|
35
|
+
vector<LogicalType>(), vector<BoundAggregateExpression *>());
|
35
36
|
}
|
36
37
|
|
37
38
|
unique_ptr<GroupedAggregateHashTable> ht;
|
@@ -195,4 +196,13 @@ vector<const_reference<PhysicalOperator>> PhysicalRecursiveCTE::GetSources() con
|
|
195
196
|
return {*this};
|
196
197
|
}
|
197
198
|
|
199
|
+
string PhysicalRecursiveCTE::ParamsToString() const {
|
200
|
+
string result = "";
|
201
|
+
result += "\n[INFOSEPARATOR]\n";
|
202
|
+
result += ctename;
|
203
|
+
result += "\n[INFOSEPARATOR]\n";
|
204
|
+
result += StringUtil::Format("idx: %llu", table_index);
|
205
|
+
return result;
|
206
|
+
}
|
207
|
+
|
198
208
|
} // namespace duckdb
|
@@ -9,10 +9,10 @@ RadixPartitionInfo::RadixPartitionInfo(const idx_t n_partitions_upper_bound)
|
|
9
9
|
radix_bits(RadixPartitioning::RadixBits(n_partitions)), radix_mask(RadixPartitioning::Mask(radix_bits)),
|
10
10
|
radix_shift(RadixPartitioning::Shift(radix_bits)) {
|
11
11
|
|
12
|
+
D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
|
12
13
|
D_ASSERT(n_partitions > 0);
|
13
|
-
D_ASSERT(n_partitions
|
14
|
+
D_ASSERT(n_partitions == RadixPartitioning::NumberOfPartitions(radix_bits));
|
14
15
|
D_ASSERT(IsPowerOfTwo(n_partitions));
|
15
|
-
D_ASSERT(radix_bits <= 8);
|
16
16
|
}
|
17
17
|
|
18
18
|
PartitionableHashTable::PartitionableHashTable(ClientContext &context, Allocator &allocator,
|
@@ -47,11 +47,21 @@ HtEntryType PartitionableHashTable::GetHTEntrySize() {
|
|
47
47
|
return HtEntryType::HT_WIDTH_32;
|
48
48
|
}
|
49
49
|
|
50
|
+
bool OverMemoryLimit(ClientContext &context, const bool is_partitioned, const RadixPartitionInfo &partition_info,
|
51
|
+
const GroupedAggregateHashTable &ht) {
|
52
|
+
const auto n_partitions = is_partitioned ? partition_info.n_partitions : 1;
|
53
|
+
const auto max_memory = BufferManager::GetBufferManager(context).GetMaxMemory();
|
54
|
+
const auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
55
|
+
const auto memory_per_partition = 0.6 * max_memory / num_threads / n_partitions;
|
56
|
+
return ht.TotalSize() > memory_per_partition;
|
57
|
+
}
|
58
|
+
|
50
59
|
idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes,
|
51
60
|
DataChunk &payload, const unsafe_vector<idx_t> &filter) {
|
52
61
|
// If this is false, a single AddChunk would overflow the max capacity
|
53
62
|
D_ASSERT(list.empty() || groups.size() <= list.back()->MaxCapacity());
|
54
|
-
if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity()
|
63
|
+
if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity() ||
|
64
|
+
OverMemoryLimit(context, is_partitioned, partition_info, *list.back())) {
|
55
65
|
idx_t new_capacity = GroupedAggregateHashTable::InitialCapacity();
|
56
66
|
if (!list.empty()) {
|
57
67
|
new_capacity = list.back()->Capacity();
|
@@ -70,7 +80,7 @@ idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bo
|
|
70
80
|
|
71
81
|
// we partition when we are asked to or when the unpartitioned ht runs out of space
|
72
82
|
if (!IsPartitioned() && do_partition) {
|
73
|
-
Partition();
|
83
|
+
Partition(false);
|
74
84
|
}
|
75
85
|
|
76
86
|
if (!IsPartitioned()) {
|
@@ -117,7 +127,7 @@ idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bo
|
|
117
127
|
return group_count;
|
118
128
|
}
|
119
129
|
|
120
|
-
void PartitionableHashTable::Partition() {
|
130
|
+
void PartitionableHashTable::Partition(bool sink_done) {
|
121
131
|
D_ASSERT(!IsPartitioned());
|
122
132
|
D_ASSERT(radix_partitioned_hts.empty());
|
123
133
|
D_ASSERT(partition_info.n_partitions > 1);
|
@@ -130,7 +140,7 @@ void PartitionableHashTable::Partition() {
|
|
130
140
|
context, allocator, group_types, payload_types, bindings, GetHTEntrySize()));
|
131
141
|
partition_hts[r] = radix_partitioned_hts[r].back().get();
|
132
142
|
}
|
133
|
-
unpartitioned_ht->Partition(partition_hts, partition_info.radix_bits);
|
143
|
+
unpartitioned_ht->Partition(partition_hts, partition_info.radix_bits, sink_done);
|
134
144
|
unpartitioned_ht.reset();
|
135
145
|
}
|
136
146
|
unpartitioned_hts.clear();
|
@@ -153,6 +163,22 @@ HashTableList PartitionableHashTable::GetUnpartitioned() {
|
|
153
163
|
return std::move(unpartitioned_hts);
|
154
164
|
}
|
155
165
|
|
166
|
+
idx_t PartitionableHashTable::GetPartitionCount(idx_t partition) const {
|
167
|
+
idx_t total_size = 0;
|
168
|
+
for (const auto &ht : radix_partitioned_hts[partition]) {
|
169
|
+
total_size += ht->Count();
|
170
|
+
}
|
171
|
+
return total_size;
|
172
|
+
}
|
173
|
+
|
174
|
+
idx_t PartitionableHashTable::GetPartitionSize(idx_t partition) const {
|
175
|
+
idx_t total_size = 0;
|
176
|
+
for (const auto &ht : radix_partitioned_hts[partition]) {
|
177
|
+
total_size += ht->DataSize();
|
178
|
+
}
|
179
|
+
return total_size;
|
180
|
+
}
|
181
|
+
|
156
182
|
void PartitionableHashTable::Finalize() {
|
157
183
|
if (IsPartitioned()) {
|
158
184
|
for (auto &ht_list : radix_partitioned_hts) {
|
@@ -169,4 +195,13 @@ void PartitionableHashTable::Finalize() {
|
|
169
195
|
}
|
170
196
|
}
|
171
197
|
|
198
|
+
void PartitionableHashTable::Append(GroupedAggregateHashTable &ht) {
|
199
|
+
if (unpartitioned_hts.empty()) {
|
200
|
+
unpartitioned_hts.push_back(make_uniq<GroupedAggregateHashTable>(context, allocator, group_types, payload_types,
|
201
|
+
bindings, GetHTEntrySize(),
|
202
|
+
GroupedAggregateHashTable::InitialCapacity()));
|
203
|
+
}
|
204
|
+
unpartitioned_hts.back()->Append(ht);
|
205
|
+
}
|
206
|
+
|
172
207
|
} // namespace duckdb
|
@@ -1,6 +1,7 @@
|
|
1
1
|
#include "duckdb/execution/perfect_aggregate_hashtable.hpp"
|
2
|
-
|
2
|
+
|
3
3
|
#include "duckdb/common/row_operations/row_operations.hpp"
|
4
|
+
#include "duckdb/execution/expression_executor.hpp"
|
4
5
|
|
5
6
|
namespace duckdb {
|
6
7
|
|
@@ -93,6 +94,18 @@ static void ComputeGroupLocation(Vector &group, Value &min, uintptr_t *address_d
|
|
93
94
|
case PhysicalType::INT64:
|
94
95
|
ComputeGroupLocationTemplated<int64_t>(vdata, min, address_data, current_shift, count);
|
95
96
|
break;
|
97
|
+
case PhysicalType::UINT8:
|
98
|
+
ComputeGroupLocationTemplated<uint8_t>(vdata, min, address_data, current_shift, count);
|
99
|
+
break;
|
100
|
+
case PhysicalType::UINT16:
|
101
|
+
ComputeGroupLocationTemplated<uint16_t>(vdata, min, address_data, current_shift, count);
|
102
|
+
break;
|
103
|
+
case PhysicalType::UINT32:
|
104
|
+
ComputeGroupLocationTemplated<uint32_t>(vdata, min, address_data, current_shift, count);
|
105
|
+
break;
|
106
|
+
case PhysicalType::UINT64:
|
107
|
+
ComputeGroupLocationTemplated<uint64_t>(vdata, min, address_data, current_shift, count);
|
108
|
+
break;
|
96
109
|
default:
|
97
110
|
throw InternalException("Unsupported group type for perfect aggregate hash table");
|
98
111
|
}
|
@@ -123,7 +136,7 @@ void PerfectAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload)
|
|
123
136
|
// after finding the group location we update the aggregates
|
124
137
|
idx_t payload_idx = 0;
|
125
138
|
auto &aggregates = layout.GetAggregates();
|
126
|
-
RowOperationsState row_state(aggregate_allocator
|
139
|
+
RowOperationsState row_state(aggregate_allocator);
|
127
140
|
for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) {
|
128
141
|
auto &aggregate = aggregates[aggr_idx];
|
129
142
|
auto input_count = (idx_t)aggregate.child_count;
|
@@ -152,7 +165,7 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) {
|
|
152
165
|
data_ptr_t source_ptr = other.data;
|
153
166
|
data_ptr_t target_ptr = data;
|
154
167
|
idx_t combine_count = 0;
|
155
|
-
RowOperationsState row_state(aggregate_allocator
|
168
|
+
RowOperationsState row_state(aggregate_allocator);
|
156
169
|
for (idx_t i = 0; i < total_groups; i++) {
|
157
170
|
auto has_entry_source = other.group_is_set[i];
|
158
171
|
// we only have any work to do if the source has an entry for this group
|
@@ -208,6 +221,18 @@ static void ReconstructGroupVector(uint32_t group_values[], Value &min, idx_t re
|
|
208
221
|
case PhysicalType::INT64:
|
209
222
|
ReconstructGroupVectorTemplated<int64_t>(group_values, min, mask, shift, entry_count, result);
|
210
223
|
break;
|
224
|
+
case PhysicalType::UINT8:
|
225
|
+
ReconstructGroupVectorTemplated<uint8_t>(group_values, min, mask, shift, entry_count, result);
|
226
|
+
break;
|
227
|
+
case PhysicalType::UINT16:
|
228
|
+
ReconstructGroupVectorTemplated<uint16_t>(group_values, min, mask, shift, entry_count, result);
|
229
|
+
break;
|
230
|
+
case PhysicalType::UINT32:
|
231
|
+
ReconstructGroupVectorTemplated<uint32_t>(group_values, min, mask, shift, entry_count, result);
|
232
|
+
break;
|
233
|
+
case PhysicalType::UINT64:
|
234
|
+
ReconstructGroupVectorTemplated<uint64_t>(group_values, min, mask, shift, entry_count, result);
|
235
|
+
break;
|
211
236
|
default:
|
212
237
|
throw InternalException("Invalid type for perfect aggregate HT group");
|
213
238
|
}
|
@@ -243,7 +268,7 @@ void PerfectAggregateHashTable::Scan(idx_t &scan_position, DataChunk &result) {
|
|
243
268
|
}
|
244
269
|
// then construct the payloads
|
245
270
|
result.SetCardinality(entry_count);
|
246
|
-
RowOperationsState row_state(aggregate_allocator
|
271
|
+
RowOperationsState row_state(aggregate_allocator);
|
247
272
|
RowOperations::FinalizeStates(row_state, layout, addresses, result, grouping_columns);
|
248
273
|
}
|
249
274
|
|
@@ -264,7 +289,7 @@ void PerfectAggregateHashTable::Destroy() {
|
|
264
289
|
idx_t count = 0;
|
265
290
|
|
266
291
|
// iterate over all initialised slots of the hash table
|
267
|
-
RowOperationsState row_state(aggregate_allocator
|
292
|
+
RowOperationsState row_state(aggregate_allocator);
|
268
293
|
data_ptr_t payload_ptr = data;
|
269
294
|
for (idx_t i = 0; i < total_groups; i++) {
|
270
295
|
if (group_is_set[i]) {
|
@@ -5,12 +5,12 @@
|
|
5
5
|
#include "duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp"
|
6
6
|
#include "duckdb/execution/operator/projection/physical_projection.hpp"
|
7
7
|
#include "duckdb/execution/physical_plan_generator.hpp"
|
8
|
+
#include "duckdb/function/function_binder.hpp"
|
8
9
|
#include "duckdb/main/client_context.hpp"
|
9
10
|
#include "duckdb/parser/expression/comparison_expression.hpp"
|
10
11
|
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
|
11
|
-
#include "duckdb/planner/operator/logical_aggregate.hpp"
|
12
|
-
#include "duckdb/function/function_binder.hpp"
|
13
12
|
#include "duckdb/planner/expression/bound_reference_expression.hpp"
|
13
|
+
#include "duckdb/planner/operator/logical_aggregate.hpp"
|
14
14
|
|
15
15
|
namespace duckdb {
|
16
16
|
|
@@ -23,6 +23,11 @@ static uint32_t RequiredBitsForValue(uint32_t n) {
|
|
23
23
|
return required_bits;
|
24
24
|
}
|
25
25
|
|
26
|
+
template <class T>
|
27
|
+
hugeint_t GetRangeHugeint(const BaseStatistics &nstats) {
|
28
|
+
return Hugeint::Convert(NumericStats::GetMax<T>(nstats)) - Hugeint::Convert(NumericStats::GetMin<T>(nstats));
|
29
|
+
}
|
30
|
+
|
26
31
|
static bool CanUsePerfectHashAggregate(ClientContext &context, LogicalAggregate &op, vector<idx_t> &bits_per_group) {
|
27
32
|
if (op.grouping_sets.size() > 1 || !op.grouping_functions.empty()) {
|
28
33
|
return false;
|
@@ -40,6 +45,10 @@ static bool CanUsePerfectHashAggregate(ClientContext &context, LogicalAggregate
|
|
40
45
|
case PhysicalType::INT16:
|
41
46
|
case PhysicalType::INT32:
|
42
47
|
case PhysicalType::INT64:
|
48
|
+
case PhysicalType::UINT8:
|
49
|
+
case PhysicalType::UINT16:
|
50
|
+
case PhysicalType::UINT32:
|
51
|
+
case PhysicalType::UINT64:
|
43
52
|
break;
|
44
53
|
default:
|
45
54
|
// we only support simple integer types for perfect hashing
|
@@ -53,6 +62,8 @@ static bool CanUsePerfectHashAggregate(ClientContext &context, LogicalAggregate
|
|
53
62
|
switch (group_type.InternalType()) {
|
54
63
|
case PhysicalType::INT8:
|
55
64
|
case PhysicalType::INT16:
|
65
|
+
case PhysicalType::UINT8:
|
66
|
+
case PhysicalType::UINT16:
|
56
67
|
break;
|
57
68
|
default:
|
58
69
|
// type is too large and there are no stats: skip perfect hashing
|
@@ -68,33 +79,55 @@ static bool CanUsePerfectHashAggregate(ClientContext &context, LogicalAggregate
|
|
68
79
|
if (!NumericStats::HasMinMax(nstats)) {
|
69
80
|
return false;
|
70
81
|
}
|
82
|
+
|
83
|
+
if (NumericStats::Max(*stats) < NumericStats::Min(*stats)) {
|
84
|
+
// May result in underflow
|
85
|
+
return false;
|
86
|
+
}
|
87
|
+
|
71
88
|
// we have a min and a max value for the stats: use that to figure out how many bits we have
|
72
89
|
// we add two here, one for the NULL value, and one to make the computation one-indexed
|
73
90
|
// (e.g. if min and max are the same, we still need one entry in total)
|
74
|
-
|
91
|
+
hugeint_t range_h;
|
75
92
|
switch (group_type.InternalType()) {
|
76
93
|
case PhysicalType::INT8:
|
77
|
-
|
94
|
+
range_h = GetRangeHugeint<int8_t>(nstats);
|
78
95
|
break;
|
79
96
|
case PhysicalType::INT16:
|
80
|
-
|
97
|
+
range_h = GetRangeHugeint<int16_t>(nstats);
|
81
98
|
break;
|
82
99
|
case PhysicalType::INT32:
|
83
|
-
|
100
|
+
range_h = GetRangeHugeint<int32_t>(nstats);
|
84
101
|
break;
|
85
102
|
case PhysicalType::INT64:
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
103
|
+
range_h = GetRangeHugeint<int64_t>(nstats);
|
104
|
+
break;
|
105
|
+
case PhysicalType::UINT8:
|
106
|
+
range_h = GetRangeHugeint<uint8_t>(nstats);
|
107
|
+
break;
|
108
|
+
case PhysicalType::UINT16:
|
109
|
+
range_h = GetRangeHugeint<uint16_t>(nstats);
|
110
|
+
break;
|
111
|
+
case PhysicalType::UINT32:
|
112
|
+
range_h = GetRangeHugeint<uint32_t>(nstats);
|
113
|
+
break;
|
114
|
+
case PhysicalType::UINT64:
|
115
|
+
range_h = GetRangeHugeint<uint64_t>(nstats);
|
90
116
|
break;
|
91
117
|
default:
|
92
118
|
throw InternalException("Unsupported type for perfect hash (should be caught before)");
|
93
119
|
}
|
120
|
+
|
121
|
+
uint64_t range;
|
122
|
+
if (!Hugeint::TryCast(range_h, range)) {
|
123
|
+
return false;
|
124
|
+
}
|
125
|
+
|
94
126
|
// bail out on any range bigger than 2^32
|
95
127
|
if (range >= NumericLimits<int32_t>::Maximum()) {
|
96
128
|
return false;
|
97
129
|
}
|
130
|
+
|
98
131
|
range += 2;
|
99
132
|
// figure out how many bits we need
|
100
133
|
idx_t required_bits = RequiredBitsForValue(range);
|