duckdb 0.8.2-dev161.0 → 0.8.2-dev1764.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +91 -38
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +194 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +79 -12
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +44 -19
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +4619 -4446
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/extra_type_info.cpp +506 -0
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +14 -14
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +70 -50
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types/value.cpp +11 -6
- package/src/duckdb/src/common/types.cpp +9 -656
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/arrow.cpp +19 -0
- package/src/duckdb/src/function/table/arrow_conversion.cpp +35 -1
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/system/test_all_types.cpp +7 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +23 -8
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +22 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +4 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +93 -88
- package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +79 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +44 -31
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
- package/src/statement.cpp +10 -3
- package/test/columns.test.ts +24 -1
- package/test/test_all_types.test.ts +234 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -1,5 +1,8 @@
|
|
1
1
|
#include "duckdb/execution/radix_partitioned_hashtable.hpp"
|
2
2
|
|
3
|
+
#include "duckdb/common/radix_partitioning.hpp"
|
4
|
+
#include "duckdb/common/types/row/tuple_data_collection.hpp"
|
5
|
+
#include "duckdb/execution/executor.hpp"
|
3
6
|
#include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp"
|
4
7
|
#include "duckdb/parallel/event.hpp"
|
5
8
|
#include "duckdb/parallel/task_scheduler.hpp"
|
@@ -59,8 +62,8 @@ class RadixHTGlobalState : public GlobalSinkState {
|
|
59
62
|
public:
|
60
63
|
explicit RadixHTGlobalState(ClientContext &context)
|
61
64
|
: is_empty(true), multi_scan(true), partitioned(false),
|
62
|
-
partition_info(
|
63
|
-
MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads())) {
|
65
|
+
partition_info(make_uniq<RadixPartitionInfo>(
|
66
|
+
MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads()))) {
|
64
67
|
}
|
65
68
|
|
66
69
|
vector<unique_ptr<PartitionableHashTable>> intermediate_hts;
|
@@ -78,8 +81,16 @@ public:
|
|
78
81
|
bool is_finalized = false;
|
79
82
|
bool is_partitioned = false;
|
80
83
|
|
81
|
-
RadixPartitionInfo partition_info;
|
84
|
+
unique_ptr<RadixPartitionInfo> partition_info;
|
82
85
|
AggregateHTAppendState append_state;
|
86
|
+
|
87
|
+
//! Repartitioned HT info
|
88
|
+
bool repartitioned = false;
|
89
|
+
idx_t repartition_tasks_per_partition;
|
90
|
+
vector<vector<unique_ptr<PartitionableHashTable>>> repartition_tasks;
|
91
|
+
unique_array<atomic<idx_t>> repartition_tasks_assigned;
|
92
|
+
unique_array<atomic<idx_t>> repartition_tasks_done;
|
93
|
+
unique_array<atomic<bool>> finalize_assigned;
|
83
94
|
};
|
84
95
|
|
85
96
|
class RadixHTLocalState : public LocalSinkState {
|
@@ -146,9 +157,9 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk
|
|
146
157
|
gstate.is_empty = gstate.is_empty && group_chunk.size() == 0;
|
147
158
|
if (gstate.finalized_hts.empty()) {
|
148
159
|
// Create a finalized ht in the global state, that we can populate
|
149
|
-
gstate.finalized_hts.push_back(
|
150
|
-
|
151
|
-
|
160
|
+
gstate.finalized_hts.push_back(make_shared<GroupedAggregateHashTable>(
|
161
|
+
context.client, BufferAllocator::Get(context.client), group_types, op.payload_types, op.bindings,
|
162
|
+
HtEntryType::HT_WIDTH_64));
|
152
163
|
}
|
153
164
|
D_ASSERT(gstate.finalized_hts.size() == 1);
|
154
165
|
D_ASSERT(gstate.finalized_hts[0]);
|
@@ -163,12 +174,15 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk
|
|
163
174
|
|
164
175
|
if (!llstate.ht) {
|
165
176
|
llstate.ht =
|
166
|
-
make_uniq<PartitionableHashTable>(context.client,
|
167
|
-
group_types, op.payload_types, op.bindings);
|
177
|
+
make_uniq<PartitionableHashTable>(context.client, BufferAllocator::Get(context.client),
|
178
|
+
*gstate.partition_info, group_types, op.payload_types, op.bindings);
|
179
|
+
if (context.client.config.force_external) {
|
180
|
+
gstate.partitioned = true;
|
181
|
+
}
|
168
182
|
}
|
169
183
|
|
170
184
|
llstate.total_groups += llstate.ht->AddChunk(group_chunk, payload_input,
|
171
|
-
gstate.partitioned && gstate.partition_info
|
185
|
+
gstate.partitioned && gstate.partition_info->n_partitions > 1, filter);
|
172
186
|
if (llstate.total_groups >= radix_limit) {
|
173
187
|
gstate.partitioned = true;
|
174
188
|
}
|
@@ -192,8 +206,8 @@ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkSta
|
|
192
206
|
return; // no data
|
193
207
|
}
|
194
208
|
|
195
|
-
if (!llstate.ht->IsPartitioned() && gstate.partition_info
|
196
|
-
llstate.ht->Partition();
|
209
|
+
if (!llstate.ht->IsPartitioned() && gstate.partition_info->n_partitions > 1 && gstate.partitioned) {
|
210
|
+
llstate.ht->Partition(true);
|
197
211
|
}
|
198
212
|
|
199
213
|
// we will never add new values to these HTs so we can drop the first part of the HT
|
@@ -207,13 +221,23 @@ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkSta
|
|
207
221
|
gstate.intermediate_hts.push_back(std::move(llstate.ht));
|
208
222
|
}
|
209
223
|
|
224
|
+
void RadixPartitionedHashTable::InitializeFinalizedHTs(ClientContext &context, GlobalSinkState &gstate_p) const {
|
225
|
+
auto &gstate = gstate_p.Cast<RadixHTGlobalState>();
|
226
|
+
auto &allocator = BufferAllocator::Get(context);
|
227
|
+
gstate.finalized_hts.resize(gstate.partition_info->n_partitions);
|
228
|
+
for (idx_t r = 0; r < gstate.partition_info->n_partitions; r++) {
|
229
|
+
gstate.finalized_hts[r] = make_shared<GroupedAggregateHashTable>(
|
230
|
+
context, allocator, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64);
|
231
|
+
}
|
232
|
+
}
|
233
|
+
|
210
234
|
bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState &gstate_p) const {
|
211
235
|
auto &gstate = gstate_p.Cast<RadixHTGlobalState>();
|
212
236
|
D_ASSERT(!gstate.is_finalized);
|
213
237
|
gstate.is_finalized = true;
|
214
238
|
|
215
239
|
// special case if we have non-combinable aggregates
|
216
|
-
// we have already
|
240
|
+
// we have already aggregated into a global shared HT that does not require any additional finalization steps
|
217
241
|
if (ForceSingleHT(gstate)) {
|
218
242
|
D_ASSERT(gstate.finalized_hts.size() <= 1);
|
219
243
|
D_ASSERT(gstate.finalized_hts.empty() || gstate.finalized_hts[0]);
|
@@ -221,31 +245,17 @@ bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState
|
|
221
245
|
}
|
222
246
|
|
223
247
|
// we can have two cases now, non-partitioned for few groups and radix-partitioned for very many groups.
|
224
|
-
|
225
|
-
|
226
|
-
bool any_partitioned = false;
|
227
|
-
for (auto &pht : gstate.intermediate_hts) {
|
228
|
-
if (pht->IsPartitioned()) {
|
229
|
-
any_partitioned = true;
|
230
|
-
break;
|
231
|
-
}
|
232
|
-
}
|
233
|
-
|
234
|
-
auto &allocator = Allocator::Get(context);
|
235
|
-
if (any_partitioned) {
|
248
|
+
auto &allocator = BufferAllocator::Get(context);
|
249
|
+
if (AnyPartitioned(gstate_p)) {
|
236
250
|
// if one is partitioned, all have to be
|
237
251
|
// this should mostly have already happened in Combine, but if not we do it here
|
238
252
|
for (auto &pht : gstate.intermediate_hts) {
|
239
253
|
if (!pht->IsPartitioned()) {
|
240
|
-
pht->Partition();
|
254
|
+
pht->Partition(true);
|
241
255
|
}
|
242
256
|
}
|
243
257
|
// schedule additional tasks to combine the partial HTs
|
244
|
-
|
245
|
-
for (idx_t r = 0; r < gstate.partition_info.n_partitions; r++) {
|
246
|
-
gstate.finalized_hts[r] = make_shared<GroupedAggregateHashTable>(
|
247
|
-
context, allocator, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64);
|
248
|
-
}
|
258
|
+
InitializeFinalizedHTs(context, gstate_p);
|
249
259
|
gstate.is_partitioned = true;
|
250
260
|
return true;
|
251
261
|
} else { // in the non-partitioned case we immediately combine all the unpartitioned hts created by the threads.
|
@@ -269,7 +279,7 @@ bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState
|
|
269
279
|
}
|
270
280
|
}
|
271
281
|
|
272
|
-
// this task is run in multiple threads and combines the radix-partitioned hash tables into a single
|
282
|
+
// this task is run in multiple threads and combines the radix-partitioned hash tables into a single one and then
|
273
283
|
// folds them into the global ht finally.
|
274
284
|
class RadixAggregateFinalizeTask : public ExecutorTask {
|
275
285
|
public:
|
@@ -279,10 +289,21 @@ public:
|
|
279
289
|
}
|
280
290
|
|
281
291
|
static void FinalizeHT(RadixHTGlobalState &gstate, idx_t radix) {
|
282
|
-
D_ASSERT(gstate.partition_info
|
292
|
+
D_ASSERT(gstate.partition_info->n_partitions <= gstate.finalized_hts.size());
|
283
293
|
D_ASSERT(gstate.finalized_hts[radix]);
|
284
|
-
|
285
|
-
|
294
|
+
|
295
|
+
idx_t pht_idx_from = 0;
|
296
|
+
idx_t pht_idx_to = gstate.intermediate_hts.size();
|
297
|
+
if (gstate.repartitioned) {
|
298
|
+
const auto num_partitions_before = gstate.repartition_tasks.size();
|
299
|
+
const auto multiplier = gstate.partition_info->n_partitions / num_partitions_before;
|
300
|
+
const auto radix_before = radix / multiplier;
|
301
|
+
pht_idx_from = radix_before * gstate.repartition_tasks_per_partition;
|
302
|
+
pht_idx_to = pht_idx_from + gstate.repartition_tasks_per_partition;
|
303
|
+
}
|
304
|
+
|
305
|
+
for (idx_t i = pht_idx_from; i < pht_idx_to; i++) {
|
306
|
+
for (auto &ht : gstate.intermediate_hts[i]->GetPartition(radix)) {
|
286
307
|
gstate.finalized_hts[radix]->Combine(*ht);
|
287
308
|
ht.reset();
|
288
309
|
}
|
@@ -302,22 +323,247 @@ private:
|
|
302
323
|
idx_t radix;
|
303
324
|
};
|
304
325
|
|
326
|
+
class RadixAggregateRepartitionTask : public ExecutorTask {
|
327
|
+
public:
|
328
|
+
RadixAggregateRepartitionTask(Executor &executor, shared_ptr<Event> event_p, RadixHTGlobalState &state_p,
|
329
|
+
idx_t num_partitions_before_p)
|
330
|
+
: ExecutorTask(executor), event(std::move(event_p)), state(state_p),
|
331
|
+
num_partitions_before(num_partitions_before_p) {
|
332
|
+
}
|
333
|
+
|
334
|
+
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
|
335
|
+
const auto multiplier = state.partition_info->n_partitions / num_partitions_before;
|
336
|
+
|
337
|
+
idx_t repartition_radix = 0;
|
338
|
+
idx_t finalize_radix = 0;
|
339
|
+
while (repartition_radix < num_partitions_before && finalize_radix < state.partition_info->n_partitions) {
|
340
|
+
// Loop over original partitions until we find one that we can repartition
|
341
|
+
for (; repartition_radix < num_partitions_before; repartition_radix++) {
|
342
|
+
auto task_idx = state.repartition_tasks_assigned[repartition_radix]++;
|
343
|
+
if (task_idx >= state.repartition_tasks_per_partition) {
|
344
|
+
continue;
|
345
|
+
}
|
346
|
+
auto &ht = state.repartition_tasks[repartition_radix][task_idx];
|
347
|
+
ht->Partition(true);
|
348
|
+
state.intermediate_hts[repartition_radix * state.repartition_tasks_per_partition + task_idx] =
|
349
|
+
std::move(ht);
|
350
|
+
state.repartition_tasks_done[repartition_radix]++;
|
351
|
+
break;
|
352
|
+
}
|
353
|
+
|
354
|
+
// Loop over repartitioned partitions
|
355
|
+
for (; finalize_radix < state.partition_info->n_partitions; finalize_radix++) {
|
356
|
+
const auto original_radix = finalize_radix / multiplier;
|
357
|
+
if (state.repartition_tasks_done[original_radix] != state.repartition_tasks_per_partition) {
|
358
|
+
break; // Needs more repartitioning
|
359
|
+
}
|
360
|
+
|
361
|
+
if (state.finalize_assigned[finalize_radix]) {
|
362
|
+
continue; // Already assigned
|
363
|
+
}
|
364
|
+
|
365
|
+
{
|
366
|
+
lock_guard<mutex> guard(state.lock);
|
367
|
+
if (state.finalize_assigned[finalize_radix]) {
|
368
|
+
// LCOV_EXCL_START
|
369
|
+
continue; // Check again with lock, but already assigned
|
370
|
+
// LCOV_EXCL_STOP
|
371
|
+
}
|
372
|
+
state.finalize_assigned[finalize_radix] = true;
|
373
|
+
}
|
374
|
+
|
375
|
+
// We can finalize!
|
376
|
+
RadixAggregateFinalizeTask::FinalizeHT(state, finalize_radix);
|
377
|
+
}
|
378
|
+
}
|
379
|
+
event->FinishTask();
|
380
|
+
return TaskExecutionResult::TASK_FINISHED;
|
381
|
+
}
|
382
|
+
|
383
|
+
private:
|
384
|
+
shared_ptr<Event> event;
|
385
|
+
RadixHTGlobalState &state;
|
386
|
+
const idx_t num_partitions_before;
|
387
|
+
};
|
388
|
+
|
305
389
|
void RadixPartitionedHashTable::ScheduleTasks(Executor &executor, const shared_ptr<Event> &event,
|
306
390
|
GlobalSinkState &state, vector<shared_ptr<Task>> &tasks) const {
|
307
391
|
auto &gstate = state.Cast<RadixHTGlobalState>();
|
308
392
|
if (!gstate.is_partitioned) {
|
309
393
|
return;
|
310
394
|
}
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
395
|
+
|
396
|
+
idx_t repartition_radix_bits;
|
397
|
+
idx_t concurrent_repartitions;
|
398
|
+
idx_t tasks_per_partition;
|
399
|
+
GetRepartitionInfo(executor.context, state, repartition_radix_bits, concurrent_repartitions, tasks_per_partition);
|
400
|
+
if (repartition_radix_bits == gstate.partition_info->radix_bits) {
|
401
|
+
// No repartitioning necessary
|
402
|
+
for (idx_t r = 0; r < gstate.partition_info->n_partitions; r++) {
|
403
|
+
D_ASSERT(gstate.partition_info->n_partitions <= gstate.finalized_hts.size());
|
404
|
+
D_ASSERT(gstate.finalized_hts[r]);
|
405
|
+
tasks.push_back(make_uniq<RadixAggregateFinalizeTask>(executor, event, gstate, r));
|
406
|
+
}
|
407
|
+
} else {
|
408
|
+
// Schedule repartition / finalize tasks
|
409
|
+
ScheduleRepartitionTasks(executor, event, state, tasks, repartition_radix_bits, concurrent_repartitions,
|
410
|
+
tasks_per_partition);
|
411
|
+
}
|
412
|
+
}
|
413
|
+
|
414
|
+
void RadixPartitionedHashTable::ScheduleRepartitionTasks(Executor &executor, const shared_ptr<Event> &event,
|
415
|
+
GlobalSinkState &state, vector<shared_ptr<Task>> &tasks,
|
416
|
+
const idx_t repartition_radix_bits,
|
417
|
+
const idx_t concurrent_repartitions,
|
418
|
+
const idx_t tasks_per_partition) const {
|
419
|
+
auto &gstate = state.Cast<RadixHTGlobalState>();
|
420
|
+
D_ASSERT(repartition_radix_bits > gstate.partition_info->radix_bits);
|
421
|
+
const auto num_partitions_before = gstate.partition_info->n_partitions;
|
422
|
+
const auto multiplier = RadixPartitioning::NumberOfPartitions(repartition_radix_bits) / num_partitions_before;
|
423
|
+
|
424
|
+
// Initialize gstate
|
425
|
+
auto new_partition_info =
|
426
|
+
make_uniq<RadixPartitionInfo>(RadixPartitioning::NumberOfPartitions(repartition_radix_bits));
|
427
|
+
gstate.repartitioned = true;
|
428
|
+
gstate.repartition_tasks_per_partition = tasks_per_partition;
|
429
|
+
gstate.repartition_tasks.resize(num_partitions_before);
|
430
|
+
gstate.repartition_tasks_assigned = make_uniq_array<atomic<idx_t>>(num_partitions_before);
|
431
|
+
gstate.repartition_tasks_done = make_uniq_array<atomic<idx_t>>(num_partitions_before);
|
432
|
+
gstate.finalize_assigned = make_uniq_array<atomic<bool>>(new_partition_info->n_partitions);
|
433
|
+
for (idx_t partition_idx = 0; partition_idx < num_partitions_before; partition_idx++) {
|
434
|
+
gstate.repartition_tasks_assigned[partition_idx] = 0;
|
435
|
+
gstate.repartition_tasks_done[partition_idx] = 0;
|
436
|
+
|
437
|
+
// Grab intermediate data from gstate
|
438
|
+
HashTableList partition_list;
|
439
|
+
for (auto &pht : gstate.intermediate_hts) {
|
440
|
+
for (auto &ht : pht->GetPartition(partition_idx)) {
|
441
|
+
partition_list.push_back(std::move(ht));
|
442
|
+
}
|
443
|
+
}
|
444
|
+
|
445
|
+
// Spread the data across the tasks
|
446
|
+
const idx_t hts_per_task = (partition_list.size() + tasks_per_partition - 1) / tasks_per_partition;
|
447
|
+
idx_t ht_idx = 0;
|
448
|
+
for (idx_t task_idx = 0; task_idx < tasks_per_partition; task_idx++) {
|
449
|
+
auto task_ht =
|
450
|
+
make_uniq<PartitionableHashTable>(executor.context, BufferAllocator::Get(executor.context),
|
451
|
+
*new_partition_info, group_types, op.payload_types, op.bindings);
|
452
|
+
auto ht_idx_to = MinValue<idx_t>(ht_idx + hts_per_task, partition_list.size());
|
453
|
+
for (; ht_idx < ht_idx_to; ht_idx++) {
|
454
|
+
auto &ht = partition_list[ht_idx];
|
455
|
+
task_ht->Append(*ht);
|
456
|
+
ht.reset();
|
457
|
+
}
|
458
|
+
gstate.repartition_tasks[partition_idx].push_back(std::move(task_ht));
|
459
|
+
}
|
460
|
+
|
461
|
+
for (idx_t i = 0; i < multiplier; i++) {
|
462
|
+
gstate.finalize_assigned[partition_idx * multiplier + i] = false;
|
463
|
+
}
|
464
|
+
}
|
465
|
+
|
466
|
+
// Schedule tasks equal to number of threads
|
467
|
+
const idx_t num_threads = TaskScheduler::GetScheduler(executor.context).NumberOfThreads();
|
468
|
+
for (idx_t i = 0; i < num_threads; i++) {
|
469
|
+
tasks.emplace_back(make_shared<RadixAggregateRepartitionTask>(executor, event, gstate, num_partitions_before));
|
470
|
+
}
|
471
|
+
|
472
|
+
gstate.intermediate_hts.clear();
|
473
|
+
gstate.intermediate_hts.resize(num_partitions_before * tasks_per_partition);
|
474
|
+
|
475
|
+
gstate.partition_info = std::move(new_partition_info);
|
476
|
+
InitializeFinalizedHTs(executor.context, state);
|
477
|
+
}
|
478
|
+
|
479
|
+
bool RadixPartitionedHashTable::ForceSingleHT(GlobalSinkState &state) {
|
480
|
+
auto &gstate = state.Cast<RadixHTGlobalState>();
|
481
|
+
return gstate.partition_info->n_partitions < 2;
|
482
|
+
}
|
483
|
+
|
484
|
+
bool RadixPartitionedHashTable::AnyPartitioned(GlobalSinkState &state) {
|
485
|
+
auto &gstate = state.Cast<RadixHTGlobalState>();
|
486
|
+
for (auto &pht : gstate.intermediate_hts) {
|
487
|
+
if (pht->IsPartitioned()) {
|
488
|
+
return true;
|
489
|
+
}
|
315
490
|
}
|
491
|
+
return false;
|
316
492
|
}
|
317
493
|
|
318
|
-
|
494
|
+
void RadixPartitionedHashTable::GetRepartitionInfo(ClientContext &context, GlobalSinkState &state,
|
495
|
+
idx_t &repartition_radix_bits, idx_t &concurrent_repartitions,
|
496
|
+
idx_t &tasks_per_partition) {
|
319
497
|
auto &gstate = state.Cast<RadixHTGlobalState>();
|
320
|
-
|
498
|
+
const auto num_partitions = gstate.partition_info->n_partitions;
|
499
|
+
const auto radix_bits = gstate.partition_info->radix_bits;
|
500
|
+
D_ASSERT(IsPowerOfTwo(num_partitions));
|
501
|
+
|
502
|
+
vector<idx_t> partition_counts(num_partitions, 0);
|
503
|
+
vector<idx_t> partition_sizes(num_partitions, 0);
|
504
|
+
for (const auto &ht : gstate.intermediate_hts) {
|
505
|
+
for (idx_t partition_idx = 0; partition_idx < num_partitions; partition_idx++) {
|
506
|
+
partition_counts[partition_idx] += ht->GetPartitionCount(partition_idx);
|
507
|
+
partition_sizes[partition_idx] += ht->GetPartitionSize(partition_idx);
|
508
|
+
}
|
509
|
+
}
|
510
|
+
|
511
|
+
idx_t total_size = 0;
|
512
|
+
idx_t max_partition_idx = 0;
|
513
|
+
idx_t max_partition_size = 0;
|
514
|
+
for (idx_t partition_idx = 0; partition_idx < num_partitions; partition_idx++) {
|
515
|
+
const auto &partition_count = partition_counts[partition_idx];
|
516
|
+
const auto &partition_size = partition_sizes[partition_idx];
|
517
|
+
auto partition_ht_size =
|
518
|
+
partition_size + GroupedAggregateHashTable::FirstPartSize(partition_count, HtEntryType::HT_WIDTH_64);
|
519
|
+
if (partition_ht_size > max_partition_size) {
|
520
|
+
max_partition_idx = partition_idx;
|
521
|
+
max_partition_size = partition_ht_size;
|
522
|
+
}
|
523
|
+
total_size += partition_ht_size;
|
524
|
+
}
|
525
|
+
|
526
|
+
// Switch to out-of-core finalize at ~60%
|
527
|
+
const auto max_ht_size = double(0.6) * BufferManager::GetBufferManager(context).GetMaxMemory();
|
528
|
+
const idx_t n_threads = PreviousPowerOfTwo(TaskScheduler::GetScheduler(context).NumberOfThreads());
|
529
|
+
D_ASSERT(IsPowerOfTwo(n_threads));
|
530
|
+
if (!context.config.force_external && total_size < max_ht_size) {
|
531
|
+
// In-memory finalize
|
532
|
+
if (num_partitions >= n_threads) { // Can already keep all threads busy
|
533
|
+
repartition_radix_bits = radix_bits;
|
534
|
+
tasks_per_partition = 1;
|
535
|
+
} else { // Repartition to keep all threads busy
|
536
|
+
// Can't have coverage because RadixHTGlobalState::MAX_RADIX_PARTITIONS > threads on github actions
|
537
|
+
// LCOV_EXCL_START
|
538
|
+
repartition_radix_bits = RadixPartitioning::RadixBits(NextPowerOfTwo(n_threads));
|
539
|
+
tasks_per_partition = n_threads / num_partitions;
|
540
|
+
// LCOV_EXCL_STOP
|
541
|
+
}
|
542
|
+
concurrent_repartitions = num_partitions;
|
543
|
+
return;
|
544
|
+
}
|
545
|
+
|
546
|
+
// Out-of-core finalize
|
547
|
+
const auto partition_count = partition_counts[max_partition_idx];
|
548
|
+
const auto partition_size = partition_sizes[max_partition_idx];
|
549
|
+
|
550
|
+
const auto max_added_bits = RadixPartitioning::MAX_RADIX_BITS - radix_bits;
|
551
|
+
idx_t added_bits;
|
552
|
+
for (added_bits = 1; added_bits < max_added_bits; added_bits++) {
|
553
|
+
double partition_multiplier = RadixPartitioning::NumberOfPartitions(added_bits);
|
554
|
+
|
555
|
+
auto new_estimated_count = double(partition_count) / partition_multiplier;
|
556
|
+
auto new_estimated_size = double(partition_size) / partition_multiplier;
|
557
|
+
auto new_estimated_ht_size = new_estimated_size + GroupedAggregateHashTable::FirstPartSize(
|
558
|
+
new_estimated_count, HtEntryType::HT_WIDTH_64);
|
559
|
+
|
560
|
+
if (new_estimated_ht_size <= max_ht_size / n_threads) {
|
561
|
+
break; // Max HT size is safe
|
562
|
+
}
|
563
|
+
}
|
564
|
+
repartition_radix_bits = radix_bits + added_bits;
|
565
|
+
concurrent_repartitions = MinValue<idx_t>(MaxValue<idx_t>(1, max_ht_size / max_partition_size), n_threads);
|
566
|
+
tasks_per_partition = NextPowerOfTwo(n_threads / concurrent_repartitions);
|
321
567
|
}
|
322
568
|
|
323
569
|
//===--------------------------------------------------------------------===//
|
@@ -342,7 +588,7 @@ public:
|
|
342
588
|
class RadixHTLocalSourceState : public LocalSourceState {
|
343
589
|
public:
|
344
590
|
explicit RadixHTLocalSourceState(ExecutionContext &context, const RadixPartitionedHashTable &ht) {
|
345
|
-
auto &allocator =
|
591
|
+
auto &allocator = BufferAllocator::Get(context.client);
|
346
592
|
auto scan_chunk_types = ht.group_types;
|
347
593
|
for (auto &aggr_type : ht.op.aggregate_return_types) {
|
348
594
|
scan_chunk_types.push_back(aggr_type);
|
@@ -361,7 +607,7 @@ public:
|
|
361
607
|
};
|
362
608
|
|
363
609
|
unique_ptr<GlobalSourceState> RadixPartitionedHashTable::GetGlobalSourceState(ClientContext &context) const {
|
364
|
-
return make_uniq<RadixHTGlobalSourceState>(
|
610
|
+
return make_uniq<RadixHTGlobalSourceState>(BufferAllocator::Get(context), *this);
|
365
611
|
}
|
366
612
|
|
367
613
|
unique_ptr<LocalSourceState> RadixPartitionedHashTable::GetLocalSourceState(ExecutionContext &context) const {
|
@@ -401,13 +647,14 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
|
|
401
647
|
chunk.data[null_group].SetVectorType(VectorType::CONSTANT_VECTOR);
|
402
648
|
ConstantVector::SetNull(chunk.data[null_group], true);
|
403
649
|
}
|
650
|
+
ArenaAllocator allocator(BufferAllocator::Get(context.client));
|
404
651
|
for (idx_t i = 0; i < op.aggregates.size(); i++) {
|
405
652
|
D_ASSERT(op.aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
|
406
653
|
auto &aggr = op.aggregates[i]->Cast<BoundAggregateExpression>();
|
407
654
|
auto aggr_state = make_unsafe_uniq_array<data_t>(aggr.function.state_size());
|
408
655
|
aggr.function.initialize(aggr_state.get());
|
409
656
|
|
410
|
-
AggregateInputData aggr_input_data(aggr.bind_info.get(),
|
657
|
+
AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
|
411
658
|
Vector state_vector(Value::POINTER(CastPointerToValue(aggr_state.get())));
|
412
659
|
aggr.function.finalize(state_vector, aggr_input_data, chunk.data[null_groups.size() + i], 1, 0);
|
413
660
|
if (aggr.function.destructor) {
|