duckdb 0.8.2-dev11.0 → 0.8.2-dev1182.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +14 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/lib/duckdb.d.ts +59 -0
- package/lib/duckdb.js +21 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -7
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +191 -19
- package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -5
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -10
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +0 -12
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +42 -5
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/file_system.cpp +15 -0
- package/src/duckdb/src/common/local_file_system.cpp +1 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +181 -18
- package/src/duckdb/src/common/radix_partitioning.cpp +27 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types/vector.cpp +15 -14
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +6 -4
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +0 -17
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +413 -282
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_operator.cpp +17 -14
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/function.cpp +2 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/system/test_all_types.cpp +38 -18
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +24 -6
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +21 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/config.cpp +2 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
- package/src/duckdb/src/main/settings/settings.cpp +40 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/pipeline_executor.cpp +7 -6
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +1 -1
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/query_node/cte_node.cpp +75 -0
- package/src/duckdb/src/parser/query_node.cpp +18 -1
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -0
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +5 -0
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/storage_manager.cpp +7 -2
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb_node.hpp +1 -0
- package/src/statement.cpp +103 -4
- package/test/columns.test.ts +243 -0
- package/test/test_all_types.test.ts +233 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -1,5 +1,8 @@
|
|
1
1
|
#include "duckdb/execution/radix_partitioned_hashtable.hpp"
|
2
2
|
|
3
|
+
#include "duckdb/common/radix_partitioning.hpp"
|
4
|
+
#include "duckdb/common/types/row/tuple_data_collection.hpp"
|
5
|
+
#include "duckdb/execution/executor.hpp"
|
3
6
|
#include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp"
|
4
7
|
#include "duckdb/parallel/event.hpp"
|
5
8
|
#include "duckdb/parallel/task_scheduler.hpp"
|
@@ -59,8 +62,8 @@ class RadixHTGlobalState : public GlobalSinkState {
|
|
59
62
|
public:
|
60
63
|
explicit RadixHTGlobalState(ClientContext &context)
|
61
64
|
: is_empty(true), multi_scan(true), partitioned(false),
|
62
|
-
partition_info(
|
63
|
-
MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads())) {
|
65
|
+
partition_info(make_uniq<RadixPartitionInfo>(
|
66
|
+
MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads()))) {
|
64
67
|
}
|
65
68
|
|
66
69
|
vector<unique_ptr<PartitionableHashTable>> intermediate_hts;
|
@@ -78,8 +81,16 @@ public:
|
|
78
81
|
bool is_finalized = false;
|
79
82
|
bool is_partitioned = false;
|
80
83
|
|
81
|
-
RadixPartitionInfo partition_info;
|
84
|
+
unique_ptr<RadixPartitionInfo> partition_info;
|
82
85
|
AggregateHTAppendState append_state;
|
86
|
+
|
87
|
+
//! Repartitioned HT info
|
88
|
+
bool repartitioned = false;
|
89
|
+
idx_t repartition_tasks_per_partition;
|
90
|
+
vector<vector<unique_ptr<PartitionableHashTable>>> repartition_tasks;
|
91
|
+
unique_array<atomic<idx_t>> repartition_tasks_assigned;
|
92
|
+
unique_array<atomic<idx_t>> repartition_tasks_done;
|
93
|
+
unique_array<atomic<bool>> finalize_assigned;
|
83
94
|
};
|
84
95
|
|
85
96
|
class RadixHTLocalState : public LocalSinkState {
|
@@ -146,9 +157,9 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk
|
|
146
157
|
gstate.is_empty = gstate.is_empty && group_chunk.size() == 0;
|
147
158
|
if (gstate.finalized_hts.empty()) {
|
148
159
|
// Create a finalized ht in the global state, that we can populate
|
149
|
-
gstate.finalized_hts.push_back(
|
150
|
-
|
151
|
-
|
160
|
+
gstate.finalized_hts.push_back(make_shared<GroupedAggregateHashTable>(
|
161
|
+
context.client, BufferAllocator::Get(context.client), group_types, op.payload_types, op.bindings,
|
162
|
+
HtEntryType::HT_WIDTH_64));
|
152
163
|
}
|
153
164
|
D_ASSERT(gstate.finalized_hts.size() == 1);
|
154
165
|
D_ASSERT(gstate.finalized_hts[0]);
|
@@ -163,12 +174,15 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk
|
|
163
174
|
|
164
175
|
if (!llstate.ht) {
|
165
176
|
llstate.ht =
|
166
|
-
make_uniq<PartitionableHashTable>(context.client,
|
167
|
-
group_types, op.payload_types, op.bindings);
|
177
|
+
make_uniq<PartitionableHashTable>(context.client, BufferAllocator::Get(context.client),
|
178
|
+
*gstate.partition_info, group_types, op.payload_types, op.bindings);
|
179
|
+
if (context.client.config.force_external) {
|
180
|
+
gstate.partitioned = true;
|
181
|
+
}
|
168
182
|
}
|
169
183
|
|
170
184
|
llstate.total_groups += llstate.ht->AddChunk(group_chunk, payload_input,
|
171
|
-
gstate.partitioned && gstate.partition_info
|
185
|
+
gstate.partitioned && gstate.partition_info->n_partitions > 1, filter);
|
172
186
|
if (llstate.total_groups >= radix_limit) {
|
173
187
|
gstate.partitioned = true;
|
174
188
|
}
|
@@ -192,8 +206,8 @@ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkSta
|
|
192
206
|
return; // no data
|
193
207
|
}
|
194
208
|
|
195
|
-
if (!llstate.ht->IsPartitioned() && gstate.partition_info
|
196
|
-
llstate.ht->Partition();
|
209
|
+
if (!llstate.ht->IsPartitioned() && gstate.partition_info->n_partitions > 1 && gstate.partitioned) {
|
210
|
+
llstate.ht->Partition(true);
|
197
211
|
}
|
198
212
|
|
199
213
|
// we will never add new values to these HTs so we can drop the first part of the HT
|
@@ -207,13 +221,23 @@ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkSta
|
|
207
221
|
gstate.intermediate_hts.push_back(std::move(llstate.ht));
|
208
222
|
}
|
209
223
|
|
224
|
+
// Sizes the list of finalized hash tables to the current number of radix partitions and
// assigns a freshly constructed GroupedAggregateHashTable (entry type HT_WIDTH_64) to every slot.
void RadixPartitionedHashTable::InitializeFinalizedHTs(ClientContext &context, GlobalSinkState &gstate_p) const {
	auto &sink = gstate_p.Cast<RadixHTGlobalState>();
	auto &buffer_allocator = BufferAllocator::Get(context);

	auto &finalized = sink.finalized_hts;
	finalized.resize(sink.partition_info->n_partitions);

	idx_t partition_idx = 0;
	while (partition_idx < sink.partition_info->n_partitions) {
		finalized[partition_idx] = make_shared<GroupedAggregateHashTable>(
		    context, buffer_allocator, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64);
		partition_idx++;
	}
}
|
233
|
+
|
210
234
|
bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState &gstate_p) const {
|
211
235
|
auto &gstate = gstate_p.Cast<RadixHTGlobalState>();
|
212
236
|
D_ASSERT(!gstate.is_finalized);
|
213
237
|
gstate.is_finalized = true;
|
214
238
|
|
215
239
|
// special case if we have non-combinable aggregates
|
216
|
-
// we have already
|
240
|
+
// we have already aggregated into a global shared HT that does not require any additional finalization steps
|
217
241
|
if (ForceSingleHT(gstate)) {
|
218
242
|
D_ASSERT(gstate.finalized_hts.size() <= 1);
|
219
243
|
D_ASSERT(gstate.finalized_hts.empty() || gstate.finalized_hts[0]);
|
@@ -221,31 +245,17 @@ bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState
|
|
221
245
|
}
|
222
246
|
|
223
247
|
// we can have two cases now, non-partitioned for few groups and radix-partitioned for very many groups.
|
224
|
-
|
225
|
-
|
226
|
-
bool any_partitioned = false;
|
227
|
-
for (auto &pht : gstate.intermediate_hts) {
|
228
|
-
if (pht->IsPartitioned()) {
|
229
|
-
any_partitioned = true;
|
230
|
-
break;
|
231
|
-
}
|
232
|
-
}
|
233
|
-
|
234
|
-
auto &allocator = Allocator::Get(context);
|
235
|
-
if (any_partitioned) {
|
248
|
+
auto &allocator = BufferAllocator::Get(context);
|
249
|
+
if (AnyPartitioned(gstate_p)) {
|
236
250
|
// if one is partitioned, all have to be
|
237
251
|
// this should mostly have already happened in Combine, but if not we do it here
|
238
252
|
for (auto &pht : gstate.intermediate_hts) {
|
239
253
|
if (!pht->IsPartitioned()) {
|
240
|
-
pht->Partition();
|
254
|
+
pht->Partition(true);
|
241
255
|
}
|
242
256
|
}
|
243
257
|
// schedule additional tasks to combine the partial HTs
|
244
|
-
|
245
|
-
for (idx_t r = 0; r < gstate.partition_info.n_partitions; r++) {
|
246
|
-
gstate.finalized_hts[r] = make_shared<GroupedAggregateHashTable>(
|
247
|
-
context, allocator, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64);
|
248
|
-
}
|
258
|
+
InitializeFinalizedHTs(context, gstate_p);
|
249
259
|
gstate.is_partitioned = true;
|
250
260
|
return true;
|
251
261
|
} else { // in the non-partitioned case we immediately combine all the unpartitioned hts created by the threads.
|
@@ -269,7 +279,7 @@ bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState
|
|
269
279
|
}
|
270
280
|
}
|
271
281
|
|
272
|
-
// this task is run in multiple threads and combines the radix-partitioned hash tables into a single
|
282
|
+
// this task is run in multiple threads and combines the radix-partitioned hash tables into a single one and then
|
273
283
|
// folds them into the global ht finally.
|
274
284
|
class RadixAggregateFinalizeTask : public ExecutorTask {
|
275
285
|
public:
|
@@ -279,10 +289,21 @@ public:
|
|
279
289
|
}
|
280
290
|
|
281
291
|
static void FinalizeHT(RadixHTGlobalState &gstate, idx_t radix) {
|
282
|
-
D_ASSERT(gstate.partition_info
|
292
|
+
D_ASSERT(gstate.partition_info->n_partitions <= gstate.finalized_hts.size());
|
283
293
|
D_ASSERT(gstate.finalized_hts[radix]);
|
284
|
-
|
285
|
-
|
294
|
+
|
295
|
+
idx_t pht_idx_from = 0;
|
296
|
+
idx_t pht_idx_to = gstate.intermediate_hts.size();
|
297
|
+
if (gstate.repartitioned) {
|
298
|
+
const auto num_partitions_before = gstate.repartition_tasks.size();
|
299
|
+
const auto multiplier = gstate.partition_info->n_partitions / num_partitions_before;
|
300
|
+
const auto radix_before = radix / multiplier;
|
301
|
+
pht_idx_from = radix_before * gstate.repartition_tasks_per_partition;
|
302
|
+
pht_idx_to = pht_idx_from + gstate.repartition_tasks_per_partition;
|
303
|
+
}
|
304
|
+
|
305
|
+
for (idx_t i = pht_idx_from; i < pht_idx_to; i++) {
|
306
|
+
for (auto &ht : gstate.intermediate_hts[i]->GetPartition(radix)) {
|
286
307
|
gstate.finalized_hts[radix]->Combine(*ht);
|
287
308
|
ht.reset();
|
288
309
|
}
|
@@ -302,22 +323,247 @@ private:
|
|
302
323
|
idx_t radix;
|
303
324
|
};
|
304
325
|
|
326
|
+
class RadixAggregateRepartitionTask : public ExecutorTask {
|
327
|
+
public:
|
328
|
+
RadixAggregateRepartitionTask(Executor &executor, shared_ptr<Event> event_p, RadixHTGlobalState &state_p,
|
329
|
+
idx_t num_partitions_before_p)
|
330
|
+
: ExecutorTask(executor), event(std::move(event_p)), state(state_p),
|
331
|
+
num_partitions_before(num_partitions_before_p) {
|
332
|
+
}
|
333
|
+
|
334
|
+
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
|
335
|
+
const auto multiplier = state.partition_info->n_partitions / num_partitions_before;
|
336
|
+
|
337
|
+
idx_t repartition_radix = 0;
|
338
|
+
idx_t finalize_radix = 0;
|
339
|
+
while (repartition_radix < num_partitions_before && finalize_radix < state.partition_info->n_partitions) {
|
340
|
+
// Loop over original partitions until we find one that we can repartition
|
341
|
+
for (; repartition_radix < num_partitions_before; repartition_radix++) {
|
342
|
+
auto task_idx = state.repartition_tasks_assigned[repartition_radix]++;
|
343
|
+
if (task_idx >= state.repartition_tasks_per_partition) {
|
344
|
+
continue;
|
345
|
+
}
|
346
|
+
auto &ht = state.repartition_tasks[repartition_radix][task_idx];
|
347
|
+
ht->Partition(true);
|
348
|
+
state.intermediate_hts[repartition_radix * state.repartition_tasks_per_partition + task_idx] =
|
349
|
+
std::move(ht);
|
350
|
+
state.repartition_tasks_done[repartition_radix]++;
|
351
|
+
break;
|
352
|
+
}
|
353
|
+
|
354
|
+
// Loop over repartitioned partitions
|
355
|
+
for (; finalize_radix < state.partition_info->n_partitions; finalize_radix++) {
|
356
|
+
const auto original_radix = finalize_radix / multiplier;
|
357
|
+
if (state.repartition_tasks_done[original_radix] != state.repartition_tasks_per_partition) {
|
358
|
+
break; // Needs more repartitioning
|
359
|
+
}
|
360
|
+
|
361
|
+
if (state.finalize_assigned[finalize_radix]) {
|
362
|
+
continue; // Already assigned
|
363
|
+
}
|
364
|
+
|
365
|
+
{
|
366
|
+
lock_guard<mutex> guard(state.lock);
|
367
|
+
if (state.finalize_assigned[finalize_radix]) {
|
368
|
+
// LCOV_EXCL_START
|
369
|
+
continue; // Check again with lock, but already assigned
|
370
|
+
// LCOV_EXCL_STOP
|
371
|
+
}
|
372
|
+
state.finalize_assigned[finalize_radix] = true;
|
373
|
+
}
|
374
|
+
|
375
|
+
// We can finalize!
|
376
|
+
RadixAggregateFinalizeTask::FinalizeHT(state, finalize_radix);
|
377
|
+
}
|
378
|
+
}
|
379
|
+
event->FinishTask();
|
380
|
+
return TaskExecutionResult::TASK_FINISHED;
|
381
|
+
}
|
382
|
+
|
383
|
+
private:
|
384
|
+
shared_ptr<Event> event;
|
385
|
+
RadixHTGlobalState &state;
|
386
|
+
const idx_t num_partitions_before;
|
387
|
+
};
|
388
|
+
|
305
389
|
void RadixPartitionedHashTable::ScheduleTasks(Executor &executor, const shared_ptr<Event> &event,
|
306
390
|
GlobalSinkState &state, vector<shared_ptr<Task>> &tasks) const {
|
307
391
|
auto &gstate = state.Cast<RadixHTGlobalState>();
|
308
392
|
if (!gstate.is_partitioned) {
|
309
393
|
return;
|
310
394
|
}
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
395
|
+
|
396
|
+
idx_t repartition_radix_bits;
|
397
|
+
idx_t concurrent_repartitions;
|
398
|
+
idx_t tasks_per_partition;
|
399
|
+
GetRepartitionInfo(executor.context, state, repartition_radix_bits, concurrent_repartitions, tasks_per_partition);
|
400
|
+
if (repartition_radix_bits == gstate.partition_info->radix_bits) {
|
401
|
+
// No repartitioning necessary
|
402
|
+
for (idx_t r = 0; r < gstate.partition_info->n_partitions; r++) {
|
403
|
+
D_ASSERT(gstate.partition_info->n_partitions <= gstate.finalized_hts.size());
|
404
|
+
D_ASSERT(gstate.finalized_hts[r]);
|
405
|
+
tasks.push_back(make_uniq<RadixAggregateFinalizeTask>(executor, event, gstate, r));
|
406
|
+
}
|
407
|
+
} else {
|
408
|
+
// Schedule repartition / finalize tasks
|
409
|
+
ScheduleRepartitionTasks(executor, event, state, tasks, repartition_radix_bits, concurrent_repartitions,
|
410
|
+
tasks_per_partition);
|
411
|
+
}
|
412
|
+
}
|
413
|
+
|
414
|
+
// Prepares the global sink state for repartitioning to "repartition_radix_bits" and schedules the tasks.
// For every original partition, the intermediate HTs of that partition are collected and distributed over
// "tasks_per_partition" new PartitionableHashTables that use the new partition info. One
// RadixAggregateRepartitionTask is scheduled per scheduler thread. Afterwards the intermediate HT list is
// reset to (original partitions * tasks per partition) empty slots, the new partition info is installed
// and the finalized HTs are (re)initialized. "concurrent_repartitions" is not used in this function.
void RadixPartitionedHashTable::ScheduleRepartitionTasks(Executor &executor, const shared_ptr<Event> &event,
                                                         GlobalSinkState &state, vector<shared_ptr<Task>> &tasks,
                                                         const idx_t repartition_radix_bits,
                                                         const idx_t concurrent_repartitions,
                                                         const idx_t tasks_per_partition) const {
	auto &sink = state.Cast<RadixHTGlobalState>();
	D_ASSERT(repartition_radix_bits > sink.partition_info->radix_bits);
	const auto old_partition_count = sink.partition_info->n_partitions;
	// Number of new partitions each original partition is split into
	const auto fanout = RadixPartitioning::NumberOfPartitions(repartition_radix_bits) / old_partition_count;

	// Initialize the repartitioning bookkeeping in the global state
	auto target_partition_info =
	    make_uniq<RadixPartitionInfo>(RadixPartitioning::NumberOfPartitions(repartition_radix_bits));
	sink.repartitioned = true;
	sink.repartition_tasks_per_partition = tasks_per_partition;
	sink.repartition_tasks.resize(old_partition_count);
	sink.repartition_tasks_assigned = make_uniq_array<atomic<idx_t>>(old_partition_count);
	sink.repartition_tasks_done = make_uniq_array<atomic<idx_t>>(old_partition_count);
	sink.finalize_assigned = make_uniq_array<atomic<bool>>(target_partition_info->n_partitions);

	for (idx_t old_idx = 0; old_idx < old_partition_count; old_idx++) {
		sink.repartition_tasks_assigned[old_idx] = 0;
		sink.repartition_tasks_done[old_idx] = 0;

		// Take ownership of all intermediate HTs that belong to this original partition
		HashTableList pending;
		for (auto &pht : sink.intermediate_hts) {
			for (auto &ht : pht->GetPartition(old_idx)) {
				pending.push_back(std::move(ht));
			}
		}

		// Distribute the collected HTs over the tasks (ceiling division, the last tasks may get fewer)
		const idx_t hts_per_task = (pending.size() + tasks_per_partition - 1) / tasks_per_partition;
		idx_t next_ht = 0;
		for (idx_t task_idx = 0; task_idx < tasks_per_partition; task_idx++) {
			auto task_ht =
			    make_uniq<PartitionableHashTable>(executor.context, BufferAllocator::Get(executor.context),
			                                      *target_partition_info, group_types, op.payload_types, op.bindings);
			const auto chunk_end = MinValue<idx_t>(next_ht + hts_per_task, pending.size());
			while (next_ht < chunk_end) {
				auto &ht = pending[next_ht];
				task_ht->Append(*ht);
				ht.reset();
				next_ht++;
			}
			sink.repartition_tasks[old_idx].push_back(std::move(task_ht));
		}

		// None of the new partitions derived from this original partition is assigned for finalization yet
		const auto first_new_idx = old_idx * fanout;
		for (idx_t offset = 0; offset < fanout; offset++) {
			sink.finalize_assigned[first_new_idx + offset] = false;
		}
	}

	// Schedule as many tasks as there are threads
	const idx_t thread_count = TaskScheduler::GetScheduler(executor.context).NumberOfThreads();
	for (idx_t thread_idx = 0; thread_idx < thread_count; thread_idx++) {
		tasks.emplace_back(make_shared<RadixAggregateRepartitionTask>(executor, event, sink, old_partition_count));
	}

	// The repartition tasks fill these slots, indexed by (original partition * tasks per partition + task)
	sink.intermediate_hts.clear();
	sink.intermediate_hts.resize(old_partition_count * tasks_per_partition);

	sink.partition_info = std::move(target_partition_info);
	InitializeFinalizedHTs(executor.context, state);
}
|
478
|
+
|
479
|
+
// Returns true when the partition info of the global state describes fewer than two partitions.
bool RadixPartitionedHashTable::ForceSingleHT(GlobalSinkState &state) {
	auto &sink = state.Cast<RadixHTGlobalState>();
	const auto &partition_info = *sink.partition_info;
	return partition_info.n_partitions <= 1;
}
|
483
|
+
|
484
|
+
// Returns true as soon as one of the intermediate hash tables in the global state reports
// IsPartitioned(); returns false when none does (including when there are no intermediate HTs).
bool RadixPartitionedHashTable::AnyPartitioned(GlobalSinkState &state) {
	auto &intermediate = state.Cast<RadixHTGlobalState>().intermediate_hts;
	for (idx_t ht_idx = 0; ht_idx < intermediate.size(); ht_idx++) {
		if (intermediate[ht_idx]->IsPartitioned()) {
			return true;
		}
	}
	return false;
}
|
317
493
|
|
318
|
-
|
494
|
+
// Determines how the intermediate HTs should be (re)partitioned before finalization.
// Outputs:
//   repartition_radix_bits  - radix bits to use for finalization (equal to the current bits if unchanged)
//   concurrent_repartitions - number of partitions that may be repartitioned at the same time
//   tasks_per_partition     - number of repartition tasks per original partition
// The decision is based on the summed per-partition counts/sizes of all intermediate HTs, compared against
// 60% of the buffer manager's maximum memory, and on the number of scheduler threads.
void RadixPartitionedHashTable::GetRepartitionInfo(ClientContext &context, GlobalSinkState &state,
                                                   idx_t &repartition_radix_bits, idx_t &concurrent_repartitions,
                                                   idx_t &tasks_per_partition) {
	auto &sink = state.Cast<RadixHTGlobalState>();
	const auto num_partitions = sink.partition_info->n_partitions;
	const auto radix_bits = sink.partition_info->radix_bits;
	D_ASSERT(IsPowerOfTwo(num_partitions));

	// Sum up tuple counts and data sizes per partition over all intermediate HTs
	vector<idx_t> counts_per_partition(num_partitions, 0);
	vector<idx_t> sizes_per_partition(num_partitions, 0);
	for (const auto &ht : sink.intermediate_hts) {
		for (idx_t p = 0; p < num_partitions; p++) {
			counts_per_partition[p] += ht->GetPartitionCount(p);
			sizes_per_partition[p] += ht->GetPartitionSize(p);
		}
	}

	// Compute the total HT size and locate the largest partition (first one wins on ties)
	idx_t total_size = 0;
	idx_t largest_idx = 0;
	idx_t largest_size = 0;
	for (idx_t p = 0; p < num_partitions; p++) {
		auto ht_size = sizes_per_partition[p] +
		               GroupedAggregateHashTable::FirstPartSize(counts_per_partition[p], HtEntryType::HT_WIDTH_64);
		total_size += ht_size;
		if (ht_size > largest_size) {
			largest_size = ht_size;
			largest_idx = p;
		}
	}

	// Switch to out-of-core finalize at ~60%
	const auto max_ht_size = double(0.6) * BufferManager::GetBufferManager(context).GetMaxMemory();
	const idx_t n_threads = PreviousPowerOfTwo(TaskScheduler::GetScheduler(context).NumberOfThreads());
	D_ASSERT(IsPowerOfTwo(n_threads));

	const bool in_memory_finalize = !context.config.force_external && total_size < max_ht_size;
	if (in_memory_finalize) {
		if (num_partitions < n_threads) {
			// Too few partitions: repartition so that all threads are kept busy
			// Can't have coverage because RadixHTGlobalState::MAX_RADIX_PARTITIONS > threads on github actions
			// LCOV_EXCL_START
			repartition_radix_bits = RadixPartitioning::RadixBits(NextPowerOfTwo(n_threads));
			tasks_per_partition = n_threads / num_partitions;
			// LCOV_EXCL_STOP
		} else {
			// The existing partitions can already keep all threads busy
			repartition_radix_bits = radix_bits;
			tasks_per_partition = 1;
		}
		concurrent_repartitions = num_partitions;
		return;
	}

	// Out-of-core finalize: add radix bits until the estimate for the largest partition fits a thread's share
	const auto largest_count = counts_per_partition[largest_idx];
	const auto largest_data_size = sizes_per_partition[largest_idx];

	const auto max_added_bits = RadixPartitioning::MAX_RADIX_BITS - radix_bits;
	idx_t added_bits = 1;
	while (added_bits < max_added_bits) {
		double partition_multiplier = RadixPartitioning::NumberOfPartitions(added_bits);

		auto estimated_count = double(largest_count) / partition_multiplier;
		auto estimated_size = double(largest_data_size) / partition_multiplier;
		auto estimated_ht_size =
		    estimated_size + GroupedAggregateHashTable::FirstPartSize(estimated_count, HtEntryType::HT_WIDTH_64);

		if (estimated_ht_size <= max_ht_size / n_threads) {
			break; // Max HT size is safe
		}
		added_bits++;
	}
	repartition_radix_bits = radix_bits + added_bits;
	concurrent_repartitions = MinValue<idx_t>(MaxValue<idx_t>(1, max_ht_size / largest_size), n_threads);
	tasks_per_partition = NextPowerOfTwo(n_threads / concurrent_repartitions);
}
|
322
568
|
|
323
569
|
//===--------------------------------------------------------------------===//
|
@@ -342,7 +588,7 @@ public:
|
|
342
588
|
class RadixHTLocalSourceState : public LocalSourceState {
|
343
589
|
public:
|
344
590
|
explicit RadixHTLocalSourceState(ExecutionContext &context, const RadixPartitionedHashTable &ht) {
|
345
|
-
auto &allocator =
|
591
|
+
auto &allocator = BufferAllocator::Get(context.client);
|
346
592
|
auto scan_chunk_types = ht.group_types;
|
347
593
|
for (auto &aggr_type : ht.op.aggregate_return_types) {
|
348
594
|
scan_chunk_types.push_back(aggr_type);
|
@@ -361,7 +607,7 @@ public:
|
|
361
607
|
};
|
362
608
|
|
363
609
|
// Creates the global source state for scanning this hash table, constructed from the
// buffer allocator of the given client context and this RadixPartitionedHashTable.
unique_ptr<GlobalSourceState> RadixPartitionedHashTable::GetGlobalSourceState(ClientContext &context) const {
	auto &buffer_allocator = BufferAllocator::Get(context);
	return make_uniq<RadixHTGlobalSourceState>(buffer_allocator, *this);
}
|
366
612
|
|
367
613
|
unique_ptr<LocalSourceState> RadixPartitionedHashTable::GetLocalSourceState(ExecutionContext &context) const {
|
@@ -401,13 +647,14 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
|
|
401
647
|
chunk.data[null_group].SetVectorType(VectorType::CONSTANT_VECTOR);
|
402
648
|
ConstantVector::SetNull(chunk.data[null_group], true);
|
403
649
|
}
|
650
|
+
ArenaAllocator allocator(BufferAllocator::Get(context.client));
|
404
651
|
for (idx_t i = 0; i < op.aggregates.size(); i++) {
|
405
652
|
D_ASSERT(op.aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
|
406
653
|
auto &aggr = op.aggregates[i]->Cast<BoundAggregateExpression>();
|
407
654
|
auto aggr_state = make_unsafe_uniq_array<data_t>(aggr.function.state_size());
|
408
655
|
aggr.function.initialize(aggr_state.get());
|
409
656
|
|
410
|
-
AggregateInputData aggr_input_data(aggr.bind_info.get(),
|
657
|
+
AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
|
411
658
|
Vector state_vector(Value::POINTER(CastPointerToValue(aggr_state.get())));
|
412
659
|
aggr.function.finalize(state_vector, aggr_input_data, chunk.data[null_groups.size() + i], 1, 0);
|
413
660
|
if (aggr.function.destructor) {
|