duckdb 0.8.2-dev37.0 → 0.8.2-dev3989.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/binding.gyp +29 -13
- package/binding.gyp.in +1 -1
- package/configure.py +11 -3
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +10 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +162 -41
- package/src/duckdb/extension/icu/icu-datesub.cpp +3 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +2 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +19 -6
- package/src/duckdb/extension/icu/icu-strptime.cpp +5 -24
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +10 -12
- package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -0
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +5 -19
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_enums.hpp +60 -0
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/include/json_scan.hpp +14 -10
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/include/json_transform.hpp +3 -0
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_deserializer.cpp +37 -73
- package/src/duckdb/extension/json/json_enums.cpp +105 -0
- package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +93 -38
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +26 -25
- package/src/duckdb/extension/json/json_scan.cpp +47 -6
- package/src/duckdb/extension/json/json_serializer.cpp +29 -72
- package/src/duckdb/extension/json/serialize_json.cpp +92 -0
- package/src/duckdb/extension/parquet/column_reader.cpp +37 -25
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +4 -0
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +1 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +28 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +258 -40
- package/src/duckdb/extension/parquet/parquet_reader.cpp +10 -10
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +25 -8
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +6 -0
- package/src/duckdb/extension/parquet/parquet_writer.cpp +149 -31
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +26 -0
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog.cpp +147 -70
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +8 -11
- package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +17 -41
- package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +2 -10
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +4 -14
- package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +11 -28
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +11 -42
- package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +7 -26
- package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +11 -27
- package/src/duckdb/src/catalog/catalog_entry.cpp +25 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/catalog_set.cpp +0 -63
- package/src/duckdb/src/catalog/default/default_functions.cpp +21 -0
- package/src/duckdb/src/catalog/dependency_manager.cpp +0 -36
- package/src/duckdb/src/common/adbc/adbc.cpp +541 -171
- package/src/duckdb/src/common/adbc/driver_manager.cpp +92 -39
- package/src/duckdb/src/common/adbc/nanoarrow/allocator.cpp +57 -0
- package/src/duckdb/src/common/adbc/nanoarrow/metadata.cpp +121 -0
- package/src/duckdb/src/common/adbc/nanoarrow/schema.cpp +474 -0
- package/src/duckdb/src/common/adbc/nanoarrow/single_batch_array_stream.cpp +84 -0
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/appender/bool_data.cpp +44 -0
- package/src/duckdb/src/common/arrow/appender/list_data.cpp +78 -0
- package/src/duckdb/src/common/arrow/appender/map_data.cpp +86 -0
- package/src/duckdb/src/common/arrow/appender/struct_data.cpp +45 -0
- package/src/duckdb/src/common/arrow/appender/union_data.cpp +70 -0
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +95 -666
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +65 -37
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +37 -42
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/constants.cpp +2 -1
- package/src/duckdb/src/common/enum_util.cpp +4979 -4458
- package/src/duckdb/src/common/enums/date_part_specifier.cpp +2 -0
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +15 -2
- package/src/duckdb/src/common/extra_type_info.cpp +487 -0
- package/src/duckdb/src/common/field_writer.cpp +1 -1
- package/src/duckdb/src/common/file_buffer.cpp +1 -1
- package/src/duckdb/src/common/file_system.cpp +46 -12
- package/src/duckdb/src/common/filename_pattern.cpp +1 -1
- package/src/duckdb/src/common/gzip_file_system.cpp +7 -12
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/http_state.cpp +78 -0
- package/src/duckdb/src/common/local_file_system.cpp +36 -28
- package/src/duckdb/src/common/multi_file_reader.cpp +193 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +92 -1
- package/src/duckdb/src/common/operator/string_cast.cpp +45 -8
- package/src/duckdb/src/common/radix_partitioning.cpp +34 -39
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +63 -73
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +85 -80
- package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +0 -9
- package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +102 -74
- package/src/duckdb/src/common/sort/sort_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +68 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +20 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +46 -10
- package/src/duckdb/src/common/types/date.cpp +15 -0
- package/src/duckdb/src/common/types/hugeint.cpp +40 -0
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +6 -0
- package/src/duckdb/src/common/types/list_segment.cpp +56 -198
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +251 -131
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +41 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/string_heap.cpp +4 -0
- package/src/duckdb/src/common/types/time.cpp +105 -0
- package/src/duckdb/src/common/types/timestamp.cpp +7 -0
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types/value.cpp +99 -60
- package/src/duckdb/src/common/types/vector.cpp +73 -80
- package/src/duckdb/src/common/types.cpp +38 -724
- package/src/duckdb/src/common/virtual_file_system.cpp +142 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +26 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +5 -7
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +64 -19
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +30 -0
- package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +1 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +83 -59
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +10 -4
- package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +2 -0
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +380 -89
- package/src/duckdb/src/core_functions/scalar/date/date_sub.cpp +2 -0
- package/src/duckdb/src/core_functions/scalar/date/date_trunc.cpp +4 -0
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +10 -0
- package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
- package/src/duckdb/src/core_functions/scalar/enum/enum_functions.cpp +16 -12
- package/src/duckdb/src/core_functions/scalar/generic/current_setting.cpp +3 -1
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +314 -82
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +23 -3
- package/src/duckdb/src/core_functions/scalar/map/map_entries.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/string/to_base.cpp +66 -0
- package/src/duckdb/src/core_functions/scalar/union/union_tag.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +226 -346
- package/src/duckdb/src/execution/column_binding_resolver.cpp +10 -7
- package/src/duckdb/src/execution/expression_executor/execute_parameter.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +219 -259
- package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +11 -15
- package/src/duckdb/src/execution/index/art/iterator.cpp +130 -214
- package/src/duckdb/src/execution/index/art/leaf.cpp +300 -266
- package/src/duckdb/src/execution/index/art/node.cpp +211 -205
- package/src/duckdb/src/execution/index/art/node16.cpp +10 -19
- package/src/duckdb/src/execution/index/art/node256.cpp +10 -18
- package/src/duckdb/src/execution/index/art/node4.cpp +21 -23
- package/src/duckdb/src/execution/index/art/node48.cpp +10 -20
- package/src/duckdb/src/execution/index/art/prefix.cpp +308 -338
- package/src/duckdb/src/execution/join_hashtable.cpp +9 -10
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +250 -317
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +6 -4
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +231 -190
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +367 -1068
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +157 -174
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +67 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +46 -47
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +12 -9
- package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +2 -2
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +10 -8
- package/src/duckdb/src/execution/operator/helper/physical_load.cpp +2 -1
- package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +7 -5
- package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +3 -1
- package/src/duckdb/src/execution/operator/helper/physical_set.cpp +3 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +7 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +449 -288
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +13 -6
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +28 -15
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +35 -17
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +7 -4
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +31 -10
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
- package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -5
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +7 -5
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +14 -10
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +11 -9
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +9 -7
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +14 -12
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +11 -11
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -2
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +24 -27
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -12
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +2 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +198 -0
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +2 -6
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +16 -7
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +37 -6
- package/src/duckdb/src/execution/physical_operator.cpp +20 -16
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +57 -35
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +32 -15
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +45 -34
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +2 -5
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +6 -11
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +636 -349
- package/src/duckdb/src/execution/window_executor.cpp +1285 -0
- package/src/duckdb/src/execution/window_segment_tree.cpp +408 -144
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -13
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +6 -12
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/cast/string_cast.cpp +2 -2
- package/src/duckdb/src/function/cast/time_casts.cpp +7 -6
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +7 -2
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +7 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/operators/add.cpp +9 -0
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +6 -3
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +39 -5
- package/src/duckdb/src/function/scalar_function.cpp +5 -20
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +57 -0
- package/src/duckdb/src/function/table/arrow.cpp +110 -88
- package/src/duckdb/src/function/table/arrow_conversion.cpp +86 -73
- package/src/duckdb/src/function/table/copy_csv.cpp +102 -97
- package/src/duckdb/src/function/table/read_csv.cpp +263 -141
- package/src/duckdb/src/function/table/system/test_all_types.cpp +48 -21
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +42 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +49 -2
- package/src/duckdb/src/function/table_function.cpp +4 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +20 -5
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +3 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +1 -4
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -5
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp +1 -6
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +2 -13
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +1 -4
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/view_catalog_entry.hpp +2 -5
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +14 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +0 -6
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.h +1 -0
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/adbc/single_batch_array_stream.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +109 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/bool_data.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +69 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +88 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/struct_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/union_data.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +105 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +9 -4
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +3 -5
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +5 -3
- package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.h +462 -0
- package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.hpp +14 -0
- package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/assert.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +70 -55
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/constants.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +681 -577
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +9 -1
- package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +4 -3
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +15 -1
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +215 -0
- package/src/duckdb/src/include/duckdb/common/field_writer.hpp +0 -4
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +10 -8
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +8 -3
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/index_vector.hpp +12 -0
- package/src/duckdb/src/include/duckdb/common/limits.hpp +52 -149
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +11 -5
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +12 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +48 -0
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/operator/add.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +65 -4
- package/src/duckdb/src/include/duckdb/common/operator/multiply.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +71 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +48 -39
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +0 -4
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +128 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +186 -133
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +166 -121
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +27 -4
- package/src/duckdb/src/include/duckdb/common/serializer.hpp +0 -7
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +34 -13
- package/src/duckdb/src/include/duckdb/common/stack_checker.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +11 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -5
- package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +46 -3
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +11 -15
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +46 -11
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +10 -1
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/types/time.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +16 -10
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +10 -3
- package/src/duckdb/src/include/duckdb/common/types.hpp +6 -25
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +40 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +4 -2
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +40 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +7 -5
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +6 -4
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +4 -2
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +12 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +128 -131
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +13 -12
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +32 -28
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +46 -51
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +134 -53
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +7 -9
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +8 -7
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +6 -5
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +5 -12
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +19 -19
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +61 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +22 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/operator/schema/{physical_create_index.hpp → physical_create_art_index.hpp} +14 -7
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +6 -5
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +11 -0
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +6 -2
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +19 -21
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +79 -63
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -4
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +6 -1
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +80 -0
- package/src/duckdb/src/include/duckdb/function/macro_function.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +12 -4
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +8 -3
- package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +99 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +6 -36
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +24 -12
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +5 -1
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -0
- package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/udf_function.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +4 -3
- package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp +29 -0
- package/src/duckdb/src/include/duckdb/main/chunk_scan_state.hpp +43 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +16 -14
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/client_properties.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +16 -1
- package/src/duckdb/src/include/duckdb/main/connection.hpp +3 -4
- package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +27 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +210 -144
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +41 -6
- package/src/duckdb/src/include/duckdb/main/extension_util.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +73 -5
- package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +6 -6
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +2 -27
- package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +71 -11
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +7 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +38 -64
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +8 -22
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -12
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +19 -30
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -3
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +9 -1
- package/src/duckdb/src/include/duckdb/parser/column_definition.hpp +6 -5
- package/src/duckdb/src/include/duckdb/parser/column_list.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/constraint.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/constraints/check_constraint.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/constraints/foreign_key_constraint.hpp +6 -0
- package/src/duckdb/src/include/duckdb/parser/constraints/not_null_constraint.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +6 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +21 -4
- package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +18 -2
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +12 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +66 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +8 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +8 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +9 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +9 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_schema_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +7 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +7 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +7 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/load_info.hpp +17 -3
- package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +22 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/pragma_info.hpp +10 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/show_select_info.hpp +7 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +10 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +10 -0
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/statement/execute_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +23 -26
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +16 -5
- package/src/duckdb/src/include/duckdb/planner/bound_constraint.hpp +0 -8
- package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +9 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_between_expression.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_conjunction_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +3 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_operator_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +24 -6
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +9 -2
- package/src/duckdb/src/include/duckdb/planner/expression/bound_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_unnest_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +13 -1
- package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/joinside.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +3 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -2
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_any_join.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +12 -7
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create.hpp +9 -6
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +12 -23
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create_table.hpp +10 -6
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cross_product.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +9 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +7 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +6 -10
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_empty_result.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_extension_operator.hpp +8 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +11 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_limit.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +52 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +7 -35
- package/src/duckdb/src/include/duckdb/planner/operator/logical_pivot.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_positional_join.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_projection.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +10 -7
- package/src/duckdb/src/include/duckdb/planner/operator/logical_reset.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_sample.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_set.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_simple.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_unnest.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_window.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/planner.hpp +4 -3
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +7 -1
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_pivotref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +33 -4
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +11 -11
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +5 -5
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +19 -16
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +88 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +54 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +45 -0
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +8 -5
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +7 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +18 -3
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +8 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +3 -4
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +1 -1
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +98 -1
- package/src/duckdb/src/main/appender.cpp +3 -1
- package/src/duckdb/src/main/attached_database.cpp +2 -2
- package/src/duckdb/src/main/capi/arrow-c.cpp +196 -8
- package/src/duckdb/src/main/capi/duckdb-c.cpp +16 -0
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/capi/pending-c.cpp +23 -0
- package/src/duckdb/src/main/capi/prepared-c.cpp +106 -28
- package/src/duckdb/src/main/capi/result-c.cpp +3 -1
- package/src/duckdb/src/main/chunk_scan_state/query_result.cpp +53 -0
- package/src/duckdb/src/main/chunk_scan_state.cpp +48 -0
- package/src/duckdb/src/main/client_context.cpp +42 -19
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +18 -0
- package/src/duckdb/src/main/config.cpp +9 -3
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/database.cpp +3 -12
- package/src/duckdb/src/main/db_instance_cache.cpp +14 -6
- package/src/duckdb/src/main/extension/extension_helper.cpp +164 -88
- package/src/duckdb/src/main/extension/extension_install.cpp +76 -15
- package/src/duckdb/src/main/extension/extension_load.cpp +62 -13
- package/src/duckdb/src/main/extension/extension_util.cpp +16 -0
- package/src/duckdb/src/main/pending_query_result.cpp +9 -1
- package/src/duckdb/src/main/prepared_statement.cpp +38 -11
- package/src/duckdb/src/main/prepared_statement_data.cpp +23 -18
- package/src/duckdb/src/main/query_result.cpp +0 -21
- package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +6 -6
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/main/relation.cpp +10 -9
- package/src/duckdb/src/main/settings/settings.cpp +125 -33
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +2 -4
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +477 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +180 -323
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +23 -6
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
- package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1047
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +52 -41
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
- package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +34 -7
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +27 -10
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +3 -5
- package/src/duckdb/src/parallel/executor.cpp +25 -1
- package/src/duckdb/src/parallel/pipeline.cpp +0 -17
- package/src/duckdb/src/parallel/pipeline_executor.cpp +33 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +55 -1
- package/src/duckdb/src/parallel/task_scheduler.cpp +18 -2
- package/src/duckdb/src/parser/column_definition.cpp +20 -32
- package/src/duckdb/src/parser/column_list.cpp +8 -0
- package/src/duckdb/src/parser/constraints/foreign_key_constraint.cpp +3 -0
- package/src/duckdb/src/parser/constraints/unique_constraint.cpp +3 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -25
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +7 -19
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_data/alter_info.cpp +5 -2
- package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +38 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +17 -1
- package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +2 -0
- package/src/duckdb/src/parser/parsed_data/detach_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/drop_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/sample_options.cpp +0 -18
- package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +4 -1
- package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +62 -36
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -47
- package/src/duckdb/src/parser/result_modifier.cpp +0 -87
- package/src/duckdb/src/parser/statement/execute_statement.cpp +2 -2
- package/src/duckdb/src/parser/statement/select_statement.cpp +0 -10
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -55
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +55 -38
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/expression/transform_param_ref.cpp +45 -26
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +16 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_create_index.cpp +32 -17
- package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_load.cpp +1 -0
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +28 -6
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +44 -25
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +5 -3
- package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +10 -10
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +36 -33
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +14 -52
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +0 -23
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +13 -7
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +70 -29
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +93 -28
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +67 -31
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +44 -31
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +24 -1
- package/src/duckdb/src/planner/expression/bound_between_expression.cpp +4 -0
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +13 -8
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +22 -0
- package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +28 -20
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +48 -4
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +5 -4
- package/src/duckdb/src/planner/expression_binder.cpp +23 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +19 -7
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +5 -6
- package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +4 -2
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +8 -0
- package/src/duckdb/src/planner/operator/logical_create.cpp +14 -0
- package/src/duckdb/src/planner/operator/logical_create_index.cpp +36 -7
- package/src/duckdb/src/planner/operator/logical_create_table.cpp +16 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_delete.cpp +9 -2
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_distinct.cpp +13 -0
- package/src/duckdb/src/planner/operator/logical_explain.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +39 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +82 -4
- package/src/duckdb/src/planner/operator/logical_insert.cpp +8 -2
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +22 -0
- package/src/duckdb/src/planner/operator/logical_order.cpp +39 -0
- package/src/duckdb/src/planner/operator/logical_pivot.cpp +3 -0
- package/src/duckdb/src/planner/operator/logical_recursive_cte.cpp +5 -5
- package/src/duckdb/src/planner/operator/logical_sample.cpp +3 -0
- package/src/duckdb/src/planner/operator/logical_update.cpp +8 -2
- package/src/duckdb/src/planner/parsed_data/bound_create_table_info.cpp +4 -2
- package/src/duckdb/src/planner/planner.cpp +18 -7
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +13 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +13 -9
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +7 -7
- package/src/duckdb/src/storage/checkpoint_manager.cpp +78 -72
- package/src/duckdb/src/storage/compression/bitpacking.cpp +87 -63
- package/src/duckdb/src/storage/compression/bitpacking_hugeint.cpp +295 -0
- package/src/duckdb/src/storage/compression/fsst.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +52 -13
- package/src/duckdb/src/storage/data_table.cpp +36 -25
- package/src/duckdb/src/storage/index.cpp +4 -26
- package/src/duckdb/src/storage/local_storage.cpp +3 -4
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +267 -0
- package/src/duckdb/src/storage/metadata/metadata_reader.cpp +80 -0
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +98 -0
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +194 -0
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +283 -0
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +762 -0
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +62 -0
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +461 -0
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +421 -0
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +342 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +97 -0
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +22 -0
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +97 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +164 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +69 -51
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +44 -2
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +3 -2
- package/src/duckdb/src/storage/storage_manager.cpp +11 -5
- package/src/duckdb/src/storage/table/chunk_info.cpp +99 -3
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
- package/src/duckdb/src/storage/table/list_column_data.cpp +6 -3
- package/src/duckdb/src/storage/table/persistent_table_data.cpp +1 -2
- package/src/duckdb/src/storage/table/row_group.cpp +102 -20
- package/src/duckdb/src/storage/table/row_group_collection.cpp +23 -19
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +1 -1
- package/src/duckdb/src/storage/wal_replay.cpp +26 -26
- package/src/duckdb/src/storage/write_ahead_log.cpp +3 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -11
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +5 -2
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +11 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/pg_functions.cpp +13 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11019 -10364
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +9 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +10 -0
- package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +31 -1
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
- package/src/duckdb/ub_src_common.cpp +4 -0
- package/src/duckdb/ub_src_common_adbc_nanoarrow.cpp +8 -0
- package/src/duckdb/ub_src_common_arrow_appender.cpp +10 -0
- package/src/duckdb/ub_src_common_serializer.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_execution.cpp +2 -2
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -6
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +1 -11
- package/src/duckdb/ub_src_execution_operator_schema.cpp +1 -1
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
- package/src/duckdb/ub_src_main.cpp +2 -0
- package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +3 -3
- package/src/duckdb/ub_src_storage.cpp +0 -4
- package/src/duckdb/ub_src_storage_compression.cpp +2 -0
- package/src/duckdb/ub_src_storage_metadata.cpp +6 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +30 -0
- package/src/duckdb_node.hpp +1 -0
- package/src/statement.cpp +10 -5
- package/test/columns.test.ts +25 -3
- package/test/extension.test.ts +1 -1
- package/test/test_all_types.test.ts +234 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -193
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -172
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_options.hpp +0 -25
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -69
- package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +0 -27
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_join.hpp +0 -32
- package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +0 -49
- package/src/duckdb/src/include/duckdb/storage/meta_block_writer.hpp +0 -50
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
- package/src/duckdb/src/parser/common_table_expression_info.cpp +0 -19
- package/src/duckdb/src/planner/operator/logical_asof_join.cpp +0 -14
- package/src/duckdb/src/planner/operator/logical_delim_join.cpp +0 -27
- package/src/duckdb/src/storage/meta_block_reader.cpp +0 -78
- package/src/duckdb/src/storage/meta_block_writer.cpp +0 -80
@@ -14,7 +14,7 @@
|
|
14
14
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
15
15
|
#include "duckdb/common/windows_undefs.hpp"
|
16
16
|
#include "duckdb/execution/expression_executor.hpp"
|
17
|
-
#include "duckdb/execution/
|
17
|
+
#include "duckdb/execution/window_executor.hpp"
|
18
18
|
#include "duckdb/execution/window_segment_tree.hpp"
|
19
19
|
#include "duckdb/main/client_config.hpp"
|
20
20
|
#include "duckdb/main/config.hpp"
|
@@ -32,7 +32,7 @@ namespace duckdb {
|
|
32
32
|
class WindowGlobalSinkState : public GlobalSinkState {
|
33
33
|
public:
|
34
34
|
WindowGlobalSinkState(const PhysicalWindow &op, ClientContext &context)
|
35
|
-
: mode(DBConfig::GetConfig(context).options.window_mode) {
|
35
|
+
: op(op), mode(DBConfig::GetConfig(context).options.window_mode) {
|
36
36
|
|
37
37
|
D_ASSERT(op.select_list[0]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
|
38
38
|
auto &wexpr = op.select_list[0]->Cast<BoundWindowExpression>();
|
@@ -42,6 +42,7 @@ public:
|
|
42
42
|
wexpr.partitions_stats, op.estimated_cardinality);
|
43
43
|
}
|
44
44
|
|
45
|
+
const PhysicalWindow &op;
|
45
46
|
unique_ptr<PartitionGlobalSinkState> global_partition;
|
46
47
|
WindowAggregationMode mode;
|
47
48
|
};
|
@@ -78,977 +79,40 @@ PhysicalWindow::PhysicalWindow(vector<LogicalType> types, vector<unique_ptr<Expr
|
|
78
79
|
}
|
79
80
|
}
|
80
81
|
|
81
|
-
static
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
return r;
|
110
|
-
}
|
111
|
-
|
112
|
-
static idx_t FindPrevStart(const ValidityMask &mask, const idx_t l, idx_t r, idx_t &n) {
|
113
|
-
if (mask.AllValid()) {
|
114
|
-
auto start = (r <= l + n) ? l : r - n;
|
115
|
-
n -= r - start;
|
116
|
-
return start;
|
117
|
-
}
|
118
|
-
|
119
|
-
while (l < r) {
|
120
|
-
// If r is aligned with the start of a block, and the previous block is blank,
|
121
|
-
// then skip backwards one block.
|
122
|
-
idx_t entry_idx;
|
123
|
-
idx_t shift;
|
124
|
-
mask.GetEntryIndex(r - 1, entry_idx, shift);
|
125
|
-
|
126
|
-
const auto block = mask.GetValidityEntry(entry_idx);
|
127
|
-
if (mask.NoneValid(block) && (shift + 1 == ValidityMask::BITS_PER_VALUE)) {
|
128
|
-
// r is nonzero (> l) and word aligned, so this will not underflow.
|
129
|
-
r -= ValidityMask::BITS_PER_VALUE;
|
130
|
-
continue;
|
131
|
-
}
|
132
|
-
|
133
|
-
// Loop backwards over the block
|
134
|
-
// shift is probing r-1 >= l >= 0
|
135
|
-
for (++shift; shift-- > 0; --r) {
|
136
|
-
if (mask.RowIsValid(block, shift) && --n == 0) {
|
137
|
-
return MaxValue(l, r - 1);
|
138
|
-
}
|
139
|
-
}
|
140
|
-
}
|
141
|
-
|
142
|
-
// Didn't find a start so return the start of the range
|
143
|
-
return l;
|
144
|
-
}
|
145
|
-
|
146
|
-
static void PrepareInputExpressions(vector<unique_ptr<Expression>> &exprs, ExpressionExecutor &executor,
|
147
|
-
DataChunk &chunk) {
|
148
|
-
if (exprs.empty()) {
|
149
|
-
return;
|
150
|
-
}
|
151
|
-
|
152
|
-
vector<LogicalType> types;
|
153
|
-
for (idx_t expr_idx = 0; expr_idx < exprs.size(); ++expr_idx) {
|
154
|
-
types.push_back(exprs[expr_idx]->return_type);
|
155
|
-
executor.AddExpression(*exprs[expr_idx]);
|
156
|
-
}
|
157
|
-
|
158
|
-
if (!types.empty()) {
|
159
|
-
auto &allocator = executor.GetAllocator();
|
160
|
-
chunk.Initialize(allocator, types);
|
161
|
-
}
|
162
|
-
}
|
163
|
-
|
164
|
-
static void PrepareInputExpression(Expression &expr, ExpressionExecutor &executor, DataChunk &chunk) {
|
165
|
-
vector<LogicalType> types;
|
166
|
-
types.push_back(expr.return_type);
|
167
|
-
executor.AddExpression(expr);
|
168
|
-
|
169
|
-
auto &allocator = executor.GetAllocator();
|
170
|
-
chunk.Initialize(allocator, types);
|
171
|
-
}
|
172
|
-
|
173
|
-
struct WindowInputExpression {
|
174
|
-
WindowInputExpression(optional_ptr<Expression> expr_p, ClientContext &context)
|
175
|
-
: expr(expr_p), ptype(PhysicalType::INVALID), scalar(true), executor(context) {
|
176
|
-
if (expr) {
|
177
|
-
PrepareInputExpression(*expr, executor, chunk);
|
178
|
-
ptype = expr->return_type.InternalType();
|
179
|
-
scalar = expr->IsScalar();
|
180
|
-
}
|
181
|
-
}
|
182
|
-
|
183
|
-
void Execute(DataChunk &input_chunk) {
|
184
|
-
if (expr) {
|
185
|
-
chunk.Reset();
|
186
|
-
executor.Execute(input_chunk, chunk);
|
187
|
-
chunk.Verify();
|
188
|
-
}
|
189
|
-
}
|
190
|
-
|
191
|
-
template <typename T>
|
192
|
-
inline T GetCell(idx_t i) const {
|
193
|
-
D_ASSERT(!chunk.data.empty());
|
194
|
-
const auto data = FlatVector::GetData<T>(chunk.data[0]);
|
195
|
-
return data[scalar ? 0 : i];
|
196
|
-
}
|
197
|
-
|
198
|
-
inline bool CellIsNull(idx_t i) const {
|
199
|
-
D_ASSERT(!chunk.data.empty());
|
200
|
-
if (chunk.data[0].GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
201
|
-
return ConstantVector::IsNull(chunk.data[0]);
|
202
|
-
}
|
203
|
-
return FlatVector::IsNull(chunk.data[0], i);
|
204
|
-
}
|
205
|
-
|
206
|
-
inline void CopyCell(Vector &target, idx_t target_offset) const {
|
207
|
-
D_ASSERT(!chunk.data.empty());
|
208
|
-
auto &source = chunk.data[0];
|
209
|
-
auto source_offset = scalar ? 0 : target_offset;
|
210
|
-
VectorOperations::Copy(source, target, source_offset + 1, source_offset, target_offset);
|
211
|
-
}
|
212
|
-
|
213
|
-
optional_ptr<Expression> expr;
|
214
|
-
PhysicalType ptype;
|
215
|
-
bool scalar;
|
216
|
-
ExpressionExecutor executor;
|
217
|
-
DataChunk chunk;
|
218
|
-
};
|
219
|
-
|
220
|
-
struct WindowInputColumn {
|
221
|
-
WindowInputColumn(Expression *expr_p, ClientContext &context, idx_t capacity_p)
|
222
|
-
: input_expr(expr_p, context), count(0), capacity(capacity_p) {
|
223
|
-
if (input_expr.expr) {
|
224
|
-
target = make_uniq<Vector>(input_expr.chunk.data[0].GetType(), capacity);
|
225
|
-
}
|
226
|
-
}
|
227
|
-
|
228
|
-
void Append(DataChunk &input_chunk) {
|
229
|
-
if (input_expr.expr) {
|
230
|
-
const auto source_count = input_chunk.size();
|
231
|
-
D_ASSERT(count + source_count <= capacity);
|
232
|
-
if (!input_expr.scalar || !count) {
|
233
|
-
input_expr.Execute(input_chunk);
|
234
|
-
auto &source = input_expr.chunk.data[0];
|
235
|
-
VectorOperations::Copy(source, *target, source_count, 0, count);
|
236
|
-
}
|
237
|
-
count += source_count;
|
238
|
-
}
|
239
|
-
}
|
240
|
-
|
241
|
-
inline bool CellIsNull(idx_t i) {
|
242
|
-
D_ASSERT(target);
|
243
|
-
D_ASSERT(i < count);
|
244
|
-
return FlatVector::IsNull(*target, input_expr.scalar ? 0 : i);
|
245
|
-
}
|
246
|
-
|
247
|
-
template <typename T>
|
248
|
-
inline T GetCell(idx_t i) const {
|
249
|
-
D_ASSERT(target);
|
250
|
-
D_ASSERT(i < count);
|
251
|
-
const auto data = FlatVector::GetData<T>(*target);
|
252
|
-
return data[input_expr.scalar ? 0 : i];
|
253
|
-
}
|
254
|
-
|
255
|
-
WindowInputExpression input_expr;
|
256
|
-
|
257
|
-
private:
|
258
|
-
unique_ptr<Vector> target;
|
259
|
-
idx_t count;
|
260
|
-
idx_t capacity;
|
261
|
-
};
|
262
|
-
|
263
|
-
static inline bool BoundaryNeedsPeer(const WindowBoundary &boundary) {
|
264
|
-
switch (boundary) {
|
265
|
-
case WindowBoundary::CURRENT_ROW_RANGE:
|
266
|
-
case WindowBoundary::EXPR_PRECEDING_RANGE:
|
267
|
-
case WindowBoundary::EXPR_FOLLOWING_RANGE:
|
268
|
-
return true;
|
269
|
-
default:
|
270
|
-
return false;
|
271
|
-
}
|
272
|
-
}
|
273
|
-
|
274
|
-
struct WindowBoundariesState {
|
275
|
-
static inline bool IsScalar(const unique_ptr<Expression> &expr) {
|
276
|
-
return expr ? expr->IsScalar() : true;
|
277
|
-
}
|
278
|
-
|
279
|
-
WindowBoundariesState(BoundWindowExpression &wexpr, const idx_t input_size)
|
280
|
-
: type(wexpr.type), input_size(input_size), start_boundary(wexpr.start), end_boundary(wexpr.end),
|
281
|
-
partition_count(wexpr.partitions.size()), order_count(wexpr.orders.size()),
|
282
|
-
range_sense(wexpr.orders.empty() ? OrderType::INVALID : wexpr.orders[0].type),
|
283
|
-
has_preceding_range(wexpr.start == WindowBoundary::EXPR_PRECEDING_RANGE ||
|
284
|
-
wexpr.end == WindowBoundary::EXPR_PRECEDING_RANGE),
|
285
|
-
has_following_range(wexpr.start == WindowBoundary::EXPR_FOLLOWING_RANGE ||
|
286
|
-
wexpr.end == WindowBoundary::EXPR_FOLLOWING_RANGE),
|
287
|
-
needs_peer(BoundaryNeedsPeer(wexpr.end) || wexpr.type == ExpressionType::WINDOW_CUME_DIST) {
|
288
|
-
}
|
289
|
-
|
290
|
-
void Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t source_offset,
|
291
|
-
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
292
|
-
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
293
|
-
|
294
|
-
// Cached lookups
|
295
|
-
const ExpressionType type;
|
296
|
-
const idx_t input_size;
|
297
|
-
const WindowBoundary start_boundary;
|
298
|
-
const WindowBoundary end_boundary;
|
299
|
-
const size_t partition_count;
|
300
|
-
const size_t order_count;
|
301
|
-
const OrderType range_sense;
|
302
|
-
const bool has_preceding_range;
|
303
|
-
const bool has_following_range;
|
304
|
-
const bool needs_peer;
|
305
|
-
|
306
|
-
idx_t partition_start = 0;
|
307
|
-
idx_t partition_end = 0;
|
308
|
-
idx_t peer_start = 0;
|
309
|
-
idx_t peer_end = 0;
|
310
|
-
idx_t valid_start = 0;
|
311
|
-
idx_t valid_end = 0;
|
312
|
-
int64_t window_start = -1;
|
313
|
-
int64_t window_end = -1;
|
314
|
-
bool is_same_partition = false;
|
315
|
-
bool is_peer = false;
|
316
|
-
};
|
317
|
-
|
318
|
-
static bool WindowNeedsRank(const BoundWindowExpression &wexpr) {
|
319
|
-
return wexpr.type == ExpressionType::WINDOW_PERCENT_RANK || wexpr.type == ExpressionType::WINDOW_RANK ||
|
320
|
-
wexpr.type == ExpressionType::WINDOW_RANK_DENSE || wexpr.type == ExpressionType::WINDOW_CUME_DIST;
|
321
|
-
}
|
322
|
-
|
323
|
-
template <typename T>
|
324
|
-
static T GetCell(DataChunk &chunk, idx_t column, idx_t index) {
|
325
|
-
D_ASSERT(chunk.ColumnCount() > column);
|
326
|
-
auto &source = chunk.data[column];
|
327
|
-
const auto data = FlatVector::GetData<T>(source);
|
328
|
-
return data[index];
|
329
|
-
}
|
330
|
-
|
331
|
-
static bool CellIsNull(DataChunk &chunk, idx_t column, idx_t index) {
|
332
|
-
D_ASSERT(chunk.ColumnCount() > column);
|
333
|
-
auto &source = chunk.data[column];
|
334
|
-
return FlatVector::IsNull(source, index);
|
335
|
-
}
|
336
|
-
|
337
|
-
static void CopyCell(DataChunk &chunk, idx_t column, idx_t index, Vector &target, idx_t target_offset) {
|
338
|
-
D_ASSERT(chunk.ColumnCount() > column);
|
339
|
-
auto &source = chunk.data[column];
|
340
|
-
VectorOperations::Copy(source, target, index + 1, index, target_offset);
|
341
|
-
}
|
342
|
-
|
343
|
-
template <typename T>
|
344
|
-
struct WindowColumnIterator {
|
345
|
-
using iterator = WindowColumnIterator<T>;
|
346
|
-
using iterator_category = std::forward_iterator_tag;
|
347
|
-
using difference_type = std::ptrdiff_t;
|
348
|
-
using value_type = T;
|
349
|
-
using reference = T;
|
350
|
-
using pointer = idx_t;
|
351
|
-
|
352
|
-
explicit WindowColumnIterator(WindowInputColumn &coll_p, pointer pos_p = 0) : coll(&coll_p), pos(pos_p) {
|
353
|
-
}
|
354
|
-
|
355
|
-
inline reference operator*() const {
|
356
|
-
return coll->GetCell<T>(pos);
|
357
|
-
}
|
358
|
-
inline explicit operator pointer() const {
|
359
|
-
return pos;
|
360
|
-
}
|
361
|
-
|
362
|
-
inline iterator &operator++() {
|
363
|
-
++pos;
|
364
|
-
return *this;
|
365
|
-
}
|
366
|
-
inline iterator operator++(int) {
|
367
|
-
auto result = *this;
|
368
|
-
++(*this);
|
369
|
-
return result;
|
370
|
-
}
|
371
|
-
|
372
|
-
friend inline bool operator==(const iterator &a, const iterator &b) {
|
373
|
-
return a.pos == b.pos;
|
374
|
-
}
|
375
|
-
friend inline bool operator!=(const iterator &a, const iterator &b) {
|
376
|
-
return a.pos != b.pos;
|
377
|
-
}
|
378
|
-
|
379
|
-
private:
|
380
|
-
optional_ptr<WindowInputColumn> coll;
|
381
|
-
pointer pos;
|
382
|
-
};
|
383
|
-
|
384
|
-
template <typename T, typename OP>
|
385
|
-
struct OperationCompare : public std::function<bool(T, T)> {
|
386
|
-
inline bool operator()(const T &lhs, const T &val) const {
|
387
|
-
return OP::template Operation(lhs, val);
|
388
|
-
}
|
389
|
-
};
|
390
|
-
|
391
|
-
template <typename T, typename OP, bool FROM>
|
392
|
-
static idx_t FindTypedRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
|
393
|
-
WindowInputExpression &boundary, const idx_t boundary_row) {
|
394
|
-
D_ASSERT(!boundary.CellIsNull(boundary_row));
|
395
|
-
const auto val = boundary.GetCell<T>(boundary_row);
|
396
|
-
|
397
|
-
OperationCompare<T, OP> comp;
|
398
|
-
WindowColumnIterator<T> begin(over, order_begin);
|
399
|
-
WindowColumnIterator<T> end(over, order_end);
|
400
|
-
if (FROM) {
|
401
|
-
return idx_t(std::lower_bound(begin, end, val, comp));
|
402
|
-
} else {
|
403
|
-
return idx_t(std::upper_bound(begin, end, val, comp));
|
404
|
-
}
|
405
|
-
}
|
406
|
-
|
407
|
-
template <typename OP, bool FROM>
|
408
|
-
static idx_t FindRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
|
409
|
-
WindowInputExpression &boundary, const idx_t expr_idx) {
|
410
|
-
D_ASSERT(boundary.chunk.ColumnCount() == 1);
|
411
|
-
D_ASSERT(boundary.chunk.data[0].GetType().InternalType() == over.input_expr.ptype);
|
412
|
-
|
413
|
-
switch (over.input_expr.ptype) {
|
414
|
-
case PhysicalType::INT8:
|
415
|
-
return FindTypedRangeBound<int8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
416
|
-
case PhysicalType::INT16:
|
417
|
-
return FindTypedRangeBound<int16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
418
|
-
case PhysicalType::INT32:
|
419
|
-
return FindTypedRangeBound<int32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
420
|
-
case PhysicalType::INT64:
|
421
|
-
return FindTypedRangeBound<int64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
422
|
-
case PhysicalType::UINT8:
|
423
|
-
return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
424
|
-
case PhysicalType::UINT16:
|
425
|
-
return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
426
|
-
case PhysicalType::UINT32:
|
427
|
-
return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
428
|
-
case PhysicalType::UINT64:
|
429
|
-
return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
430
|
-
case PhysicalType::INT128:
|
431
|
-
return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
432
|
-
case PhysicalType::FLOAT:
|
433
|
-
return FindTypedRangeBound<float, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
434
|
-
case PhysicalType::DOUBLE:
|
435
|
-
return FindTypedRangeBound<double, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
436
|
-
case PhysicalType::INTERVAL:
|
437
|
-
return FindTypedRangeBound<interval_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
438
|
-
default:
|
439
|
-
throw InternalException("Unsupported column type for RANGE");
|
440
|
-
}
|
441
|
-
}
|
442
|
-
|
443
|
-
template <bool FROM>
|
444
|
-
static idx_t FindOrderedRangeBound(WindowInputColumn &over, const OrderType range_sense, const idx_t order_begin,
|
445
|
-
const idx_t order_end, WindowInputExpression &boundary, const idx_t expr_idx) {
|
446
|
-
switch (range_sense) {
|
447
|
-
case OrderType::ASCENDING:
|
448
|
-
return FindRangeBound<LessThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
449
|
-
case OrderType::DESCENDING:
|
450
|
-
return FindRangeBound<GreaterThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
451
|
-
default:
|
452
|
-
throw InternalException("Unsupported ORDER BY sense for RANGE");
|
453
|
-
}
|
454
|
-
}
|
455
|
-
|
456
|
-
void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t expr_idx,
|
457
|
-
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
458
|
-
const ValidityMask &partition_mask, const ValidityMask &order_mask) {
|
459
|
-
|
460
|
-
auto &bounds = *this;
|
461
|
-
if (bounds.partition_count + bounds.order_count > 0) {
|
462
|
-
|
463
|
-
// determine partition and peer group boundaries to ultimately figure out window size
|
464
|
-
bounds.is_same_partition = !partition_mask.RowIsValidUnsafe(row_idx);
|
465
|
-
bounds.is_peer = !order_mask.RowIsValidUnsafe(row_idx);
|
466
|
-
|
467
|
-
// when the partition changes, recompute the boundaries
|
468
|
-
if (!bounds.is_same_partition) {
|
469
|
-
bounds.partition_start = row_idx;
|
470
|
-
bounds.peer_start = row_idx;
|
471
|
-
|
472
|
-
// find end of partition
|
473
|
-
bounds.partition_end = bounds.input_size;
|
474
|
-
if (bounds.partition_count) {
|
475
|
-
idx_t n = 1;
|
476
|
-
bounds.partition_end = FindNextStart(partition_mask, bounds.partition_start + 1, bounds.input_size, n);
|
477
|
-
}
|
478
|
-
|
479
|
-
// Find valid ordering values for the new partition
|
480
|
-
// so we can exclude NULLs from RANGE expression computations
|
481
|
-
bounds.valid_start = bounds.partition_start;
|
482
|
-
bounds.valid_end = bounds.partition_end;
|
483
|
-
|
484
|
-
if ((bounds.valid_start < bounds.valid_end) && bounds.has_preceding_range) {
|
485
|
-
// Exclude any leading NULLs
|
486
|
-
if (range_collection.CellIsNull(bounds.valid_start)) {
|
487
|
-
idx_t n = 1;
|
488
|
-
bounds.valid_start = FindNextStart(order_mask, bounds.valid_start + 1, bounds.valid_end, n);
|
489
|
-
}
|
490
|
-
}
|
491
|
-
|
492
|
-
if ((bounds.valid_start < bounds.valid_end) && bounds.has_following_range) {
|
493
|
-
// Exclude any trailing NULLs
|
494
|
-
if (range_collection.CellIsNull(bounds.valid_end - 1)) {
|
495
|
-
idx_t n = 1;
|
496
|
-
bounds.valid_end = FindPrevStart(order_mask, bounds.valid_start, bounds.valid_end, n);
|
497
|
-
}
|
498
|
-
}
|
499
|
-
|
500
|
-
} else if (!bounds.is_peer) {
|
501
|
-
bounds.peer_start = row_idx;
|
502
|
-
}
|
503
|
-
|
504
|
-
if (bounds.needs_peer) {
|
505
|
-
bounds.peer_end = bounds.partition_end;
|
506
|
-
if (bounds.order_count) {
|
507
|
-
idx_t n = 1;
|
508
|
-
bounds.peer_end = FindNextStart(order_mask, bounds.peer_start + 1, bounds.partition_end, n);
|
509
|
-
}
|
510
|
-
}
|
511
|
-
|
512
|
-
} else {
|
513
|
-
bounds.is_same_partition = false;
|
514
|
-
bounds.is_peer = true;
|
515
|
-
bounds.partition_end = bounds.input_size;
|
516
|
-
bounds.peer_end = bounds.partition_end;
|
517
|
-
}
|
518
|
-
|
519
|
-
// determine window boundaries depending on the type of expression
|
520
|
-
bounds.window_start = -1;
|
521
|
-
bounds.window_end = -1;
|
522
|
-
|
523
|
-
switch (bounds.start_boundary) {
|
524
|
-
case WindowBoundary::UNBOUNDED_PRECEDING:
|
525
|
-
bounds.window_start = bounds.partition_start;
|
526
|
-
break;
|
527
|
-
case WindowBoundary::CURRENT_ROW_ROWS:
|
528
|
-
bounds.window_start = row_idx;
|
529
|
-
break;
|
530
|
-
case WindowBoundary::CURRENT_ROW_RANGE:
|
531
|
-
bounds.window_start = bounds.peer_start;
|
532
|
-
break;
|
533
|
-
case WindowBoundary::EXPR_PRECEDING_ROWS: {
|
534
|
-
if (!TrySubtractOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(expr_idx),
|
535
|
-
bounds.window_start)) {
|
536
|
-
throw OutOfRangeException("Overflow computing ROWS PRECEDING start");
|
537
|
-
}
|
538
|
-
break;
|
539
|
-
}
|
540
|
-
case WindowBoundary::EXPR_FOLLOWING_ROWS: {
|
541
|
-
if (!TryAddOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(expr_idx),
|
542
|
-
bounds.window_start)) {
|
543
|
-
throw OutOfRangeException("Overflow computing ROWS FOLLOWING start");
|
544
|
-
}
|
545
|
-
break;
|
546
|
-
}
|
547
|
-
case WindowBoundary::EXPR_PRECEDING_RANGE: {
|
548
|
-
if (boundary_start.CellIsNull(expr_idx)) {
|
549
|
-
bounds.window_start = bounds.peer_start;
|
550
|
-
} else {
|
551
|
-
bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, bounds.valid_start,
|
552
|
-
row_idx, boundary_start, expr_idx);
|
553
|
-
}
|
554
|
-
break;
|
555
|
-
}
|
556
|
-
case WindowBoundary::EXPR_FOLLOWING_RANGE: {
|
557
|
-
if (boundary_start.CellIsNull(expr_idx)) {
|
558
|
-
bounds.window_start = bounds.peer_start;
|
559
|
-
} else {
|
560
|
-
bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, row_idx,
|
561
|
-
bounds.valid_end, boundary_start, expr_idx);
|
562
|
-
}
|
82
|
+
static unique_ptr<WindowExecutor> WindowExecutorFactory(BoundWindowExpression &wexpr, ClientContext &context,
|
83
|
+
const ValidityMask &partition_mask,
|
84
|
+
const ValidityMask &order_mask, const idx_t payload_count,
|
85
|
+
WindowAggregationMode mode) {
|
86
|
+
switch (wexpr.type) {
|
87
|
+
case ExpressionType::WINDOW_AGGREGATE:
|
88
|
+
return make_uniq<WindowAggregateExecutor>(wexpr, context, payload_count, partition_mask, order_mask, mode);
|
89
|
+
case ExpressionType::WINDOW_ROW_NUMBER:
|
90
|
+
return make_uniq<WindowRowNumberExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
91
|
+
case ExpressionType::WINDOW_RANK_DENSE:
|
92
|
+
return make_uniq<WindowDenseRankExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
93
|
+
case ExpressionType::WINDOW_RANK:
|
94
|
+
return make_uniq<WindowRankExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
95
|
+
case ExpressionType::WINDOW_PERCENT_RANK:
|
96
|
+
return make_uniq<WindowPercentRankExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
97
|
+
case ExpressionType::WINDOW_CUME_DIST:
|
98
|
+
return make_uniq<WindowCumeDistExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
99
|
+
case ExpressionType::WINDOW_NTILE:
|
100
|
+
return make_uniq<WindowNtileExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
101
|
+
case ExpressionType::WINDOW_LEAD:
|
102
|
+
case ExpressionType::WINDOW_LAG:
|
103
|
+
return make_uniq<WindowLeadLagExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
104
|
+
case ExpressionType::WINDOW_FIRST_VALUE:
|
105
|
+
return make_uniq<WindowFirstValueExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
106
|
+
case ExpressionType::WINDOW_LAST_VALUE:
|
107
|
+
return make_uniq<WindowLastValueExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
108
|
+
case ExpressionType::WINDOW_NTH_VALUE:
|
109
|
+
return make_uniq<WindowNthValueExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
563
110
|
break;
|
564
|
-
}
|
565
111
|
default:
|
566
|
-
throw InternalException("
|
567
|
-
}
|
568
|
-
|
569
|
-
switch (bounds.end_boundary) {
|
570
|
-
case WindowBoundary::CURRENT_ROW_ROWS:
|
571
|
-
bounds.window_end = row_idx + 1;
|
572
|
-
break;
|
573
|
-
case WindowBoundary::CURRENT_ROW_RANGE:
|
574
|
-
bounds.window_end = bounds.peer_end;
|
575
|
-
break;
|
576
|
-
case WindowBoundary::UNBOUNDED_FOLLOWING:
|
577
|
-
bounds.window_end = bounds.partition_end;
|
578
|
-
break;
|
579
|
-
case WindowBoundary::EXPR_PRECEDING_ROWS:
|
580
|
-
if (!TrySubtractOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(expr_idx),
|
581
|
-
bounds.window_end)) {
|
582
|
-
throw OutOfRangeException("Overflow computing ROWS PRECEDING end");
|
583
|
-
}
|
584
|
-
break;
|
585
|
-
case WindowBoundary::EXPR_FOLLOWING_ROWS:
|
586
|
-
if (!TryAddOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(expr_idx),
|
587
|
-
bounds.window_end)) {
|
588
|
-
throw OutOfRangeException("Overflow computing ROWS FOLLOWING end");
|
589
|
-
}
|
590
|
-
break;
|
591
|
-
case WindowBoundary::EXPR_PRECEDING_RANGE: {
|
592
|
-
if (boundary_end.CellIsNull(expr_idx)) {
|
593
|
-
bounds.window_end = bounds.peer_end;
|
594
|
-
} else {
|
595
|
-
bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, bounds.valid_start,
|
596
|
-
row_idx, boundary_end, expr_idx);
|
597
|
-
}
|
598
|
-
break;
|
599
|
-
}
|
600
|
-
case WindowBoundary::EXPR_FOLLOWING_RANGE: {
|
601
|
-
if (boundary_end.CellIsNull(expr_idx)) {
|
602
|
-
bounds.window_end = bounds.peer_end;
|
603
|
-
} else {
|
604
|
-
bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, row_idx,
|
605
|
-
bounds.valid_end, boundary_end, expr_idx);
|
606
|
-
}
|
607
|
-
break;
|
608
|
-
}
|
609
|
-
default:
|
610
|
-
throw InternalException("Unsupported window end boundary");
|
611
|
-
}
|
612
|
-
|
613
|
-
// clamp windows to partitions if they should exceed
|
614
|
-
if (bounds.window_start < (int64_t)bounds.partition_start) {
|
615
|
-
bounds.window_start = bounds.partition_start;
|
616
|
-
}
|
617
|
-
if (bounds.window_start > (int64_t)bounds.partition_end) {
|
618
|
-
bounds.window_start = bounds.partition_end;
|
619
|
-
}
|
620
|
-
if (bounds.window_end < (int64_t)bounds.partition_start) {
|
621
|
-
bounds.window_end = bounds.partition_start;
|
622
|
-
}
|
623
|
-
if (bounds.window_end > (int64_t)bounds.partition_end) {
|
624
|
-
bounds.window_end = bounds.partition_end;
|
625
|
-
}
|
626
|
-
|
627
|
-
if (bounds.window_start < 0 || bounds.window_end < 0) {
|
628
|
-
throw InternalException("Failed to compute window boundaries");
|
112
|
+
throw InternalException("Window aggregate type %s", ExpressionTypeToString(wexpr.type));
|
629
113
|
}
|
630
114
|
}
|
631
115
|
|
632
|
-
struct WindowExecutor {
|
633
|
-
static bool IsConstantAggregate(const BoundWindowExpression &wexpr);
|
634
|
-
|
635
|
-
WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const ValidityMask &partition_mask,
|
636
|
-
const idx_t count);
|
637
|
-
|
638
|
-
void Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count);
|
639
|
-
void Finalize(WindowAggregationMode mode);
|
640
|
-
|
641
|
-
void Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
|
642
|
-
const ValidityMask &order_mask);
|
643
|
-
|
644
|
-
// The function
|
645
|
-
BoundWindowExpression &wexpr;
|
646
|
-
|
647
|
-
// Frame management
|
648
|
-
WindowBoundariesState bounds;
|
649
|
-
uint64_t dense_rank = 1;
|
650
|
-
uint64_t rank_equal = 0;
|
651
|
-
uint64_t rank = 1;
|
652
|
-
|
653
|
-
// Expression collections
|
654
|
-
DataChunk payload_collection;
|
655
|
-
ExpressionExecutor payload_executor;
|
656
|
-
DataChunk payload_chunk;
|
657
|
-
|
658
|
-
ExpressionExecutor filter_executor;
|
659
|
-
ValidityMask filter_mask;
|
660
|
-
vector<validity_t> filter_bits;
|
661
|
-
SelectionVector filter_sel;
|
662
|
-
|
663
|
-
// LEAD/LAG Evaluation
|
664
|
-
WindowInputExpression leadlag_offset;
|
665
|
-
WindowInputExpression leadlag_default;
|
666
|
-
|
667
|
-
// evaluate boundaries if present. Parser has checked boundary types.
|
668
|
-
WindowInputExpression boundary_start;
|
669
|
-
WindowInputExpression boundary_end;
|
670
|
-
|
671
|
-
// evaluate RANGE expressions, if needed
|
672
|
-
WindowInputColumn range;
|
673
|
-
|
674
|
-
// IGNORE NULLS
|
675
|
-
ValidityMask ignore_nulls;
|
676
|
-
|
677
|
-
// build a segment tree for frame-adhering aggregates
|
678
|
-
// see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
|
679
|
-
unique_ptr<WindowSegmentTree> segment_tree = nullptr;
|
680
|
-
|
681
|
-
// all aggregate values are the same for each partition
|
682
|
-
unique_ptr<WindowConstantAggregate> constant_aggregate = nullptr;
|
683
|
-
};
|
684
|
-
|
685
|
-
bool WindowExecutor::IsConstantAggregate(const BoundWindowExpression &wexpr) {
|
686
|
-
if (!wexpr.aggregate) {
|
687
|
-
return false;
|
688
|
-
}
|
689
|
-
|
690
|
-
// COUNT(*) is already handled efficiently by segment trees.
|
691
|
-
if (wexpr.children.empty()) {
|
692
|
-
return false;
|
693
|
-
}
|
694
|
-
|
695
|
-
/*
|
696
|
-
The default framing option is RANGE UNBOUNDED PRECEDING, which
|
697
|
-
is the same as RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT
|
698
|
-
ROW; it sets the frame to be all rows from the partition start
|
699
|
-
up through the current row's last peer (a row that the window's
|
700
|
-
ORDER BY clause considers equivalent to the current row; all
|
701
|
-
rows are peers if there is no ORDER BY). In general, UNBOUNDED
|
702
|
-
PRECEDING means that the frame starts with the first row of the
|
703
|
-
partition, and similarly UNBOUNDED FOLLOWING means that the
|
704
|
-
frame ends with the last row of the partition, regardless of
|
705
|
-
RANGE, ROWS or GROUPS mode. In ROWS mode, CURRENT ROW means that
|
706
|
-
the frame starts or ends with the current row; but in RANGE or
|
707
|
-
GROUPS mode it means that the frame starts or ends with the
|
708
|
-
current row's first or last peer in the ORDER BY ordering. The
|
709
|
-
offset PRECEDING and offset FOLLOWING options vary in meaning
|
710
|
-
depending on the frame mode.
|
711
|
-
*/
|
712
|
-
switch (wexpr.start) {
|
713
|
-
case WindowBoundary::UNBOUNDED_PRECEDING:
|
714
|
-
break;
|
715
|
-
case WindowBoundary::CURRENT_ROW_RANGE:
|
716
|
-
if (!wexpr.orders.empty()) {
|
717
|
-
return false;
|
718
|
-
}
|
719
|
-
break;
|
720
|
-
default:
|
721
|
-
return false;
|
722
|
-
}
|
723
|
-
|
724
|
-
switch (wexpr.end) {
|
725
|
-
case WindowBoundary::UNBOUNDED_FOLLOWING:
|
726
|
-
break;
|
727
|
-
case WindowBoundary::CURRENT_ROW_RANGE:
|
728
|
-
if (!wexpr.orders.empty()) {
|
729
|
-
return false;
|
730
|
-
}
|
731
|
-
break;
|
732
|
-
default:
|
733
|
-
return false;
|
734
|
-
}
|
735
|
-
|
736
|
-
return true;
|
737
|
-
}
|
738
|
-
|
739
|
-
WindowExecutor::WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const ValidityMask &partition_mask,
|
740
|
-
const idx_t count)
|
741
|
-
: wexpr(wexpr), bounds(wexpr, count), payload_collection(), payload_executor(context), filter_executor(context),
|
742
|
-
leadlag_offset(wexpr.offset_expr.get(), context), leadlag_default(wexpr.default_expr.get(), context),
|
743
|
-
boundary_start(wexpr.start_expr.get(), context), boundary_end(wexpr.end_expr.get(), context),
|
744
|
-
range((bounds.has_preceding_range || bounds.has_following_range) ? wexpr.orders[0].expression.get() : nullptr,
|
745
|
-
context, count)
|
746
|
-
|
747
|
-
{
|
748
|
-
// TODO we could evaluate those expressions in parallel
|
749
|
-
|
750
|
-
// Check for constant aggregate
|
751
|
-
if (IsConstantAggregate(wexpr)) {
|
752
|
-
constant_aggregate =
|
753
|
-
make_uniq<WindowConstantAggregate>(AggregateObject(wexpr), wexpr.return_type, partition_mask, count);
|
754
|
-
}
|
755
|
-
|
756
|
-
// evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
|
757
|
-
if (wexpr.filter_expr) {
|
758
|
-
// Start with all invalid and set the ones that pass
|
759
|
-
filter_bits.resize(ValidityMask::ValidityMaskSize(count), 0);
|
760
|
-
filter_mask.Initialize(filter_bits.data());
|
761
|
-
filter_executor.AddExpression(*wexpr.filter_expr);
|
762
|
-
filter_sel.Initialize(STANDARD_VECTOR_SIZE);
|
763
|
-
}
|
764
|
-
|
765
|
-
// TODO: child may be a scalar, don't need to materialize the whole collection then
|
766
|
-
|
767
|
-
// evaluate inner expressions of window functions, could be more complex
|
768
|
-
PrepareInputExpressions(wexpr.children, payload_executor, payload_chunk);
|
769
|
-
|
770
|
-
auto types = payload_chunk.GetTypes();
|
771
|
-
if (!types.empty()) {
|
772
|
-
payload_collection.Initialize(Allocator::Get(context), types);
|
773
|
-
}
|
774
|
-
}
|
775
|
-
|
776
|
-
void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) {
|
777
|
-
// Single pass over the input to produce the global data.
|
778
|
-
// Vectorisation for the win...
|
779
|
-
|
780
|
-
// Set up a validity mask for IGNORE NULLS
|
781
|
-
bool check_nulls = false;
|
782
|
-
if (wexpr.ignore_nulls) {
|
783
|
-
switch (wexpr.type) {
|
784
|
-
case ExpressionType::WINDOW_LEAD:
|
785
|
-
case ExpressionType::WINDOW_LAG:
|
786
|
-
case ExpressionType::WINDOW_FIRST_VALUE:
|
787
|
-
case ExpressionType::WINDOW_LAST_VALUE:
|
788
|
-
case ExpressionType::WINDOW_NTH_VALUE:
|
789
|
-
check_nulls = true;
|
790
|
-
break;
|
791
|
-
default:
|
792
|
-
break;
|
793
|
-
}
|
794
|
-
}
|
795
|
-
|
796
|
-
const auto count = input_chunk.size();
|
797
|
-
|
798
|
-
idx_t filtered = 0;
|
799
|
-
SelectionVector *filtering = nullptr;
|
800
|
-
if (wexpr.filter_expr) {
|
801
|
-
filtering = &filter_sel;
|
802
|
-
filtered = filter_executor.SelectExpression(input_chunk, filter_sel);
|
803
|
-
for (idx_t f = 0; f < filtered; ++f) {
|
804
|
-
filter_mask.SetValid(input_idx + filter_sel[f]);
|
805
|
-
}
|
806
|
-
}
|
807
|
-
|
808
|
-
if (!wexpr.children.empty()) {
|
809
|
-
payload_chunk.Reset();
|
810
|
-
payload_executor.Execute(input_chunk, payload_chunk);
|
811
|
-
payload_chunk.Verify();
|
812
|
-
if (constant_aggregate) {
|
813
|
-
constant_aggregate->Sink(payload_chunk, filtering, filtered);
|
814
|
-
} else {
|
815
|
-
payload_collection.Append(payload_chunk, true);
|
816
|
-
}
|
817
|
-
|
818
|
-
// process payload chunks while they are still piping hot
|
819
|
-
if (check_nulls) {
|
820
|
-
UnifiedVectorFormat vdata;
|
821
|
-
payload_chunk.data[0].ToUnifiedFormat(count, vdata);
|
822
|
-
if (!vdata.validity.AllValid()) {
|
823
|
-
// Lazily materialise the contents when we find the first NULL
|
824
|
-
if (ignore_nulls.AllValid()) {
|
825
|
-
ignore_nulls.Initialize(total_count);
|
826
|
-
}
|
827
|
-
// Write to the current position
|
828
|
-
if (input_idx % ValidityMask::BITS_PER_VALUE == 0) {
|
829
|
-
// If we are at the edge of an output entry, just copy the entries
|
830
|
-
auto dst = ignore_nulls.GetData() + ignore_nulls.EntryCount(input_idx);
|
831
|
-
auto src = vdata.validity.GetData();
|
832
|
-
for (auto entry_count = vdata.validity.EntryCount(count); entry_count-- > 0;) {
|
833
|
-
*dst++ = *src++;
|
834
|
-
}
|
835
|
-
} else {
|
836
|
-
// If not, we have ragged data and need to copy one bit at a time.
|
837
|
-
for (idx_t i = 0; i < count; ++i) {
|
838
|
-
ignore_nulls.Set(input_idx + i, vdata.validity.RowIsValid(i));
|
839
|
-
}
|
840
|
-
}
|
841
|
-
}
|
842
|
-
}
|
843
|
-
}
|
844
|
-
|
845
|
-
range.Append(input_chunk);
|
846
|
-
}
|
847
|
-
|
848
|
-
void WindowExecutor::Finalize(WindowAggregationMode mode) {
|
849
|
-
// build a segment tree for frame-adhering aggregates
|
850
|
-
// see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
|
851
|
-
if (constant_aggregate) {
|
852
|
-
constant_aggregate->Finalize();
|
853
|
-
} else if (wexpr.aggregate) {
|
854
|
-
segment_tree = make_uniq<WindowSegmentTree>(AggregateObject(wexpr), wexpr.return_type, &payload_collection,
|
855
|
-
filter_mask, mode);
|
856
|
-
}
|
857
|
-
}
|
858
|
-
|
859
|
-
void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
|
860
|
-
const ValidityMask &order_mask) {
|
861
|
-
// Evaluate the row-level arguments
|
862
|
-
boundary_start.Execute(input_chunk);
|
863
|
-
boundary_end.Execute(input_chunk);
|
864
|
-
|
865
|
-
leadlag_offset.Execute(input_chunk);
|
866
|
-
leadlag_default.Execute(input_chunk);
|
867
|
-
|
868
|
-
// this is the main loop, go through all sorted rows and compute window function result
|
869
|
-
for (idx_t output_offset = 0; output_offset < input_chunk.size(); ++output_offset, ++row_idx) {
|
870
|
-
// special case, OVER (), aggregate over everything
|
871
|
-
bounds.Update(row_idx, range, output_offset, boundary_start, boundary_end, partition_mask, order_mask);
|
872
|
-
if (WindowNeedsRank(wexpr)) {
|
873
|
-
if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init
|
874
|
-
dense_rank = 1;
|
875
|
-
rank = 1;
|
876
|
-
rank_equal = 0;
|
877
|
-
} else if (!bounds.is_peer) {
|
878
|
-
dense_rank++;
|
879
|
-
rank += rank_equal;
|
880
|
-
rank_equal = 0;
|
881
|
-
}
|
882
|
-
rank_equal++;
|
883
|
-
}
|
884
|
-
|
885
|
-
// if no values are read for window, result is NULL
|
886
|
-
if (bounds.window_start >= bounds.window_end) {
|
887
|
-
FlatVector::SetNull(result, output_offset, true);
|
888
|
-
continue;
|
889
|
-
}
|
890
|
-
|
891
|
-
switch (wexpr.type) {
|
892
|
-
case ExpressionType::WINDOW_AGGREGATE: {
|
893
|
-
if (constant_aggregate) {
|
894
|
-
constant_aggregate->Compute(result, output_offset, bounds.window_start, bounds.window_end);
|
895
|
-
} else {
|
896
|
-
segment_tree->Compute(result, output_offset, bounds.window_start, bounds.window_end);
|
897
|
-
}
|
898
|
-
break;
|
899
|
-
}
|
900
|
-
case ExpressionType::WINDOW_ROW_NUMBER: {
|
901
|
-
auto rdata = FlatVector::GetData<int64_t>(result);
|
902
|
-
rdata[output_offset] = row_idx - bounds.partition_start + 1;
|
903
|
-
break;
|
904
|
-
}
|
905
|
-
case ExpressionType::WINDOW_RANK_DENSE: {
|
906
|
-
auto rdata = FlatVector::GetData<int64_t>(result);
|
907
|
-
rdata[output_offset] = dense_rank;
|
908
|
-
break;
|
909
|
-
}
|
910
|
-
case ExpressionType::WINDOW_RANK: {
|
911
|
-
auto rdata = FlatVector::GetData<int64_t>(result);
|
912
|
-
rdata[output_offset] = rank;
|
913
|
-
break;
|
914
|
-
}
|
915
|
-
case ExpressionType::WINDOW_PERCENT_RANK: {
|
916
|
-
int64_t denom = (int64_t)bounds.partition_end - bounds.partition_start - 1;
|
917
|
-
double percent_rank = denom > 0 ? ((double)rank - 1) / denom : 0;
|
918
|
-
auto rdata = FlatVector::GetData<double>(result);
|
919
|
-
rdata[output_offset] = percent_rank;
|
920
|
-
break;
|
921
|
-
}
|
922
|
-
case ExpressionType::WINDOW_CUME_DIST: {
|
923
|
-
int64_t denom = (int64_t)bounds.partition_end - bounds.partition_start;
|
924
|
-
double cume_dist = denom > 0 ? ((double)(bounds.peer_end - bounds.partition_start)) / denom : 0;
|
925
|
-
auto rdata = FlatVector::GetData<double>(result);
|
926
|
-
rdata[output_offset] = cume_dist;
|
927
|
-
break;
|
928
|
-
}
|
929
|
-
case ExpressionType::WINDOW_NTILE: {
|
930
|
-
D_ASSERT(payload_collection.ColumnCount() == 1);
|
931
|
-
if (CellIsNull(payload_collection, 0, row_idx)) {
|
932
|
-
FlatVector::SetNull(result, output_offset, true);
|
933
|
-
} else {
|
934
|
-
auto n_param = GetCell<int64_t>(payload_collection, 0, row_idx);
|
935
|
-
if (n_param < 1) {
|
936
|
-
throw InvalidInputException("Argument for ntile must be greater than zero");
|
937
|
-
}
|
938
|
-
// With thanks from SQLite's ntileValueFunc()
|
939
|
-
int64_t n_total = bounds.partition_end - bounds.partition_start;
|
940
|
-
if (n_param > n_total) {
|
941
|
-
// more groups allowed than we have values
|
942
|
-
// map every entry to a unique group
|
943
|
-
n_param = n_total;
|
944
|
-
}
|
945
|
-
int64_t n_size = (n_total / n_param);
|
946
|
-
// find the row idx within the group
|
947
|
-
D_ASSERT(row_idx >= bounds.partition_start);
|
948
|
-
int64_t adjusted_row_idx = row_idx - bounds.partition_start;
|
949
|
-
// now compute the ntile
|
950
|
-
int64_t n_large = n_total - n_param * n_size;
|
951
|
-
int64_t i_small = n_large * (n_size + 1);
|
952
|
-
int64_t result_ntile;
|
953
|
-
|
954
|
-
D_ASSERT((n_large * (n_size + 1) + (n_param - n_large) * n_size) == n_total);
|
955
|
-
|
956
|
-
if (adjusted_row_idx < i_small) {
|
957
|
-
result_ntile = 1 + adjusted_row_idx / (n_size + 1);
|
958
|
-
} else {
|
959
|
-
result_ntile = 1 + n_large + (adjusted_row_idx - i_small) / n_size;
|
960
|
-
}
|
961
|
-
// result has to be between [1, NTILE]
|
962
|
-
D_ASSERT(result_ntile >= 1 && result_ntile <= n_param);
|
963
|
-
auto rdata = FlatVector::GetData<int64_t>(result);
|
964
|
-
rdata[output_offset] = result_ntile;
|
965
|
-
}
|
966
|
-
break;
|
967
|
-
}
|
968
|
-
case ExpressionType::WINDOW_LEAD:
|
969
|
-
case ExpressionType::WINDOW_LAG: {
|
970
|
-
int64_t offset = 1;
|
971
|
-
if (wexpr.offset_expr) {
|
972
|
-
offset = leadlag_offset.GetCell<int64_t>(output_offset);
|
973
|
-
}
|
974
|
-
int64_t val_idx = (int64_t)row_idx;
|
975
|
-
if (wexpr.type == ExpressionType::WINDOW_LEAD) {
|
976
|
-
val_idx += offset;
|
977
|
-
} else {
|
978
|
-
val_idx -= offset;
|
979
|
-
}
|
980
|
-
|
981
|
-
idx_t delta = 0;
|
982
|
-
if (val_idx < (int64_t)row_idx) {
|
983
|
-
// Count backwards
|
984
|
-
delta = idx_t(row_idx - val_idx);
|
985
|
-
val_idx = FindPrevStart(ignore_nulls, bounds.partition_start, row_idx, delta);
|
986
|
-
} else if (val_idx > (int64_t)row_idx) {
|
987
|
-
delta = idx_t(val_idx - row_idx);
|
988
|
-
val_idx = FindNextStart(ignore_nulls, row_idx + 1, bounds.partition_end, delta);
|
989
|
-
}
|
990
|
-
// else offset is zero, so don't move.
|
991
|
-
|
992
|
-
if (!delta) {
|
993
|
-
CopyCell(payload_collection, 0, val_idx, result, output_offset);
|
994
|
-
} else if (wexpr.default_expr) {
|
995
|
-
leadlag_default.CopyCell(result, output_offset);
|
996
|
-
} else {
|
997
|
-
FlatVector::SetNull(result, output_offset, true);
|
998
|
-
}
|
999
|
-
break;
|
1000
|
-
}
|
1001
|
-
case ExpressionType::WINDOW_FIRST_VALUE: {
|
1002
|
-
// Same as NTH_VALUE(..., 1)
|
1003
|
-
idx_t n = 1;
|
1004
|
-
const auto first_idx = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
|
1005
|
-
if (!n) {
|
1006
|
-
CopyCell(payload_collection, 0, first_idx, result, output_offset);
|
1007
|
-
} else {
|
1008
|
-
FlatVector::SetNull(result, output_offset, true);
|
1009
|
-
}
|
1010
|
-
break;
|
1011
|
-
}
|
1012
|
-
case ExpressionType::WINDOW_LAST_VALUE: {
|
1013
|
-
idx_t n = 1;
|
1014
|
-
const auto last_idx = FindPrevStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
|
1015
|
-
if (!n) {
|
1016
|
-
CopyCell(payload_collection, 0, last_idx, result, output_offset);
|
1017
|
-
} else {
|
1018
|
-
FlatVector::SetNull(result, output_offset, true);
|
1019
|
-
}
|
1020
|
-
break;
|
1021
|
-
}
|
1022
|
-
case ExpressionType::WINDOW_NTH_VALUE: {
|
1023
|
-
D_ASSERT(payload_collection.ColumnCount() == 2);
|
1024
|
-
// Returns value evaluated at the row that is the n'th row of the window frame (counting from 1);
|
1025
|
-
// returns NULL if there is no such row.
|
1026
|
-
if (CellIsNull(payload_collection, 1, row_idx)) {
|
1027
|
-
FlatVector::SetNull(result, output_offset, true);
|
1028
|
-
} else {
|
1029
|
-
auto n_param = GetCell<int64_t>(payload_collection, 1, row_idx);
|
1030
|
-
if (n_param < 1) {
|
1031
|
-
FlatVector::SetNull(result, output_offset, true);
|
1032
|
-
} else {
|
1033
|
-
auto n = idx_t(n_param);
|
1034
|
-
const auto nth_index = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
|
1035
|
-
if (!n) {
|
1036
|
-
CopyCell(payload_collection, 0, nth_index, result, output_offset);
|
1037
|
-
} else {
|
1038
|
-
FlatVector::SetNull(result, output_offset, true);
|
1039
|
-
}
|
1040
|
-
}
|
1041
|
-
}
|
1042
|
-
break;
|
1043
|
-
}
|
1044
|
-
default:
|
1045
|
-
throw InternalException("Window aggregate type %s", ExpressionTypeToString(wexpr.type));
|
1046
|
-
}
|
1047
|
-
}
|
1048
|
-
|
1049
|
-
result.Verify(input_chunk.size());
|
1050
|
-
}
|
1051
|
-
|
1052
116
|
//===--------------------------------------------------------------------===//
|
1053
117
|
// Sink
|
1054
118
|
//===--------------------------------------------------------------------===//
|
@@ -1060,9 +124,11 @@ SinkResultType PhysicalWindow::Sink(ExecutionContext &context, DataChunk &chunk,
|
|
1060
124
|
return SinkResultType::NEED_MORE_INPUT;
|
1061
125
|
}
|
1062
126
|
|
1063
|
-
|
1064
|
-
auto &lstate =
|
127
|
+
SinkCombineResultType PhysicalWindow::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
|
128
|
+
auto &lstate = input.local_state.Cast<WindowLocalSinkState>();
|
1065
129
|
lstate.Combine();
|
130
|
+
|
131
|
+
return SinkCombineResultType::FINISHED;
|
1066
132
|
}
|
1067
133
|
|
1068
134
|
unique_ptr<LocalSinkState> PhysicalWindow::GetLocalSinkState(ExecutionContext &context) const {
|
@@ -1075,8 +141,8 @@ unique_ptr<GlobalSinkState> PhysicalWindow::GetGlobalSinkState(ClientContext &co
|
|
1075
141
|
}
|
1076
142
|
|
1077
143
|
SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
1078
|
-
|
1079
|
-
auto &state =
|
144
|
+
OperatorSinkFinalizeInput &input) const {
|
145
|
+
auto &state = input.global_state.Cast<WindowGlobalSinkState>();
|
1080
146
|
|
1081
147
|
// Did we get any data?
|
1082
148
|
if (!state.global_partition->count) {
|
@@ -1106,64 +172,97 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
|
|
1106
172
|
//===--------------------------------------------------------------------===//
|
1107
173
|
// Source
|
1108
174
|
//===--------------------------------------------------------------------===//
|
175
|
+
class WindowPartitionSourceState;
|
176
|
+
|
1109
177
|
class WindowGlobalSourceState : public GlobalSourceState {
|
1110
178
|
public:
|
1111
|
-
|
1112
|
-
|
179
|
+
using HashGroupSourcePtr = unique_ptr<WindowPartitionSourceState>;
|
180
|
+
using ScannerPtr = unique_ptr<RowDataCollectionScanner>;
|
181
|
+
using Task = std::pair<WindowPartitionSourceState *, ScannerPtr>;
|
1113
182
|
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
183
|
+
WindowGlobalSourceState(ClientContext &context_p, WindowGlobalSinkState &gsink_p);
|
184
|
+
|
185
|
+
//! Get the next task
|
186
|
+
Task NextTask(idx_t hash_bin);
|
187
|
+
|
188
|
+
//! Context for executing computations
|
189
|
+
ClientContext &context;
|
190
|
+
//! All the sunk data
|
191
|
+
WindowGlobalSinkState &gsink;
|
192
|
+
//! The next group to build.
|
193
|
+
atomic<idx_t> next_build;
|
194
|
+
//! The built groups
|
195
|
+
vector<HashGroupSourcePtr> built;
|
196
|
+
//! Serialise access to the built hash groups
|
197
|
+
mutable mutex built_lock;
|
198
|
+
//! The number of unfinished tasks
|
199
|
+
atomic<idx_t> tasks_remaining;
|
1117
200
|
|
1118
201
|
public:
|
1119
202
|
idx_t MaxThreads() override {
|
1120
|
-
|
1121
|
-
|
1122
|
-
return 1;
|
1123
|
-
}
|
203
|
+
return tasks_remaining;
|
204
|
+
}
|
1124
205
|
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
206
|
+
private:
|
207
|
+
Task CreateTask(idx_t hash_bin);
|
208
|
+
Task StealWork();
|
209
|
+
};
|
210
|
+
|
211
|
+
WindowGlobalSourceState::WindowGlobalSourceState(ClientContext &context_p, WindowGlobalSinkState &gsink_p)
|
212
|
+
: context(context_p), gsink(gsink_p), next_build(0), tasks_remaining(0) {
|
213
|
+
auto &hash_groups = gsink.global_partition->hash_groups;
|
214
|
+
|
215
|
+
auto &gpart = gsink.global_partition;
|
216
|
+
if (hash_groups.empty()) {
|
217
|
+
// OVER()
|
218
|
+
built.resize(1);
|
219
|
+
if (gpart->rows) {
|
220
|
+
tasks_remaining += gpart->rows->blocks.size();
|
1128
221
|
}
|
222
|
+
} else {
|
223
|
+
built.resize(hash_groups.size());
|
224
|
+
idx_t batch_base = 0;
|
225
|
+
for (auto &hash_group : hash_groups) {
|
226
|
+
if (!hash_group) {
|
227
|
+
continue;
|
228
|
+
}
|
229
|
+
auto &global_sort_state = *hash_group->global_sort;
|
230
|
+
if (global_sort_state.sorted_blocks.empty()) {
|
231
|
+
continue;
|
232
|
+
}
|
233
|
+
|
234
|
+
D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
|
235
|
+
auto &sb = *global_sort_state.sorted_blocks[0];
|
236
|
+
auto &sd = *sb.payload_data;
|
237
|
+
tasks_remaining += sd.data_blocks.size();
|
1129
238
|
|
1130
|
-
|
239
|
+
hash_group->batch_base = batch_base;
|
240
|
+
batch_base += sd.data_blocks.size();
|
241
|
+
}
|
1131
242
|
}
|
1132
|
-
}
|
243
|
+
}
|
1133
244
|
|
1134
|
-
// Per-
|
1135
|
-
class
|
245
|
+
// Per-bin evaluation state (build and evaluate)
|
246
|
+
class WindowPartitionSourceState {
|
1136
247
|
public:
|
1137
248
|
using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
|
1138
|
-
using
|
1139
|
-
using
|
249
|
+
using ExecutorPtr = unique_ptr<WindowExecutor>;
|
250
|
+
using Executors = vector<ExecutorPtr>;
|
1140
251
|
|
1141
|
-
|
1142
|
-
: context(context
|
1143
|
-
|
1144
|
-
vector<LogicalType> output_types;
|
1145
|
-
for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
|
1146
|
-
D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
|
1147
|
-
auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
|
1148
|
-
output_types.emplace_back(wexpr.return_type);
|
1149
|
-
}
|
1150
|
-
output_chunk.Initialize(Allocator::Get(context.client), output_types);
|
1151
|
-
|
1152
|
-
const auto &input_types = gsink.payload_types;
|
1153
|
-
layout.Initialize(input_types);
|
1154
|
-
input_chunk.Initialize(gsink.allocator, input_types);
|
252
|
+
WindowPartitionSourceState(ClientContext &context, WindowGlobalSourceState &gsource)
|
253
|
+
: context(context), op(gsource.gsink.op), gsource(gsource), read_block_idx(0), unscanned(0) {
|
254
|
+
layout.Initialize(gsource.gsink.global_partition->payload_types);
|
1155
255
|
}
|
1156
256
|
|
257
|
+
unique_ptr<RowDataCollectionScanner> GetScanner() const;
|
1157
258
|
void MaterializeSortedData();
|
1158
|
-
void
|
1159
|
-
void Scan(DataChunk &chunk);
|
259
|
+
void BuildPartition(WindowGlobalSinkState &gstate, const idx_t hash_bin);
|
1160
260
|
|
1161
|
-
HashGroupPtr hash_group;
|
1162
261
|
ClientContext &context;
|
1163
262
|
const PhysicalWindow &op;
|
263
|
+
WindowGlobalSourceState &gsource;
|
1164
264
|
|
1165
|
-
|
1166
|
-
|
265
|
+
HashGroupPtr hash_group;
|
1167
266
|
//! The generated input chunks
|
1168
267
|
unique_ptr<RowDataCollection> rows;
|
1169
268
|
unique_ptr<RowDataCollection> heap;
|
@@ -1174,20 +273,21 @@ public:
|
|
1174
273
|
//! The order boundary mask
|
1175
274
|
vector<validity_t> order_bits;
|
1176
275
|
ValidityMask order_mask;
|
276
|
+
//! External paging
|
277
|
+
bool external;
|
1177
278
|
//! The current execution functions
|
1178
|
-
|
279
|
+
Executors executors;
|
1179
280
|
|
1180
|
-
//! The
|
281
|
+
//! The bin number
|
1181
282
|
idx_t hash_bin;
|
1182
|
-
|
1183
|
-
|
1184
|
-
|
1185
|
-
|
1186
|
-
|
1187
|
-
DataChunk output_chunk;
|
283
|
+
|
284
|
+
//! The next block to read.
|
285
|
+
mutable atomic<idx_t> read_block_idx;
|
286
|
+
//! The number of remaining unscanned blocks.
|
287
|
+
atomic<idx_t> unscanned;
|
1188
288
|
};
|
1189
289
|
|
1190
|
-
void
|
290
|
+
void WindowPartitionSourceState::MaterializeSortedData() {
|
1191
291
|
auto &global_sort_state = *hash_group->global_sort;
|
1192
292
|
if (global_sort_state.sorted_blocks.empty()) {
|
1193
293
|
return;
|
@@ -1226,7 +326,21 @@ void WindowLocalSourceState::MaterializeSortedData() {
|
|
1226
326
|
[&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
|
1227
327
|
}
|
1228
328
|
|
1229
|
-
|
329
|
+
unique_ptr<RowDataCollectionScanner> WindowPartitionSourceState::GetScanner() const {
|
330
|
+
auto &gsink = *gsource.gsink.global_partition;
|
331
|
+
if ((gsink.rows && !hash_bin) || hash_bin < gsink.hash_groups.size()) {
|
332
|
+
const auto block_idx = read_block_idx++;
|
333
|
+
if (block_idx >= rows->blocks.size()) {
|
334
|
+
return nullptr;
|
335
|
+
}
|
336
|
+
// Second pass can flush
|
337
|
+
--gsource.tasks_remaining;
|
338
|
+
return make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, block_idx, true);
|
339
|
+
}
|
340
|
+
return nullptr;
|
341
|
+
}
|
342
|
+
|
343
|
+
void WindowPartitionSourceState::BuildPartition(WindowGlobalSinkState &gstate, const idx_t hash_bin_p) {
|
1230
344
|
// Get rid of any stale data
|
1231
345
|
hash_bin = hash_bin_p;
|
1232
346
|
|
@@ -1236,11 +350,12 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1236
350
|
// 3. Multiple partitions (sorting and hashing)
|
1237
351
|
|
1238
352
|
// How big is the partition?
|
353
|
+
auto &gpart = *gsource.gsink.global_partition;
|
1239
354
|
idx_t count = 0;
|
1240
|
-
if (hash_bin <
|
1241
|
-
count =
|
1242
|
-
} else if (
|
1243
|
-
count =
|
355
|
+
if (hash_bin < gpart.hash_groups.size() && gpart.hash_groups[hash_bin]) {
|
356
|
+
count = gpart.hash_groups[hash_bin]->count;
|
357
|
+
} else if (gpart.rows && !hash_bin) {
|
358
|
+
count = gpart.count;
|
1244
359
|
} else {
|
1245
360
|
return;
|
1246
361
|
}
|
@@ -1256,19 +371,20 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1256
371
|
order_mask.Initialize(order_bits.data());
|
1257
372
|
|
1258
373
|
// Scan the sorted data into new Collections
|
1259
|
-
|
1260
|
-
if (
|
374
|
+
external = gpart.external;
|
375
|
+
if (gpart.rows && !hash_bin) {
|
1261
376
|
// Simple mask
|
1262
377
|
partition_mask.SetValidUnsafe(0);
|
1263
378
|
order_mask.SetValidUnsafe(0);
|
1264
379
|
// No partition - align the heap blocks with the row blocks
|
1265
|
-
rows =
|
1266
|
-
heap =
|
1267
|
-
RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *
|
380
|
+
rows = gpart.rows->CloneEmpty(gpart.rows->keep_pinned);
|
381
|
+
heap = gpart.strings->CloneEmpty(gpart.strings->keep_pinned);
|
382
|
+
RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gpart.rows, *gpart.strings, layout);
|
1268
383
|
external = true;
|
1269
|
-
} else if (hash_bin <
|
384
|
+
} else if (hash_bin < gpart.hash_groups.size()) {
|
1270
385
|
// Overwrite the collections with the sorted data
|
1271
|
-
|
386
|
+
D_ASSERT(gpart.hash_groups[hash_bin].get());
|
387
|
+
hash_group = std::move(gpart.hash_groups[hash_bin]);
|
1272
388
|
hash_group->ComputeMasks(partition_mask, order_mask);
|
1273
389
|
external = hash_group->global_sort->external;
|
1274
390
|
MaterializeSortedData();
|
@@ -1277,17 +393,18 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1277
393
|
}
|
1278
394
|
|
1279
395
|
// Create the executors for each function
|
1280
|
-
|
396
|
+
executors.clear();
|
1281
397
|
for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
|
1282
398
|
D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
|
1283
399
|
auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
|
1284
|
-
auto wexec =
|
1285
|
-
|
400
|
+
auto wexec = WindowExecutorFactory(wexpr, context, partition_mask, order_mask, count, gstate.mode);
|
401
|
+
executors.emplace_back(std::move(wexec));
|
1286
402
|
}
|
1287
403
|
|
1288
404
|
// First pass over the input without flushing
|
1289
|
-
|
1290
|
-
|
405
|
+
DataChunk input_chunk;
|
406
|
+
input_chunk.Initialize(gpart.allocator, gpart.payload_types);
|
407
|
+
auto scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
|
1291
408
|
idx_t input_idx = 0;
|
1292
409
|
while (true) {
|
1293
410
|
input_chunk.Reset();
|
@@ -1297,38 +414,221 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1297
414
|
}
|
1298
415
|
|
1299
416
|
// TODO: Parallelization opportunity
|
1300
|
-
for (auto &wexec :
|
417
|
+
for (auto &wexec : executors) {
|
1301
418
|
wexec->Sink(input_chunk, input_idx, scanner->Count());
|
1302
419
|
}
|
1303
420
|
input_idx += input_chunk.size();
|
1304
421
|
}
|
1305
422
|
|
1306
423
|
// TODO: Parallelization opportunity
|
1307
|
-
for (auto &wexec :
|
1308
|
-
wexec->Finalize(
|
424
|
+
for (auto &wexec : executors) {
|
425
|
+
wexec->Finalize();
|
1309
426
|
}
|
1310
427
|
|
1311
428
|
// External scanning assumes all blocks are swizzled.
|
1312
429
|
scanner->ReSwizzle();
|
1313
430
|
|
1314
|
-
//
|
1315
|
-
|
431
|
+
// Start the block countdown
|
432
|
+
unscanned = rows->blocks.size();
|
433
|
+
}
|
434
|
+
|
435
|
+
// Per-thread scan state
|
436
|
+
class WindowLocalSourceState : public LocalSourceState {
|
437
|
+
public:
|
438
|
+
using ReadStatePtr = unique_ptr<WindowExecutorState>;
|
439
|
+
using ReadStates = vector<ReadStatePtr>;
|
440
|
+
|
441
|
+
explicit WindowLocalSourceState(WindowGlobalSourceState &gsource);
|
442
|
+
void UpdateBatchIndex();
|
443
|
+
bool NextPartition();
|
444
|
+
void Scan(DataChunk &chunk);
|
445
|
+
|
446
|
+
//! The shared source state
|
447
|
+
WindowGlobalSourceState &gsource;
|
448
|
+
//! The current bin being processed
|
449
|
+
idx_t hash_bin;
|
450
|
+
//! The current batch index (for output reordering)
|
451
|
+
idx_t batch_index;
|
452
|
+
//! The current source being processed
|
453
|
+
optional_ptr<WindowPartitionSourceState> partition_source;
|
454
|
+
//! The read cursor
|
455
|
+
unique_ptr<RowDataCollectionScanner> scanner;
|
456
|
+
//! Buffer for the inputs
|
457
|
+
DataChunk input_chunk;
|
458
|
+
//! Executor read states.
|
459
|
+
ReadStates read_states;
|
460
|
+
//! Buffer for window results
|
461
|
+
DataChunk output_chunk;
|
462
|
+
};
|
463
|
+
|
464
|
+
WindowLocalSourceState::WindowLocalSourceState(WindowGlobalSourceState &gsource)
|
465
|
+
: gsource(gsource), hash_bin(gsource.built.size()), batch_index(0) {
|
466
|
+
auto &gsink = *gsource.gsink.global_partition;
|
467
|
+
auto &op = gsource.gsink.op;
|
468
|
+
|
469
|
+
input_chunk.Initialize(gsink.allocator, gsink.payload_types);
|
470
|
+
|
471
|
+
vector<LogicalType> output_types;
|
472
|
+
for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
|
473
|
+
D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
|
474
|
+
auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
|
475
|
+
output_types.emplace_back(wexpr.return_type);
|
476
|
+
}
|
477
|
+
output_chunk.Initialize(Allocator::Get(gsource.context), output_types);
|
478
|
+
}
|
479
|
+
|
480
|
+
WindowGlobalSourceState::Task WindowGlobalSourceState::CreateTask(idx_t hash_bin) {
|
481
|
+
// Build outside the lock so no one tries to steal before we are done.
|
482
|
+
auto partition_source = make_uniq<WindowPartitionSourceState>(context, *this);
|
483
|
+
partition_source->BuildPartition(gsink, hash_bin);
|
484
|
+
Task result(partition_source.get(), partition_source->GetScanner());
|
485
|
+
|
486
|
+
// Is there any data to scan?
|
487
|
+
if (result.second) {
|
488
|
+
lock_guard<mutex> built_guard(built_lock);
|
489
|
+
built[hash_bin] = std::move(partition_source);
|
490
|
+
|
491
|
+
return result;
|
492
|
+
}
|
493
|
+
|
494
|
+
return Task();
|
495
|
+
}
|
496
|
+
|
497
|
+
WindowGlobalSourceState::Task WindowGlobalSourceState::StealWork() {
|
498
|
+
for (idx_t hash_bin = 0; hash_bin < built.size(); ++hash_bin) {
|
499
|
+
lock_guard<mutex> built_guard(built_lock);
|
500
|
+
auto &partition_source = built[hash_bin];
|
501
|
+
if (!partition_source) {
|
502
|
+
continue;
|
503
|
+
}
|
504
|
+
|
505
|
+
Task result(partition_source.get(), partition_source->GetScanner());
|
506
|
+
|
507
|
+
// Is there any data to scan?
|
508
|
+
if (result.second) {
|
509
|
+
return result;
|
510
|
+
}
|
511
|
+
}
|
512
|
+
|
513
|
+
// Nothing to steal
|
514
|
+
return Task();
|
515
|
+
}
|
516
|
+
|
517
|
+
WindowGlobalSourceState::Task WindowGlobalSourceState::NextTask(idx_t hash_bin) {
|
518
|
+
auto &hash_groups = gsink.global_partition->hash_groups;
|
519
|
+
const auto bin_count = built.size();
|
520
|
+
|
521
|
+
// Flush unneeded data
|
522
|
+
if (hash_bin < bin_count) {
|
523
|
+
// Lock and delete when all blocks have been scanned
|
524
|
+
// We do this here instead of in NextScan so the WindowLocalSourceState
|
525
|
+
// has a chance to delete its state objects first,
|
526
|
+
// which may reference the partition_source
|
527
|
+
|
528
|
+
// Delete data outside the lock in case it is slow
|
529
|
+
HashGroupSourcePtr killed;
|
530
|
+
lock_guard<mutex> built_guard(built_lock);
|
531
|
+
auto &partition_source = built[hash_bin];
|
532
|
+
if (partition_source && !partition_source->unscanned) {
|
533
|
+
killed = std::move(partition_source);
|
534
|
+
}
|
535
|
+
}
|
536
|
+
|
537
|
+
hash_bin = next_build++;
|
538
|
+
if (hash_bin < bin_count) {
|
539
|
+
// Find a non-empty hash group.
|
540
|
+
for (; hash_bin < hash_groups.size(); hash_bin = next_build++) {
|
541
|
+
if (hash_groups[hash_bin]) {
|
542
|
+
auto result = CreateTask(hash_bin);
|
543
|
+
if (result.second) {
|
544
|
+
return result;
|
545
|
+
}
|
546
|
+
}
|
547
|
+
}
|
548
|
+
|
549
|
+
// OVER() doesn't have a hash_group
|
550
|
+
if (hash_groups.empty()) {
|
551
|
+
auto result = CreateTask(hash_bin);
|
552
|
+
if (result.second) {
|
553
|
+
return result;
|
554
|
+
}
|
555
|
+
}
|
556
|
+
}
|
557
|
+
|
558
|
+
// Work stealing
|
559
|
+
while (!context.interrupted && tasks_remaining) {
|
560
|
+
auto result = StealWork();
|
561
|
+
if (result.second) {
|
562
|
+
return result;
|
563
|
+
}
|
564
|
+
|
565
|
+
// If there is nothing to steal but there are unfinished partitions,
|
566
|
+
// yield until any pending builds are done.
|
567
|
+
TaskScheduler::GetScheduler(context).YieldThread();
|
568
|
+
}
|
569
|
+
|
570
|
+
return Task();
|
571
|
+
}
|
572
|
+
|
573
|
+
void WindowLocalSourceState::UpdateBatchIndex() {
|
574
|
+
D_ASSERT(partition_source);
|
575
|
+
D_ASSERT(scanner.get());
|
576
|
+
|
577
|
+
batch_index = partition_source->hash_group ? partition_source->hash_group->batch_base : 0;
|
578
|
+
batch_index += scanner->BlockIndex();
|
579
|
+
}
|
580
|
+
|
581
|
+
bool WindowLocalSourceState::NextPartition() {
|
582
|
+
// Release old states before the source
|
583
|
+
scanner.reset();
|
584
|
+
read_states.clear();
|
585
|
+
|
586
|
+
// Get a partition_source that is not finished
|
587
|
+
while (!scanner) {
|
588
|
+
auto task = gsource.NextTask(hash_bin);
|
589
|
+
if (!task.first) {
|
590
|
+
return false;
|
591
|
+
}
|
592
|
+
partition_source = task.first;
|
593
|
+
scanner = std::move(task.second);
|
594
|
+
hash_bin = partition_source->hash_bin;
|
595
|
+
UpdateBatchIndex();
|
596
|
+
}
|
597
|
+
|
598
|
+
for (auto &wexec : partition_source->executors) {
|
599
|
+
read_states.emplace_back(wexec->GetExecutorState());
|
600
|
+
}
|
601
|
+
|
602
|
+
return true;
|
1316
603
|
}
|
1317
604
|
|
1318
605
|
void WindowLocalSourceState::Scan(DataChunk &result) {
|
1319
606
|
D_ASSERT(scanner);
|
1320
607
|
if (!scanner->Remaining()) {
|
1321
|
-
|
608
|
+
lock_guard<mutex> built_guard(gsource.built_lock);
|
609
|
+
--partition_source->unscanned;
|
610
|
+
scanner = partition_source->GetScanner();
|
611
|
+
|
612
|
+
if (!scanner) {
|
613
|
+
partition_source = nullptr;
|
614
|
+
read_states.clear();
|
615
|
+
return;
|
616
|
+
}
|
617
|
+
|
618
|
+
UpdateBatchIndex();
|
1322
619
|
}
|
1323
620
|
|
1324
621
|
const auto position = scanner->Scanned();
|
1325
622
|
input_chunk.Reset();
|
1326
623
|
scanner->Scan(input_chunk);
|
1327
624
|
|
625
|
+
auto &executors = partition_source->executors;
|
1328
626
|
output_chunk.Reset();
|
1329
|
-
for (idx_t expr_idx = 0; expr_idx <
|
1330
|
-
auto &executor = *
|
1331
|
-
|
627
|
+
for (idx_t expr_idx = 0; expr_idx < executors.size(); ++expr_idx) {
|
628
|
+
auto &executor = *executors[expr_idx];
|
629
|
+
auto &lstate = *read_states[expr_idx];
|
630
|
+
auto &result = output_chunk.data[expr_idx];
|
631
|
+
executor.Evaluate(position, input_chunk, result, lstate);
|
1332
632
|
}
|
1333
633
|
output_chunk.SetCardinality(input_chunk);
|
1334
634
|
output_chunk.Verify();
|
@@ -1345,43 +645,42 @@ void WindowLocalSourceState::Scan(DataChunk &result) {
|
|
1345
645
|
}
|
1346
646
|
|
1347
647
|
unique_ptr<LocalSourceState> PhysicalWindow::GetLocalSourceState(ExecutionContext &context,
|
1348
|
-
GlobalSourceState &
|
1349
|
-
auto &
|
1350
|
-
return make_uniq<WindowLocalSourceState>(
|
648
|
+
GlobalSourceState &gsource_p) const {
|
649
|
+
auto &gsource = gsource_p.Cast<WindowGlobalSourceState>();
|
650
|
+
return make_uniq<WindowLocalSourceState>(gsource);
|
1351
651
|
}
|
1352
652
|
|
1353
653
|
unique_ptr<GlobalSourceState> PhysicalWindow::GetGlobalSourceState(ClientContext &context) const {
|
1354
654
|
auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
|
1355
|
-
return make_uniq<WindowGlobalSourceState>(gsink);
|
655
|
+
return make_uniq<WindowGlobalSourceState>(context, gsink);
|
656
|
+
}
|
657
|
+
|
658
|
+
bool PhysicalWindow::SupportsBatchIndex() const {
|
659
|
+
// We can only preserve order for single partitioning
|
660
|
+
// or work stealing causes out of order batch numbers
|
661
|
+
auto &wexpr = select_list[0]->Cast<BoundWindowExpression>();
|
662
|
+
return wexpr.partitions.empty() && !wexpr.orders.empty();
|
663
|
+
}
|
664
|
+
|
665
|
+
OrderPreservationType PhysicalWindow::SourceOrder() const {
|
666
|
+
return SupportsBatchIndex() ? OrderPreservationType::FIXED_ORDER : OrderPreservationType::NO_ORDER;
|
667
|
+
}
|
668
|
+
|
669
|
+
idx_t PhysicalWindow::GetBatchIndex(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
|
670
|
+
LocalSourceState &lstate_p) const {
|
671
|
+
auto &lstate = lstate_p.Cast<WindowLocalSourceState>();
|
672
|
+
return lstate.batch_index;
|
1356
673
|
}
|
1357
674
|
|
1358
675
|
SourceResultType PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk,
|
1359
676
|
OperatorSourceInput &input) const {
|
1360
677
|
auto &lsource = input.local_state.Cast<WindowLocalSourceState>();
|
1361
|
-
auto &gsource = input.global_state.Cast<WindowGlobalSourceState>();
|
1362
|
-
auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
|
1363
|
-
|
1364
|
-
auto &hash_groups = gsink.global_partition->hash_groups;
|
1365
|
-
const auto bin_count = hash_groups.empty() ? 1 : hash_groups.size();
|
1366
|
-
|
1367
678
|
while (chunk.size() == 0) {
|
1368
679
|
// Move to the next bin if we are done.
|
1369
|
-
while (!lsource.scanner
|
1370
|
-
lsource.
|
1371
|
-
lsource.rows.reset();
|
1372
|
-
lsource.heap.reset();
|
1373
|
-
lsource.hash_group.reset();
|
1374
|
-
auto hash_bin = gsource.next_bin++;
|
1375
|
-
if (hash_bin >= bin_count) {
|
680
|
+
while (!lsource.scanner) {
|
681
|
+
if (!lsource.NextPartition()) {
|
1376
682
|
return chunk.size() > 0 ? SourceResultType::HAVE_MORE_OUTPUT : SourceResultType::FINISHED;
|
1377
683
|
}
|
1378
|
-
|
1379
|
-
for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_bin++) {
|
1380
|
-
if (hash_groups[hash_bin]) {
|
1381
|
-
break;
|
1382
|
-
}
|
1383
|
-
}
|
1384
|
-
lsource.GeneratePartition(gsink, hash_bin);
|
1385
684
|
}
|
1386
685
|
|
1387
686
|
lsource.Scan(chunk);
|