duckdb 0.8.2-dev161.0 → 0.8.2-dev1724.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +91 -38
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +194 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +79 -12
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +44 -19
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +4619 -4446
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/extra_type_info.cpp +506 -0
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +14 -14
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types/value.cpp +11 -6
- package/src/duckdb/src/common/types.cpp +9 -656
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/arrow.cpp +19 -0
- package/src/duckdb/src/function/table/arrow_conversion.cpp +35 -1
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/system/test_all_types.cpp +7 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +22 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +4 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +93 -88
- package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +79 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +44 -31
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
- package/src/statement.cpp +10 -3
- package/test/columns.test.ts +24 -1
- package/test/test_all_types.test.ts +234 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -271,6 +271,8 @@ static inline bool BoundaryNeedsPeer(const WindowBoundary &boundary) {
|
|
271
271
|
}
|
272
272
|
}
|
273
273
|
|
274
|
+
enum WindowBounds : uint8_t { PARTITION_BEGIN, PARTITION_END, PEER_BEGIN, PEER_END, WINDOW_BEGIN, WINDOW_END };
|
275
|
+
|
274
276
|
struct WindowBoundariesState {
|
275
277
|
static inline bool IsScalar(const unique_ptr<Expression> &expr) {
|
276
278
|
return expr ? expr->IsScalar() : true;
|
@@ -287,7 +289,11 @@ struct WindowBoundariesState {
|
|
287
289
|
needs_peer(BoundaryNeedsPeer(wexpr.end) || wexpr.type == ExpressionType::WINDOW_CUME_DIST) {
|
288
290
|
}
|
289
291
|
|
290
|
-
void Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t
|
292
|
+
void Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t chunk_idx,
|
293
|
+
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
294
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
295
|
+
|
296
|
+
void Bounds(DataChunk &bounds, idx_t row_idx, WindowInputColumn &range, const idx_t count,
|
291
297
|
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
292
298
|
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
293
299
|
|
@@ -311,15 +317,9 @@ struct WindowBoundariesState {
|
|
311
317
|
idx_t valid_end = 0;
|
312
318
|
int64_t window_start = -1;
|
313
319
|
int64_t window_end = -1;
|
314
|
-
|
315
|
-
bool is_peer = false;
|
320
|
+
FrameBounds prev;
|
316
321
|
};
|
317
322
|
|
318
|
-
static bool WindowNeedsRank(const BoundWindowExpression &wexpr) {
|
319
|
-
return wexpr.type == ExpressionType::WINDOW_PERCENT_RANK || wexpr.type == ExpressionType::WINDOW_RANK ||
|
320
|
-
wexpr.type == ExpressionType::WINDOW_RANK_DENSE || wexpr.type == ExpressionType::WINDOW_CUME_DIST;
|
321
|
-
}
|
322
|
-
|
323
323
|
template <typename T>
|
324
324
|
static T GetCell(DataChunk &chunk, idx_t column, idx_t index) {
|
325
325
|
D_ASSERT(chunk.ColumnCount() > column);
|
@@ -343,7 +343,7 @@ static void CopyCell(DataChunk &chunk, idx_t column, idx_t index, Vector &target
|
|
343
343
|
template <typename T>
|
344
344
|
struct WindowColumnIterator {
|
345
345
|
using iterator = WindowColumnIterator<T>;
|
346
|
-
using iterator_category = std::
|
346
|
+
using iterator_category = std::random_access_iterator_tag;
|
347
347
|
using difference_type = std::ptrdiff_t;
|
348
348
|
using value_type = T;
|
349
349
|
using reference = T;
|
@@ -352,6 +352,7 @@ struct WindowColumnIterator {
|
|
352
352
|
explicit WindowColumnIterator(WindowInputColumn &coll_p, pointer pos_p = 0) : coll(&coll_p), pos(pos_p) {
|
353
353
|
}
|
354
354
|
|
355
|
+
// Forward iterator
|
355
356
|
inline reference operator*() const {
|
356
357
|
return coll->GetCell<T>(pos);
|
357
358
|
}
|
@@ -369,12 +370,64 @@ struct WindowColumnIterator {
|
|
369
370
|
return result;
|
370
371
|
}
|
371
372
|
|
373
|
+
// Bidirectional iterator
|
374
|
+
inline iterator &operator--() {
|
375
|
+
--pos;
|
376
|
+
return *this;
|
377
|
+
}
|
378
|
+
inline iterator operator--(int) {
|
379
|
+
auto result = *this;
|
380
|
+
--(*this);
|
381
|
+
return result;
|
382
|
+
}
|
383
|
+
|
384
|
+
// Random Access
|
385
|
+
inline iterator &operator+=(difference_type n) {
|
386
|
+
pos += n;
|
387
|
+
return *this;
|
388
|
+
}
|
389
|
+
inline iterator &operator-=(difference_type n) {
|
390
|
+
pos -= n;
|
391
|
+
return *this;
|
392
|
+
}
|
393
|
+
|
394
|
+
inline reference operator[](difference_type m) const {
|
395
|
+
return coll->GetCell<T>(pos + m);
|
396
|
+
}
|
397
|
+
|
398
|
+
friend inline iterator operator+(const iterator &a, difference_type n) {
|
399
|
+
return iterator(a.coll, a.pos + n);
|
400
|
+
}
|
401
|
+
|
402
|
+
friend inline iterator operator-(const iterator &a, difference_type n) {
|
403
|
+
return iterator(a.coll, a.pos - n);
|
404
|
+
}
|
405
|
+
|
406
|
+
friend inline iterator operator+(difference_type n, const iterator &a) {
|
407
|
+
return a + n;
|
408
|
+
}
|
409
|
+
friend inline difference_type operator-(const iterator &a, const iterator &b) {
|
410
|
+
return difference_type(a.pos - b.pos);
|
411
|
+
}
|
412
|
+
|
372
413
|
friend inline bool operator==(const iterator &a, const iterator &b) {
|
373
414
|
return a.pos == b.pos;
|
374
415
|
}
|
375
416
|
friend inline bool operator!=(const iterator &a, const iterator &b) {
|
376
417
|
return a.pos != b.pos;
|
377
418
|
}
|
419
|
+
friend inline bool operator<(const iterator &a, const iterator &b) {
|
420
|
+
return a.pos < b.pos;
|
421
|
+
}
|
422
|
+
friend inline bool operator<=(const iterator &a, const iterator &b) {
|
423
|
+
return a.pos <= b.pos;
|
424
|
+
}
|
425
|
+
friend inline bool operator>(const iterator &a, const iterator &b) {
|
426
|
+
return a.pos > b.pos;
|
427
|
+
}
|
428
|
+
friend inline bool operator>=(const iterator &a, const iterator &b) {
|
429
|
+
return a.pos >= b.pos;
|
430
|
+
}
|
378
431
|
|
379
432
|
private:
|
380
433
|
optional_ptr<WindowInputColumn> coll;
|
@@ -390,13 +443,30 @@ struct OperationCompare : public std::function<bool(T, T)> {
|
|
390
443
|
|
391
444
|
template <typename T, typename OP, bool FROM>
|
392
445
|
static idx_t FindTypedRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
|
393
|
-
WindowInputExpression &boundary, const idx_t
|
394
|
-
D_ASSERT(!boundary.CellIsNull(
|
395
|
-
const auto val = boundary.GetCell<T>(
|
446
|
+
WindowInputExpression &boundary, const idx_t chunk_idx, const FrameBounds &prev) {
|
447
|
+
D_ASSERT(!boundary.CellIsNull(chunk_idx));
|
448
|
+
const auto val = boundary.GetCell<T>(chunk_idx);
|
396
449
|
|
397
450
|
OperationCompare<T, OP> comp;
|
398
451
|
WindowColumnIterator<T> begin(over, order_begin);
|
399
452
|
WindowColumnIterator<T> end(over, order_end);
|
453
|
+
|
454
|
+
if (order_begin < prev.first && prev.first < order_end) {
|
455
|
+
const auto first = over.GetCell<T>(prev.first);
|
456
|
+
if (!comp(val, first)) {
|
457
|
+
// prev.first <= val, so we can start further forward
|
458
|
+
begin += (prev.first - order_begin);
|
459
|
+
}
|
460
|
+
}
|
461
|
+
if (order_begin <= prev.second && prev.second < order_end) {
|
462
|
+
const auto second = over.GetCell<T>(prev.second);
|
463
|
+
if (!comp(second, val)) {
|
464
|
+
// val <= prev.second, so we can end further back
|
465
|
+
// (prev.second is the largest peer)
|
466
|
+
end -= (order_end - prev.second - 1);
|
467
|
+
}
|
468
|
+
}
|
469
|
+
|
400
470
|
if (FROM) {
|
401
471
|
return idx_t(std::lower_bound(begin, end, val, comp));
|
402
472
|
} else {
|
@@ -406,35 +476,35 @@ static idx_t FindTypedRangeBound(WindowInputColumn &over, const idx_t order_begi
|
|
406
476
|
|
407
477
|
template <typename OP, bool FROM>
|
408
478
|
static idx_t FindRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
|
409
|
-
WindowInputExpression &boundary, const idx_t
|
479
|
+
WindowInputExpression &boundary, const idx_t chunk_idx, const FrameBounds &prev) {
|
410
480
|
D_ASSERT(boundary.chunk.ColumnCount() == 1);
|
411
481
|
D_ASSERT(boundary.chunk.data[0].GetType().InternalType() == over.input_expr.ptype);
|
412
482
|
|
413
483
|
switch (over.input_expr.ptype) {
|
414
484
|
case PhysicalType::INT8:
|
415
|
-
return FindTypedRangeBound<int8_t, OP, FROM>(over, order_begin, order_end, boundary,
|
485
|
+
return FindTypedRangeBound<int8_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
416
486
|
case PhysicalType::INT16:
|
417
|
-
return FindTypedRangeBound<int16_t, OP, FROM>(over, order_begin, order_end, boundary,
|
487
|
+
return FindTypedRangeBound<int16_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
418
488
|
case PhysicalType::INT32:
|
419
|
-
return FindTypedRangeBound<int32_t, OP, FROM>(over, order_begin, order_end, boundary,
|
489
|
+
return FindTypedRangeBound<int32_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
420
490
|
case PhysicalType::INT64:
|
421
|
-
return FindTypedRangeBound<int64_t, OP, FROM>(over, order_begin, order_end, boundary,
|
491
|
+
return FindTypedRangeBound<int64_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
422
492
|
case PhysicalType::UINT8:
|
423
|
-
return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_begin, order_end, boundary,
|
493
|
+
return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
424
494
|
case PhysicalType::UINT16:
|
425
|
-
return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_begin, order_end, boundary,
|
495
|
+
return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
426
496
|
case PhysicalType::UINT32:
|
427
|
-
return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_begin, order_end, boundary,
|
497
|
+
return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
428
498
|
case PhysicalType::UINT64:
|
429
|
-
return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_begin, order_end, boundary,
|
499
|
+
return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
430
500
|
case PhysicalType::INT128:
|
431
|
-
return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_begin, order_end, boundary,
|
501
|
+
return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
432
502
|
case PhysicalType::FLOAT:
|
433
|
-
return FindTypedRangeBound<float, OP, FROM>(over, order_begin, order_end, boundary,
|
503
|
+
return FindTypedRangeBound<float, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
434
504
|
case PhysicalType::DOUBLE:
|
435
|
-
return FindTypedRangeBound<double, OP, FROM>(over, order_begin, order_end, boundary,
|
505
|
+
return FindTypedRangeBound<double, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
436
506
|
case PhysicalType::INTERVAL:
|
437
|
-
return FindTypedRangeBound<interval_t, OP, FROM>(over, order_begin, order_end, boundary,
|
507
|
+
return FindTypedRangeBound<interval_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
438
508
|
default:
|
439
509
|
throw InternalException("Unsupported column type for RANGE");
|
440
510
|
}
|
@@ -442,123 +512,126 @@ static idx_t FindRangeBound(WindowInputColumn &over, const idx_t order_begin, co
|
|
442
512
|
|
443
513
|
template <bool FROM>
|
444
514
|
static idx_t FindOrderedRangeBound(WindowInputColumn &over, const OrderType range_sense, const idx_t order_begin,
|
445
|
-
const idx_t order_end, WindowInputExpression &boundary, const idx_t
|
515
|
+
const idx_t order_end, WindowInputExpression &boundary, const idx_t chunk_idx,
|
516
|
+
const FrameBounds &prev) {
|
446
517
|
switch (range_sense) {
|
447
518
|
case OrderType::ASCENDING:
|
448
|
-
return FindRangeBound<LessThan, FROM>(over, order_begin, order_end, boundary,
|
519
|
+
return FindRangeBound<LessThan, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
449
520
|
case OrderType::DESCENDING:
|
450
|
-
return FindRangeBound<GreaterThan, FROM>(over, order_begin, order_end, boundary,
|
521
|
+
return FindRangeBound<GreaterThan, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
451
522
|
default:
|
452
523
|
throw InternalException("Unsupported ORDER BY sense for RANGE");
|
453
524
|
}
|
454
525
|
}
|
455
526
|
|
456
|
-
void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t
|
527
|
+
void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t chunk_idx,
|
457
528
|
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
458
529
|
const ValidityMask &partition_mask, const ValidityMask &order_mask) {
|
459
530
|
|
460
|
-
|
461
|
-
if (bounds.partition_count + bounds.order_count > 0) {
|
531
|
+
if (partition_count + order_count > 0) {
|
462
532
|
|
463
533
|
// determine partition and peer group boundaries to ultimately figure out window size
|
464
|
-
|
465
|
-
|
534
|
+
const auto is_same_partition = !partition_mask.RowIsValidUnsafe(row_idx);
|
535
|
+
const auto is_peer = !order_mask.RowIsValidUnsafe(row_idx);
|
466
536
|
|
467
537
|
// when the partition changes, recompute the boundaries
|
468
|
-
if (!
|
469
|
-
|
470
|
-
|
538
|
+
if (!is_same_partition) {
|
539
|
+
partition_start = row_idx;
|
540
|
+
peer_start = row_idx;
|
471
541
|
|
472
542
|
// find end of partition
|
473
|
-
|
474
|
-
if (
|
543
|
+
partition_end = input_size;
|
544
|
+
if (partition_count) {
|
475
545
|
idx_t n = 1;
|
476
|
-
|
546
|
+
partition_end = FindNextStart(partition_mask, partition_start + 1, input_size, n);
|
477
547
|
}
|
478
548
|
|
479
549
|
// Find valid ordering values for the new partition
|
480
550
|
// so we can exclude NULLs from RANGE expression computations
|
481
|
-
|
482
|
-
|
551
|
+
valid_start = partition_start;
|
552
|
+
valid_end = partition_end;
|
483
553
|
|
484
|
-
if ((
|
554
|
+
if ((valid_start < valid_end) && has_preceding_range) {
|
485
555
|
// Exclude any leading NULLs
|
486
|
-
if (range_collection.CellIsNull(
|
556
|
+
if (range_collection.CellIsNull(valid_start)) {
|
487
557
|
idx_t n = 1;
|
488
|
-
|
558
|
+
valid_start = FindNextStart(order_mask, valid_start + 1, valid_end, n);
|
489
559
|
}
|
490
560
|
}
|
491
561
|
|
492
|
-
if ((
|
562
|
+
if ((valid_start < valid_end) && has_following_range) {
|
493
563
|
// Exclude any trailing NULLs
|
494
|
-
if (range_collection.CellIsNull(
|
564
|
+
if (range_collection.CellIsNull(valid_end - 1)) {
|
495
565
|
idx_t n = 1;
|
496
|
-
|
566
|
+
valid_end = FindPrevStart(order_mask, valid_start, valid_end, n);
|
497
567
|
}
|
498
|
-
}
|
499
568
|
|
500
|
-
|
501
|
-
|
569
|
+
// Reset range hints
|
570
|
+
prev.first = valid_start;
|
571
|
+
prev.second = valid_end;
|
572
|
+
}
|
573
|
+
} else if (!is_peer) {
|
574
|
+
peer_start = row_idx;
|
502
575
|
}
|
503
576
|
|
504
|
-
if (
|
505
|
-
|
506
|
-
if (
|
577
|
+
if (needs_peer) {
|
578
|
+
peer_end = partition_end;
|
579
|
+
if (order_count) {
|
507
580
|
idx_t n = 1;
|
508
|
-
|
581
|
+
peer_end = FindNextStart(order_mask, peer_start + 1, partition_end, n);
|
509
582
|
}
|
510
583
|
}
|
511
584
|
|
512
585
|
} else {
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
bounds.peer_end = bounds.partition_end;
|
586
|
+
// OVER()
|
587
|
+
partition_end = input_size;
|
588
|
+
peer_end = partition_end;
|
517
589
|
}
|
518
590
|
|
519
591
|
// determine window boundaries depending on the type of expression
|
520
|
-
|
521
|
-
|
592
|
+
window_start = -1;
|
593
|
+
window_end = -1;
|
522
594
|
|
523
|
-
switch (
|
595
|
+
switch (start_boundary) {
|
524
596
|
case WindowBoundary::UNBOUNDED_PRECEDING:
|
525
|
-
|
597
|
+
window_start = partition_start;
|
526
598
|
break;
|
527
599
|
case WindowBoundary::CURRENT_ROW_ROWS:
|
528
|
-
|
600
|
+
window_start = row_idx;
|
529
601
|
break;
|
530
602
|
case WindowBoundary::CURRENT_ROW_RANGE:
|
531
|
-
|
603
|
+
window_start = peer_start;
|
532
604
|
break;
|
533
605
|
case WindowBoundary::EXPR_PRECEDING_ROWS: {
|
534
|
-
if (!TrySubtractOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(
|
535
|
-
|
606
|
+
if (!TrySubtractOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(chunk_idx),
|
607
|
+
window_start)) {
|
536
608
|
throw OutOfRangeException("Overflow computing ROWS PRECEDING start");
|
537
609
|
}
|
538
610
|
break;
|
539
611
|
}
|
540
612
|
case WindowBoundary::EXPR_FOLLOWING_ROWS: {
|
541
|
-
if (!TryAddOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(
|
542
|
-
bounds.window_start)) {
|
613
|
+
if (!TryAddOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(chunk_idx), window_start)) {
|
543
614
|
throw OutOfRangeException("Overflow computing ROWS FOLLOWING start");
|
544
615
|
}
|
545
616
|
break;
|
546
617
|
}
|
547
618
|
case WindowBoundary::EXPR_PRECEDING_RANGE: {
|
548
|
-
if (boundary_start.CellIsNull(
|
549
|
-
|
619
|
+
if (boundary_start.CellIsNull(chunk_idx)) {
|
620
|
+
window_start = peer_start;
|
550
621
|
} else {
|
551
|
-
|
552
|
-
|
622
|
+
prev.first = FindOrderedRangeBound<true>(range_collection, range_sense, valid_start, row_idx,
|
623
|
+
boundary_start, chunk_idx, prev);
|
624
|
+
window_start = prev.first;
|
553
625
|
}
|
554
626
|
break;
|
555
627
|
}
|
556
628
|
case WindowBoundary::EXPR_FOLLOWING_RANGE: {
|
557
|
-
if (boundary_start.CellIsNull(
|
558
|
-
|
629
|
+
if (boundary_start.CellIsNull(chunk_idx)) {
|
630
|
+
window_start = peer_start;
|
559
631
|
} else {
|
560
|
-
|
561
|
-
|
632
|
+
prev.first = FindOrderedRangeBound<true>(range_collection, range_sense, row_idx, valid_end, boundary_start,
|
633
|
+
chunk_idx, prev);
|
634
|
+
window_start = prev.first;
|
562
635
|
}
|
563
636
|
break;
|
564
637
|
}
|
@@ -566,43 +639,44 @@ void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range
|
|
566
639
|
throw InternalException("Unsupported window start boundary");
|
567
640
|
}
|
568
641
|
|
569
|
-
switch (
|
642
|
+
switch (end_boundary) {
|
570
643
|
case WindowBoundary::CURRENT_ROW_ROWS:
|
571
|
-
|
644
|
+
window_end = row_idx + 1;
|
572
645
|
break;
|
573
646
|
case WindowBoundary::CURRENT_ROW_RANGE:
|
574
|
-
|
647
|
+
window_end = peer_end;
|
575
648
|
break;
|
576
649
|
case WindowBoundary::UNBOUNDED_FOLLOWING:
|
577
|
-
|
650
|
+
window_end = partition_end;
|
578
651
|
break;
|
579
652
|
case WindowBoundary::EXPR_PRECEDING_ROWS:
|
580
|
-
if (!TrySubtractOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(
|
581
|
-
|
653
|
+
if (!TrySubtractOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(chunk_idx),
|
654
|
+
window_end)) {
|
582
655
|
throw OutOfRangeException("Overflow computing ROWS PRECEDING end");
|
583
656
|
}
|
584
657
|
break;
|
585
658
|
case WindowBoundary::EXPR_FOLLOWING_ROWS:
|
586
|
-
if (!TryAddOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(
|
587
|
-
bounds.window_end)) {
|
659
|
+
if (!TryAddOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(chunk_idx), window_end)) {
|
588
660
|
throw OutOfRangeException("Overflow computing ROWS FOLLOWING end");
|
589
661
|
}
|
590
662
|
break;
|
591
663
|
case WindowBoundary::EXPR_PRECEDING_RANGE: {
|
592
|
-
if (boundary_end.CellIsNull(
|
593
|
-
|
664
|
+
if (boundary_end.CellIsNull(chunk_idx)) {
|
665
|
+
window_end = peer_end;
|
594
666
|
} else {
|
595
|
-
|
596
|
-
|
667
|
+
prev.second = FindOrderedRangeBound<false>(range_collection, range_sense, valid_start, row_idx,
|
668
|
+
boundary_end, chunk_idx, prev);
|
669
|
+
window_end = prev.second;
|
597
670
|
}
|
598
671
|
break;
|
599
672
|
}
|
600
673
|
case WindowBoundary::EXPR_FOLLOWING_RANGE: {
|
601
|
-
if (boundary_end.CellIsNull(
|
602
|
-
|
674
|
+
if (boundary_end.CellIsNull(chunk_idx)) {
|
675
|
+
window_end = peer_end;
|
603
676
|
} else {
|
604
|
-
|
605
|
-
|
677
|
+
prev.second = FindOrderedRangeBound<false>(range_collection, range_sense, row_idx, valid_end, boundary_end,
|
678
|
+
chunk_idx, prev);
|
679
|
+
window_end = prev.second;
|
606
680
|
}
|
607
681
|
break;
|
608
682
|
}
|
@@ -611,41 +685,69 @@ void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range
|
|
611
685
|
}
|
612
686
|
|
613
687
|
// clamp windows to partitions if they should exceed
|
614
|
-
if (
|
615
|
-
|
688
|
+
if (window_start < (int64_t)partition_start) {
|
689
|
+
window_start = partition_start;
|
616
690
|
}
|
617
|
-
if (
|
618
|
-
|
691
|
+
if (window_start > (int64_t)partition_end) {
|
692
|
+
window_start = partition_end;
|
619
693
|
}
|
620
|
-
if (
|
621
|
-
|
694
|
+
if (window_end < (int64_t)partition_start) {
|
695
|
+
window_end = partition_start;
|
622
696
|
}
|
623
|
-
if (
|
624
|
-
|
697
|
+
if (window_end > (int64_t)partition_end) {
|
698
|
+
window_end = partition_end;
|
625
699
|
}
|
626
700
|
|
627
|
-
if (
|
701
|
+
if (window_start < 0 || window_end < 0) {
|
628
702
|
throw InternalException("Failed to compute window boundaries");
|
629
703
|
}
|
630
704
|
}
|
631
705
|
|
706
|
+
void WindowBoundariesState::Bounds(DataChunk &bounds, idx_t row_idx, WindowInputColumn &range, const idx_t count,
|
707
|
+
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
708
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask) {
|
709
|
+
bounds.Reset();
|
710
|
+
D_ASSERT(bounds.ColumnCount() == 6);
|
711
|
+
auto partition_begin_data = FlatVector::GetData<idx_t>(bounds.data[PARTITION_BEGIN]);
|
712
|
+
auto partition_end_data = FlatVector::GetData<idx_t>(bounds.data[PARTITION_END]);
|
713
|
+
auto peer_begin_data = FlatVector::GetData<idx_t>(bounds.data[PEER_BEGIN]);
|
714
|
+
auto peer_end_data = FlatVector::GetData<idx_t>(bounds.data[PEER_END]);
|
715
|
+
auto window_begin_data = FlatVector::GetData<int64_t>(bounds.data[WINDOW_BEGIN]);
|
716
|
+
auto window_end_data = FlatVector::GetData<int64_t>(bounds.data[WINDOW_END]);
|
717
|
+
for (idx_t chunk_idx = 0; chunk_idx < count; ++chunk_idx, ++row_idx) {
|
718
|
+
Update(row_idx, range, chunk_idx, boundary_start, boundary_end, partition_mask, order_mask);
|
719
|
+
*partition_begin_data++ = partition_start;
|
720
|
+
*partition_end_data++ = partition_end;
|
721
|
+
if (needs_peer) {
|
722
|
+
*peer_begin_data++ = peer_start;
|
723
|
+
*peer_end_data++ = peer_end;
|
724
|
+
}
|
725
|
+
*window_begin_data++ = window_start;
|
726
|
+
*window_end_data++ = window_end;
|
727
|
+
}
|
728
|
+
bounds.SetCardinality(count);
|
729
|
+
}
|
730
|
+
|
632
731
|
struct WindowExecutor {
|
633
|
-
|
732
|
+
bool IsConstantAggregate();
|
733
|
+
bool IsCustomAggregate();
|
634
734
|
|
635
735
|
WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const ValidityMask &partition_mask,
|
636
|
-
const idx_t count);
|
736
|
+
const idx_t count, WindowAggregationMode mode);
|
637
737
|
|
638
738
|
void Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count);
|
639
|
-
void Finalize(
|
739
|
+
void Finalize();
|
640
740
|
|
641
741
|
void Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
|
642
742
|
const ValidityMask &order_mask);
|
643
743
|
|
644
744
|
// The function
|
645
745
|
BoundWindowExpression &wexpr;
|
746
|
+
const WindowAggregationMode mode;
|
646
747
|
|
647
748
|
// Frame management
|
648
|
-
WindowBoundariesState
|
749
|
+
WindowBoundariesState state;
|
750
|
+
DataChunk bounds;
|
649
751
|
uint64_t dense_rank = 1;
|
650
752
|
uint64_t rank_equal = 0;
|
651
753
|
uint64_t rank = 1;
|
@@ -656,8 +758,6 @@ struct WindowExecutor {
|
|
656
758
|
DataChunk payload_chunk;
|
657
759
|
|
658
760
|
ExpressionExecutor filter_executor;
|
659
|
-
ValidityMask filter_mask;
|
660
|
-
vector<validity_t> filter_bits;
|
661
761
|
SelectionVector filter_sel;
|
662
762
|
|
663
763
|
// LEAD/LAG Evaluation
|
@@ -674,15 +774,25 @@ struct WindowExecutor {
|
|
674
774
|
// IGNORE NULLS
|
675
775
|
ValidityMask ignore_nulls;
|
676
776
|
|
677
|
-
//
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
777
|
+
// aggregate computation algorithm
|
778
|
+
unique_ptr<WindowAggregateState> aggregate_state = nullptr;
|
779
|
+
|
780
|
+
protected:
|
781
|
+
void NextRank(idx_t partition_begin, idx_t peer_begin, idx_t row_idx);
|
782
|
+
void Aggregate(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
783
|
+
void RowNumber(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
784
|
+
void Rank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
785
|
+
void DenseRank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
786
|
+
void PercentRank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
787
|
+
void CumeDist(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
788
|
+
void Ntile(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
789
|
+
void LeadLag(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
790
|
+
void FirstValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
791
|
+
void LastValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
792
|
+
void NthValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
683
793
|
};
|
684
794
|
|
685
|
-
bool WindowExecutor::IsConstantAggregate(
|
795
|
+
bool WindowExecutor::IsConstantAggregate() {
|
686
796
|
if (!wexpr.aggregate) {
|
687
797
|
return false;
|
688
798
|
}
|
@@ -736,28 +846,44 @@ bool WindowExecutor::IsConstantAggregate(const BoundWindowExpression &wexpr) {
|
|
736
846
|
return true;
|
737
847
|
}
|
738
848
|
|
849
|
+
bool WindowExecutor::IsCustomAggregate() {
|
850
|
+
if (!wexpr.aggregate) {
|
851
|
+
return false;
|
852
|
+
}
|
853
|
+
|
854
|
+
if (!AggregateObject(wexpr).function.window) {
|
855
|
+
return false;
|
856
|
+
}
|
857
|
+
|
858
|
+
return (mode < WindowAggregationMode::COMBINE);
|
859
|
+
}
|
860
|
+
|
739
861
|
WindowExecutor::WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const ValidityMask &partition_mask,
|
740
|
-
const idx_t count)
|
741
|
-
: wexpr(wexpr),
|
742
|
-
|
743
|
-
|
744
|
-
|
862
|
+
const idx_t count, WindowAggregationMode mode)
|
863
|
+
: wexpr(wexpr), mode(mode), state(wexpr, count), payload_collection(), payload_executor(context),
|
864
|
+
filter_executor(context), leadlag_offset(wexpr.offset_expr.get(), context),
|
865
|
+
leadlag_default(wexpr.default_expr.get(), context), boundary_start(wexpr.start_expr.get(), context),
|
866
|
+
boundary_end(wexpr.end_expr.get(), context),
|
867
|
+
range((state.has_preceding_range || state.has_following_range) ? wexpr.orders[0].expression.get() : nullptr,
|
745
868
|
context, count)
|
746
869
|
|
747
870
|
{
|
748
871
|
// TODO we could evaluate those expressions in parallel
|
749
872
|
|
750
873
|
// Check for constant aggregate
|
751
|
-
if (IsConstantAggregate(
|
752
|
-
|
874
|
+
if (IsConstantAggregate()) {
|
875
|
+
aggregate_state =
|
753
876
|
make_uniq<WindowConstantAggregate>(AggregateObject(wexpr), wexpr.return_type, partition_mask, count);
|
877
|
+
} else if (IsCustomAggregate()) {
|
878
|
+
aggregate_state = make_uniq<WindowCustomAggregate>(AggregateObject(wexpr), wexpr.return_type, count);
|
879
|
+
} else if (wexpr.aggregate) {
|
880
|
+
// build a segment tree for frame-adhering aggregates
|
881
|
+
// see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
|
882
|
+
aggregate_state = make_uniq<WindowSegmentTree>(AggregateObject(wexpr), wexpr.return_type, count, mode);
|
754
883
|
}
|
755
884
|
|
756
885
|
// evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
|
757
886
|
if (wexpr.filter_expr) {
|
758
|
-
// Start with all invalid and set the ones that pass
|
759
|
-
filter_bits.resize(ValidityMask::ValidityMaskSize(count), 0);
|
760
|
-
filter_mask.Initialize(filter_bits.data());
|
761
887
|
filter_executor.AddExpression(*wexpr.filter_expr);
|
762
888
|
filter_sel.Initialize(STANDARD_VECTOR_SIZE);
|
763
889
|
}
|
@@ -771,6 +897,9 @@ WindowExecutor::WindowExecutor(BoundWindowExpression &wexpr, ClientContext &cont
|
|
771
897
|
if (!types.empty()) {
|
772
898
|
payload_collection.Initialize(Allocator::Get(context), types);
|
773
899
|
}
|
900
|
+
|
901
|
+
vector<LogicalType> bounds_types(6, LogicalType(LogicalTypeId::UBIGINT));
|
902
|
+
bounds.Initialize(Allocator::Get(context), bounds_types);
|
774
903
|
}
|
775
904
|
|
776
905
|
void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) {
|
@@ -800,17 +929,14 @@ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const i
|
|
800
929
|
if (wexpr.filter_expr) {
|
801
930
|
filtering = &filter_sel;
|
802
931
|
filtered = filter_executor.SelectExpression(input_chunk, filter_sel);
|
803
|
-
for (idx_t f = 0; f < filtered; ++f) {
|
804
|
-
filter_mask.SetValid(input_idx + filter_sel[f]);
|
805
|
-
}
|
806
932
|
}
|
807
933
|
|
808
934
|
if (!wexpr.children.empty()) {
|
809
935
|
payload_chunk.Reset();
|
810
936
|
payload_executor.Execute(input_chunk, payload_chunk);
|
811
937
|
payload_chunk.Verify();
|
812
|
-
if (
|
813
|
-
|
938
|
+
if (aggregate_state) {
|
939
|
+
aggregate_state->Sink(payload_chunk, filtering, filtered);
|
814
940
|
} else {
|
815
941
|
payload_collection.Append(payload_chunk, true);
|
816
942
|
}
|
@@ -840,19 +966,18 @@ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const i
|
|
840
966
|
}
|
841
967
|
}
|
842
968
|
}
|
969
|
+
} else if (aggregate_state) {
|
970
|
+
// Zero-argument aggregate (e.g., COUNT(*)
|
971
|
+
payload_chunk.SetCardinality(input_chunk);
|
972
|
+
aggregate_state->Sink(payload_chunk, filtering, filtered);
|
843
973
|
}
|
844
974
|
|
845
975
|
range.Append(input_chunk);
|
846
976
|
}
|
847
977
|
|
848
|
-
void WindowExecutor::Finalize(
|
849
|
-
|
850
|
-
|
851
|
-
if (constant_aggregate) {
|
852
|
-
constant_aggregate->Finalize();
|
853
|
-
} else if (wexpr.aggregate) {
|
854
|
-
segment_tree = make_uniq<WindowSegmentTree>(AggregateObject(wexpr), wexpr.return_type, &payload_collection,
|
855
|
-
filter_mask, mode);
|
978
|
+
void WindowExecutor::Finalize() {
|
979
|
+
if (aggregate_state) {
|
980
|
+
aggregate_state->Finalize();
|
856
981
|
}
|
857
982
|
}
|
858
983
|
|
@@ -865,188 +990,275 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
|
|
865
990
|
leadlag_offset.Execute(input_chunk);
|
866
991
|
leadlag_default.Execute(input_chunk);
|
867
992
|
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
bounds.Update(row_idx, range, output_offset, boundary_start, boundary_end, partition_mask, order_mask);
|
872
|
-
if (WindowNeedsRank(wexpr)) {
|
873
|
-
if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init
|
874
|
-
dense_rank = 1;
|
875
|
-
rank = 1;
|
876
|
-
rank_equal = 0;
|
877
|
-
} else if (!bounds.is_peer) {
|
878
|
-
dense_rank++;
|
879
|
-
rank += rank_equal;
|
880
|
-
rank_equal = 0;
|
881
|
-
}
|
882
|
-
rank_equal++;
|
883
|
-
}
|
993
|
+
const auto count = input_chunk.size();
|
994
|
+
bounds.Reset();
|
995
|
+
state.Bounds(bounds, row_idx, range, input_chunk.size(), boundary_start, boundary_end, partition_mask, order_mask);
|
884
996
|
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
997
|
+
switch (wexpr.type) {
|
998
|
+
case ExpressionType::WINDOW_AGGREGATE:
|
999
|
+
Aggregate(bounds, result, count, row_idx);
|
1000
|
+
break;
|
1001
|
+
case ExpressionType::WINDOW_ROW_NUMBER:
|
1002
|
+
RowNumber(bounds, result, count, row_idx);
|
1003
|
+
break;
|
1004
|
+
case ExpressionType::WINDOW_RANK_DENSE:
|
1005
|
+
DenseRank(bounds, result, count, row_idx);
|
1006
|
+
break;
|
1007
|
+
case ExpressionType::WINDOW_RANK:
|
1008
|
+
Rank(bounds, result, count, row_idx);
|
1009
|
+
break;
|
1010
|
+
case ExpressionType::WINDOW_PERCENT_RANK:
|
1011
|
+
PercentRank(bounds, result, count, row_idx);
|
1012
|
+
break;
|
1013
|
+
case ExpressionType::WINDOW_CUME_DIST:
|
1014
|
+
CumeDist(bounds, result, count, row_idx);
|
1015
|
+
break;
|
1016
|
+
case ExpressionType::WINDOW_NTILE:
|
1017
|
+
Ntile(bounds, result, count, row_idx);
|
1018
|
+
break;
|
1019
|
+
case ExpressionType::WINDOW_LEAD:
|
1020
|
+
case ExpressionType::WINDOW_LAG:
|
1021
|
+
LeadLag(bounds, result, count, row_idx);
|
1022
|
+
break;
|
1023
|
+
case ExpressionType::WINDOW_FIRST_VALUE:
|
1024
|
+
FirstValue(bounds, result, count, row_idx);
|
1025
|
+
break;
|
1026
|
+
case ExpressionType::WINDOW_LAST_VALUE:
|
1027
|
+
LastValue(bounds, result, count, row_idx);
|
1028
|
+
break;
|
1029
|
+
case ExpressionType::WINDOW_NTH_VALUE:
|
1030
|
+
NthValue(bounds, result, count, row_idx);
|
1031
|
+
break;
|
1032
|
+
default:
|
1033
|
+
throw InternalException("Window aggregate type %s", ExpressionTypeToString(wexpr.type));
|
1034
|
+
}
|
890
1035
|
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
1036
|
+
result.Verify(count);
|
1037
|
+
}
|
1038
|
+
|
1039
|
+
void WindowExecutor::NextRank(idx_t partition_begin, idx_t peer_begin, idx_t row_idx) {
|
1040
|
+
if (partition_begin == row_idx) {
|
1041
|
+
dense_rank = 1;
|
1042
|
+
rank = 1;
|
1043
|
+
rank_equal = 0;
|
1044
|
+
} else if (peer_begin == row_idx) {
|
1045
|
+
dense_rank++;
|
1046
|
+
rank += rank_equal;
|
1047
|
+
rank_equal = 0;
|
1048
|
+
}
|
1049
|
+
rank_equal++;
|
1050
|
+
}
|
1051
|
+
|
1052
|
+
void WindowExecutor::Aggregate(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1053
|
+
D_ASSERT(aggregate_state);
|
1054
|
+
auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
|
1055
|
+
auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
|
1056
|
+
aggregate_state->Evaluate(window_begin, window_end, result, count);
|
1057
|
+
}
|
1058
|
+
|
1059
|
+
void WindowExecutor::RowNumber(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1060
|
+
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1061
|
+
auto rdata = FlatVector::GetData<int64_t>(result);
|
1062
|
+
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1063
|
+
rdata[i] = row_idx - partition_begin[i] + 1;
|
1064
|
+
}
|
1065
|
+
}
|
1066
|
+
|
1067
|
+
void WindowExecutor::Rank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1068
|
+
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1069
|
+
auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
|
1070
|
+
auto rdata = FlatVector::GetData<int64_t>(result);
|
1071
|
+
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1072
|
+
NextRank(partition_begin[i], peer_begin[i], row_idx);
|
1073
|
+
rdata[i] = rank;
|
1074
|
+
}
|
1075
|
+
}
|
1076
|
+
|
1077
|
+
void WindowExecutor::DenseRank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1078
|
+
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1079
|
+
auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
|
1080
|
+
auto rdata = FlatVector::GetData<int64_t>(result);
|
1081
|
+
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1082
|
+
NextRank(partition_begin[i], peer_begin[i], row_idx);
|
1083
|
+
rdata[i] = dense_rank;
|
1084
|
+
}
|
1085
|
+
}
|
1086
|
+
|
1087
|
+
void WindowExecutor::PercentRank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1088
|
+
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1089
|
+
auto partition_end = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_END]);
|
1090
|
+
auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
|
1091
|
+
auto rdata = FlatVector::GetData<double>(result);
|
1092
|
+
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1093
|
+
NextRank(partition_begin[i], peer_begin[i], row_idx);
|
1094
|
+
int64_t denom = partition_end[i] - partition_begin[i] - 1;
|
1095
|
+
double percent_rank = denom > 0 ? ((double)rank - 1) / denom : 0;
|
1096
|
+
rdata[i] = percent_rank;
|
1097
|
+
}
|
1098
|
+
}
|
1099
|
+
|
1100
|
+
void WindowExecutor::CumeDist(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1101
|
+
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1102
|
+
auto partition_end = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_END]);
|
1103
|
+
auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
|
1104
|
+
auto peer_end = FlatVector::GetData<const idx_t>(bounds.data[PEER_END]);
|
1105
|
+
auto rdata = FlatVector::GetData<double>(result);
|
1106
|
+
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1107
|
+
NextRank(partition_begin[i], peer_begin[i], row_idx);
|
1108
|
+
int64_t denom = partition_end[i] - partition_begin[i];
|
1109
|
+
double cume_dist = denom > 0 ? ((double)(peer_end[i] - partition_begin[i])) / denom : 0;
|
1110
|
+
rdata[i] = cume_dist;
|
1111
|
+
}
|
1112
|
+
}
|
1113
|
+
|
1114
|
+
void WindowExecutor::Ntile(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1115
|
+
D_ASSERT(payload_collection.ColumnCount() == 1);
|
1116
|
+
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1117
|
+
auto partition_end = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_END]);
|
1118
|
+
auto rdata = FlatVector::GetData<int64_t>(result);
|
1119
|
+
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1120
|
+
if (CellIsNull(payload_collection, 0, row_idx)) {
|
1121
|
+
FlatVector::SetNull(result, i, true);
|
1122
|
+
} else {
|
1123
|
+
auto n_param = GetCell<int64_t>(payload_collection, 0, row_idx);
|
1124
|
+
if (n_param < 1) {
|
1125
|
+
throw InvalidInputException("Argument for ntile must be greater than zero");
|
1126
|
+
}
|
1127
|
+
// With thanks from SQLite's ntileValueFunc()
|
1128
|
+
int64_t n_total = partition_end[i] - partition_begin[i];
|
1129
|
+
if (n_param > n_total) {
|
1130
|
+
// more groups allowed than we have values
|
1131
|
+
// map every entry to a unique group
|
1132
|
+
n_param = n_total;
|
1133
|
+
}
|
1134
|
+
int64_t n_size = (n_total / n_param);
|
1135
|
+
// find the row idx within the group
|
1136
|
+
D_ASSERT(row_idx >= partition_begin[i]);
|
1137
|
+
int64_t adjusted_row_idx = row_idx - partition_begin[i];
|
1138
|
+
// now compute the ntile
|
1139
|
+
int64_t n_large = n_total - n_param * n_size;
|
1140
|
+
int64_t i_small = n_large * (n_size + 1);
|
1141
|
+
int64_t result_ntile;
|
1142
|
+
|
1143
|
+
D_ASSERT((n_large * (n_size + 1) + (n_param - n_large) * n_size) == n_total);
|
1144
|
+
|
1145
|
+
if (adjusted_row_idx < i_small) {
|
1146
|
+
result_ntile = 1 + adjusted_row_idx / (n_size + 1);
|
895
1147
|
} else {
|
896
|
-
|
1148
|
+
result_ntile = 1 + n_large + (adjusted_row_idx - i_small) / n_size;
|
897
1149
|
}
|
898
|
-
|
1150
|
+
// result has to be between [1, NTILE]
|
1151
|
+
D_ASSERT(result_ntile >= 1 && result_ntile <= n_param);
|
1152
|
+
rdata[i] = result_ntile;
|
899
1153
|
}
|
900
|
-
|
901
|
-
|
902
|
-
|
903
|
-
|
1154
|
+
}
|
1155
|
+
}
|
1156
|
+
|
1157
|
+
void WindowExecutor::LeadLag(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1158
|
+
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1159
|
+
auto partition_end = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_END]);
|
1160
|
+
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1161
|
+
int64_t offset = 1;
|
1162
|
+
if (wexpr.offset_expr) {
|
1163
|
+
offset = leadlag_offset.GetCell<int64_t>(i);
|
904
1164
|
}
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
1165
|
+
int64_t val_idx = (int64_t)row_idx;
|
1166
|
+
if (wexpr.type == ExpressionType::WINDOW_LEAD) {
|
1167
|
+
val_idx += offset;
|
1168
|
+
} else {
|
1169
|
+
val_idx -= offset;
|
909
1170
|
}
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
1171
|
+
|
1172
|
+
idx_t delta = 0;
|
1173
|
+
if (val_idx < (int64_t)row_idx) {
|
1174
|
+
// Count backwards
|
1175
|
+
delta = idx_t(row_idx - val_idx);
|
1176
|
+
val_idx = FindPrevStart(ignore_nulls, partition_begin[i], row_idx, delta);
|
1177
|
+
} else if (val_idx > (int64_t)row_idx) {
|
1178
|
+
delta = idx_t(val_idx - row_idx);
|
1179
|
+
val_idx = FindNextStart(ignore_nulls, row_idx + 1, partition_end[i], delta);
|
914
1180
|
}
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
1181
|
+
// else offset is zero, so don't move.
|
1182
|
+
|
1183
|
+
if (!delta) {
|
1184
|
+
CopyCell(payload_collection, 0, val_idx, result, i);
|
1185
|
+
} else if (wexpr.default_expr) {
|
1186
|
+
leadlag_default.CopyCell(result, i);
|
1187
|
+
} else {
|
1188
|
+
FlatVector::SetNull(result, i, true);
|
921
1189
|
}
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
1190
|
+
}
|
1191
|
+
}
|
1192
|
+
|
1193
|
+
void WindowExecutor::FirstValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1194
|
+
auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
|
1195
|
+
auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
|
1196
|
+
auto &rmask = FlatVector::Validity(result);
|
1197
|
+
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1198
|
+
if (window_begin[i] >= window_end[i]) {
|
1199
|
+
rmask.SetInvalid(i);
|
1200
|
+
continue;
|
928
1201
|
}
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
|
933
|
-
|
934
|
-
|
935
|
-
|
936
|
-
throw InvalidInputException("Argument for ntile must be greater than zero");
|
937
|
-
}
|
938
|
-
// With thanks from SQLite's ntileValueFunc()
|
939
|
-
int64_t n_total = bounds.partition_end - bounds.partition_start;
|
940
|
-
if (n_param > n_total) {
|
941
|
-
// more groups allowed than we have values
|
942
|
-
// map every entry to a unique group
|
943
|
-
n_param = n_total;
|
944
|
-
}
|
945
|
-
int64_t n_size = (n_total / n_param);
|
946
|
-
// find the row idx within the group
|
947
|
-
D_ASSERT(row_idx >= bounds.partition_start);
|
948
|
-
int64_t adjusted_row_idx = row_idx - bounds.partition_start;
|
949
|
-
// now compute the ntile
|
950
|
-
int64_t n_large = n_total - n_param * n_size;
|
951
|
-
int64_t i_small = n_large * (n_size + 1);
|
952
|
-
int64_t result_ntile;
|
953
|
-
|
954
|
-
D_ASSERT((n_large * (n_size + 1) + (n_param - n_large) * n_size) == n_total);
|
955
|
-
|
956
|
-
if (adjusted_row_idx < i_small) {
|
957
|
-
result_ntile = 1 + adjusted_row_idx / (n_size + 1);
|
958
|
-
} else {
|
959
|
-
result_ntile = 1 + n_large + (adjusted_row_idx - i_small) / n_size;
|
960
|
-
}
|
961
|
-
// result has to be between [1, NTILE]
|
962
|
-
D_ASSERT(result_ntile >= 1 && result_ntile <= n_param);
|
963
|
-
auto rdata = FlatVector::GetData<int64_t>(result);
|
964
|
-
rdata[output_offset] = result_ntile;
|
965
|
-
}
|
966
|
-
break;
|
1202
|
+
// Same as NTH_VALUE(..., 1)
|
1203
|
+
idx_t n = 1;
|
1204
|
+
const auto first_idx = FindNextStart(ignore_nulls, window_begin[i], window_end[i], n);
|
1205
|
+
if (!n) {
|
1206
|
+
CopyCell(payload_collection, 0, first_idx, result, i);
|
1207
|
+
} else {
|
1208
|
+
FlatVector::SetNull(result, i, true);
|
967
1209
|
}
|
968
|
-
|
969
|
-
|
970
|
-
int64_t offset = 1;
|
971
|
-
if (wexpr.offset_expr) {
|
972
|
-
offset = leadlag_offset.GetCell<int64_t>(output_offset);
|
973
|
-
}
|
974
|
-
int64_t val_idx = (int64_t)row_idx;
|
975
|
-
if (wexpr.type == ExpressionType::WINDOW_LEAD) {
|
976
|
-
val_idx += offset;
|
977
|
-
} else {
|
978
|
-
val_idx -= offset;
|
979
|
-
}
|
980
|
-
|
981
|
-
idx_t delta = 0;
|
982
|
-
if (val_idx < (int64_t)row_idx) {
|
983
|
-
// Count backwards
|
984
|
-
delta = idx_t(row_idx - val_idx);
|
985
|
-
val_idx = FindPrevStart(ignore_nulls, bounds.partition_start, row_idx, delta);
|
986
|
-
} else if (val_idx > (int64_t)row_idx) {
|
987
|
-
delta = idx_t(val_idx - row_idx);
|
988
|
-
val_idx = FindNextStart(ignore_nulls, row_idx + 1, bounds.partition_end, delta);
|
989
|
-
}
|
990
|
-
// else offset is zero, so don't move.
|
1210
|
+
}
|
1211
|
+
}
|
991
1212
|
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
1213
|
+
void WindowExecutor::LastValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1214
|
+
auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
|
1215
|
+
auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
|
1216
|
+
auto &rmask = FlatVector::Validity(result);
|
1217
|
+
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1218
|
+
if (window_begin[i] >= window_end[i]) {
|
1219
|
+
rmask.SetInvalid(i);
|
1220
|
+
continue;
|
1000
1221
|
}
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1007
|
-
} else {
|
1008
|
-
FlatVector::SetNull(result, output_offset, true);
|
1009
|
-
}
|
1010
|
-
break;
|
1222
|
+
idx_t n = 1;
|
1223
|
+
const auto last_idx = FindPrevStart(ignore_nulls, window_begin[i], window_end[i], n);
|
1224
|
+
if (!n) {
|
1225
|
+
CopyCell(payload_collection, 0, last_idx, result, i);
|
1226
|
+
} else {
|
1227
|
+
FlatVector::SetNull(result, i, true);
|
1011
1228
|
}
|
1012
|
-
|
1013
|
-
|
1014
|
-
|
1015
|
-
|
1016
|
-
|
1017
|
-
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1229
|
+
}
|
1230
|
+
}
|
1231
|
+
|
1232
|
+
void WindowExecutor::NthValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1233
|
+
D_ASSERT(payload_collection.ColumnCount() == 2);
|
1234
|
+
|
1235
|
+
auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
|
1236
|
+
auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
|
1237
|
+
auto &rmask = FlatVector::Validity(result);
|
1238
|
+
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1239
|
+
if (window_begin[i] >= window_end[i]) {
|
1240
|
+
rmask.SetInvalid(i);
|
1241
|
+
continue;
|
1021
1242
|
}
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1026
|
-
|
1027
|
-
|
1243
|
+
// Returns value evaluated at the row that is the n'th row of the window frame (counting from 1);
|
1244
|
+
// returns NULL if there is no such row.
|
1245
|
+
if (CellIsNull(payload_collection, 1, row_idx)) {
|
1246
|
+
FlatVector::SetNull(result, i, true);
|
1247
|
+
} else {
|
1248
|
+
auto n_param = GetCell<int64_t>(payload_collection, 1, row_idx);
|
1249
|
+
if (n_param < 1) {
|
1250
|
+
FlatVector::SetNull(result, i, true);
|
1028
1251
|
} else {
|
1029
|
-
auto
|
1030
|
-
|
1031
|
-
|
1252
|
+
auto n = idx_t(n_param);
|
1253
|
+
const auto nth_index = FindNextStart(ignore_nulls, window_begin[i], window_end[i], n);
|
1254
|
+
if (!n) {
|
1255
|
+
CopyCell(payload_collection, 0, nth_index, result, i);
|
1032
1256
|
} else {
|
1033
|
-
|
1034
|
-
const auto nth_index = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
|
1035
|
-
if (!n) {
|
1036
|
-
CopyCell(payload_collection, 0, nth_index, result, output_offset);
|
1037
|
-
} else {
|
1038
|
-
FlatVector::SetNull(result, output_offset, true);
|
1039
|
-
}
|
1257
|
+
FlatVector::SetNull(result, i, true);
|
1040
1258
|
}
|
1041
1259
|
}
|
1042
|
-
break;
|
1043
|
-
}
|
1044
|
-
default:
|
1045
|
-
throw InternalException("Window aggregate type %s", ExpressionTypeToString(wexpr.type));
|
1046
1260
|
}
|
1047
1261
|
}
|
1048
|
-
|
1049
|
-
result.Verify(input_chunk.size());
|
1050
1262
|
}
|
1051
1263
|
|
1052
1264
|
//===--------------------------------------------------------------------===//
|
@@ -1281,7 +1493,7 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1281
1493
|
for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
|
1282
1494
|
D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
|
1283
1495
|
auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
|
1284
|
-
auto wexec = make_uniq<WindowExecutor>(wexpr, context, partition_mask, count);
|
1496
|
+
auto wexec = make_uniq<WindowExecutor>(wexpr, context, partition_mask, count, gstate.mode);
|
1285
1497
|
window_execs.emplace_back(std::move(wexec));
|
1286
1498
|
}
|
1287
1499
|
|
@@ -1305,7 +1517,7 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1305
1517
|
|
1306
1518
|
// TODO: Parallelization opportunity
|
1307
1519
|
for (auto &wexec : window_execs) {
|
1308
|
-
wexec->Finalize(
|
1520
|
+
wexec->Finalize();
|
1309
1521
|
}
|
1310
1522
|
|
1311
1523
|
// External scanning assumes all blocks are swizzled.
|