duckdb 0.8.2-dev157.0 → 0.8.2-dev1573.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +4619 -4446
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/extra_type_info.cpp +506 -0
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +14 -14
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types.cpp +8 -655
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +4 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
- package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +79 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +44 -31
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
- package/src/statement.cpp +10 -3
- package/test/test_all_types.test.ts +233 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -13,12 +13,15 @@
|
|
13
13
|
#include "duckdb/parallel/event.hpp"
|
14
14
|
#include "duckdb/parallel/thread_context.hpp"
|
15
15
|
|
16
|
+
#include <thread>
|
17
|
+
|
16
18
|
namespace duckdb {
|
17
19
|
|
18
20
|
PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<PhysicalOperator> left,
|
19
21
|
unique_ptr<PhysicalOperator> right)
|
20
22
|
: PhysicalComparisonJoin(op, PhysicalOperatorType::ASOF_JOIN, std::move(op.conditions), op.join_type,
|
21
|
-
op.estimated_cardinality)
|
23
|
+
op.estimated_cardinality),
|
24
|
+
comparison_type(ExpressionType::INVALID) {
|
22
25
|
|
23
26
|
// Convert the conditions partitions and sorts
|
24
27
|
for (auto &cond : conditions) {
|
@@ -29,9 +32,19 @@ PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<Physica
|
|
29
32
|
auto right = cond.right->Copy();
|
30
33
|
switch (cond.comparison) {
|
31
34
|
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
35
|
+
case ExpressionType::COMPARE_GREATERTHAN:
|
32
36
|
null_sensitive.emplace_back(lhs_orders.size());
|
33
37
|
lhs_orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(left));
|
34
38
|
rhs_orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(right));
|
39
|
+
comparison_type = cond.comparison;
|
40
|
+
break;
|
41
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
42
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
43
|
+
// Always put NULLS LAST so they can be ignored.
|
44
|
+
null_sensitive.emplace_back(lhs_orders.size());
|
45
|
+
lhs_orders.emplace_back(OrderType::DESCENDING, OrderByNullType::NULLS_LAST, std::move(left));
|
46
|
+
rhs_orders.emplace_back(OrderType::DESCENDING, OrderByNullType::NULLS_LAST, std::move(right));
|
47
|
+
comparison_type = cond.comparison;
|
35
48
|
break;
|
36
49
|
case ExpressionType::COMPARE_EQUAL:
|
37
50
|
null_sensitive.emplace_back(lhs_orders.size());
|
@@ -67,21 +80,32 @@ PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<Physica
|
|
67
80
|
class AsOfGlobalSinkState : public GlobalSinkState {
|
68
81
|
public:
|
69
82
|
AsOfGlobalSinkState(ClientContext &context, const PhysicalAsOfJoin &op)
|
70
|
-
:
|
71
|
-
op.estimated_cardinality),
|
83
|
+
: rhs_sink(context, op.rhs_partitions, op.rhs_orders, op.children[1]->types, {}, op.estimated_cardinality),
|
72
84
|
is_outer(IsRightOuterJoin(op.join_type)), has_null(false) {
|
73
85
|
}
|
74
86
|
|
75
87
|
idx_t Count() const {
|
76
|
-
return
|
88
|
+
return rhs_sink.count;
|
77
89
|
}
|
78
90
|
|
79
|
-
|
91
|
+
PartitionLocalSinkState *RegisterBuffer(ClientContext &context) {
|
92
|
+
lock_guard<mutex> guard(lock);
|
93
|
+
lhs_buffers.emplace_back(make_uniq<PartitionLocalSinkState>(context, *lhs_sink));
|
94
|
+
return lhs_buffers.back().get();
|
95
|
+
}
|
96
|
+
|
97
|
+
PartitionGlobalSinkState rhs_sink;
|
80
98
|
|
81
99
|
// One per partition
|
82
100
|
const bool is_outer;
|
83
101
|
vector<OuterJoinMarker> right_outers;
|
84
102
|
bool has_null;
|
103
|
+
|
104
|
+
// Left side buffering
|
105
|
+
unique_ptr<PartitionGlobalSinkState> lhs_sink;
|
106
|
+
|
107
|
+
mutex lock;
|
108
|
+
vector<unique_ptr<PartitionLocalSinkState>> lhs_buffers;
|
85
109
|
};
|
86
110
|
|
87
111
|
class AsOfLocalSinkState : public LocalSinkState {
|
@@ -108,7 +132,7 @@ unique_ptr<GlobalSinkState> PhysicalAsOfJoin::GetGlobalSinkState(ClientContext &
|
|
108
132
|
unique_ptr<LocalSinkState> PhysicalAsOfJoin::GetLocalSinkState(ExecutionContext &context) const {
|
109
133
|
// We only sink the RHS
|
110
134
|
auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
|
111
|
-
return make_uniq<AsOfLocalSinkState>(context.client, gsink.
|
135
|
+
return make_uniq<AsOfLocalSinkState>(context.client, gsink.rhs_sink);
|
112
136
|
}
|
113
137
|
|
114
138
|
SinkResultType PhysicalAsOfJoin::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
|
@@ -131,15 +155,21 @@ SinkFinalizeType PhysicalAsOfJoin::Finalize(Pipeline &pipeline, Event &event, Cl
|
|
131
155
|
GlobalSinkState &gstate_p) const {
|
132
156
|
auto &gstate = gstate_p.Cast<AsOfGlobalSinkState>();
|
133
157
|
|
158
|
+
// The data is all in so we can initialise the left partitioning.
|
159
|
+
const vector<unique_ptr<BaseStatistics>> partitions_stats;
|
160
|
+
gstate.lhs_sink = make_uniq<PartitionGlobalSinkState>(context, lhs_partitions, lhs_orders, children[0]->types,
|
161
|
+
partitions_stats, 0);
|
162
|
+
gstate.lhs_sink->SyncPartitioning(gstate.rhs_sink);
|
163
|
+
|
134
164
|
// Find the first group to sort
|
135
|
-
auto &groups = gstate.
|
165
|
+
auto &groups = gstate.rhs_sink.grouping_data->GetPartitions();
|
136
166
|
if (groups.empty() && EmptyResultIfRHSIsEmpty()) {
|
137
167
|
// Empty input!
|
138
168
|
return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
|
139
169
|
}
|
140
170
|
|
141
171
|
// Schedule all the sorts for maximum thread utilisation
|
142
|
-
auto new_event = make_shared<PartitionMergeEvent>(gstate.
|
172
|
+
auto new_event = make_shared<PartitionMergeEvent>(gstate.rhs_sink, pipeline);
|
143
173
|
event.InsertEvent(std::move(new_event));
|
144
174
|
|
145
175
|
return SinkFinalizeType::READY;
|
@@ -152,10 +182,10 @@ class AsOfGlobalState : public GlobalOperatorState {
|
|
152
182
|
public:
|
153
183
|
explicit AsOfGlobalState(AsOfGlobalSinkState &gsink) {
|
154
184
|
// for FULL/RIGHT OUTER JOIN, initialize right_outers to false for every tuple
|
155
|
-
auto &
|
185
|
+
auto &rhs_partition = gsink.rhs_sink;
|
156
186
|
auto &right_outers = gsink.right_outers;
|
157
|
-
right_outers.reserve(
|
158
|
-
for (const auto &hash_group :
|
187
|
+
right_outers.reserve(rhs_partition.hash_groups.size());
|
188
|
+
for (const auto &hash_group : rhs_partition.hash_groups) {
|
159
189
|
right_outers.emplace_back(OuterJoinMarker(gsink.is_outer));
|
160
190
|
right_outers.back().Initialize(hash_group->count);
|
161
191
|
}
|
@@ -169,79 +199,47 @@ unique_ptr<GlobalOperatorState> PhysicalAsOfJoin::GetGlobalOperatorState(ClientC
|
|
169
199
|
|
170
200
|
class AsOfLocalState : public CachingOperatorState {
|
171
201
|
public:
|
172
|
-
|
173
|
-
|
202
|
+
AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op)
|
203
|
+
: context(context), allocator(Allocator::Get(context)), op(op), lhs_executor(context),
|
204
|
+
left_outer(IsLeftOuterJoin(op.join_type)), fetch_next_left(true) {
|
205
|
+
lhs_keys.Initialize(allocator, op.join_key_types);
|
206
|
+
for (const auto &cond : op.conditions) {
|
207
|
+
lhs_executor.AddExpression(*cond.left);
|
208
|
+
}
|
174
209
|
|
175
|
-
|
210
|
+
lhs_payload.Initialize(allocator, op.children[0]->types);
|
211
|
+
lhs_sel.Initialize();
|
212
|
+
left_outer.Initialize(STANDARD_VECTOR_SIZE);
|
176
213
|
|
177
|
-
|
178
|
-
|
214
|
+
auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
|
215
|
+
lhs_partition_sink = gsink.RegisterBuffer(context);
|
216
|
+
}
|
179
217
|
|
180
|
-
|
218
|
+
bool Sink(DataChunk &input);
|
219
|
+
OperatorResultType ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk);
|
181
220
|
|
182
221
|
ClientContext &context;
|
183
222
|
Allocator &allocator;
|
184
223
|
const PhysicalAsOfJoin &op;
|
185
|
-
BufferManager &buffer_manager;
|
186
|
-
const bool force_external;
|
187
|
-
Orders lhs_orders;
|
188
224
|
|
189
|
-
// LHS sorting
|
190
225
|
ExpressionExecutor lhs_executor;
|
191
226
|
DataChunk lhs_keys;
|
192
227
|
ValidityMask lhs_valid_mask;
|
193
228
|
SelectionVector lhs_sel;
|
194
|
-
|
195
|
-
RowLayout lhs_layout;
|
196
|
-
unique_ptr<GlobalSortState> lhs_global_state;
|
197
|
-
DataChunk lhs_sorted;
|
198
|
-
|
199
|
-
// LHS binning
|
200
|
-
Vector hash_vector;
|
201
|
-
Vector bin_vector;
|
229
|
+
DataChunk lhs_payload;
|
202
230
|
|
203
|
-
// Output
|
204
|
-
idx_t lhs_match_count;
|
205
|
-
SelectionVector lhs_matched;
|
206
231
|
OuterJoinMarker left_outer;
|
207
232
|
bool fetch_next_left;
|
208
|
-
DataChunk group_payload;
|
209
|
-
DataChunk rhs_payload;
|
210
|
-
};
|
211
233
|
|
212
|
-
|
213
|
-
|
214
|
-
buffer_manager(BufferManager::GetBufferManager(context)), force_external(force_external), lhs_executor(context),
|
215
|
-
hash_vector(LogicalType::HASH), bin_vector(LogicalType::HASH), left_outer(IsLeftOuterJoin(op.join_type)),
|
216
|
-
fetch_next_left(true) {
|
217
|
-
vector<unique_ptr<BaseStatistics>> partition_stats;
|
218
|
-
Orders partitions; // Not used.
|
219
|
-
PartitionGlobalSinkState::GenerateOrderings(partitions, lhs_orders, op.lhs_partitions, op.lhs_orders,
|
220
|
-
partition_stats);
|
221
|
-
|
222
|
-
// We sort the row numbers of the incoming block, not the rows
|
223
|
-
lhs_layout.Initialize({LogicalType::UINTEGER});
|
224
|
-
lhs_sorted.Initialize(allocator, lhs_layout.GetTypes());
|
225
|
-
|
226
|
-
lhs_keys.Initialize(allocator, op.join_key_types);
|
227
|
-
for (const auto &cond : op.conditions) {
|
228
|
-
lhs_executor.AddExpression(*cond.left);
|
229
|
-
}
|
230
|
-
|
231
|
-
group_payload.Initialize(allocator, op.children[1]->types);
|
232
|
-
rhs_payload.Initialize(allocator, op.children[1]->types);
|
233
|
-
|
234
|
-
lhs_matched.Initialize();
|
235
|
-
lhs_sel.Initialize();
|
236
|
-
left_outer.Initialize(STANDARD_VECTOR_SIZE);
|
237
|
-
}
|
234
|
+
optional_ptr<PartitionLocalSinkState> lhs_partition_sink;
|
235
|
+
};
|
238
236
|
|
239
|
-
|
237
|
+
bool AsOfLocalState::Sink(DataChunk &input) {
|
240
238
|
// Compute the join keys
|
241
239
|
lhs_keys.Reset();
|
242
240
|
lhs_executor.Execute(input, lhs_keys);
|
243
241
|
|
244
|
-
//
|
242
|
+
// Combine the NULLs
|
245
243
|
const auto count = input.size();
|
246
244
|
lhs_valid_mask.Reset();
|
247
245
|
for (auto col_idx : op.null_sensitive) {
|
@@ -251,17 +249,19 @@ void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
|
|
251
249
|
lhs_valid_mask.Combine(unified.validity, count);
|
252
250
|
}
|
253
251
|
|
254
|
-
// Convert the mask to a selection vector
|
255
|
-
//
|
256
|
-
lhs_valid = 0;
|
252
|
+
// Convert the mask to a selection vector
|
253
|
+
// and mark all the rows that cannot match for early return.
|
254
|
+
idx_t lhs_valid = 0;
|
257
255
|
const auto entry_count = lhs_valid_mask.EntryCount(count);
|
258
256
|
idx_t base_idx = 0;
|
257
|
+
left_outer.Reset();
|
259
258
|
for (idx_t entry_idx = 0; entry_idx < entry_count;) {
|
260
259
|
const auto validity_entry = lhs_valid_mask.GetValidityEntry(entry_idx++);
|
261
260
|
const auto next = MinValue<idx_t>(base_idx + ValidityMask::BITS_PER_VALUE, count);
|
262
261
|
if (ValidityMask::AllValid(validity_entry)) {
|
263
262
|
for (; base_idx < next; ++base_idx) {
|
264
263
|
lhs_sel.set_index(lhs_valid++, base_idx);
|
264
|
+
left_outer.SetMatch(base_idx);
|
265
265
|
}
|
266
266
|
} else if (ValidityMask::NoneValid(validity_entry)) {
|
267
267
|
base_idx = next;
|
@@ -270,120 +270,237 @@ void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
|
|
270
270
|
for (; base_idx < next; ++base_idx) {
|
271
271
|
if (ValidityMask::RowIsValid(validity_entry, base_idx - start)) {
|
272
272
|
lhs_sel.set_index(lhs_valid++, base_idx);
|
273
|
+
left_outer.SetMatch(base_idx);
|
273
274
|
}
|
274
275
|
}
|
275
276
|
}
|
276
277
|
}
|
277
278
|
|
278
279
|
// Slice the keys to the ones we can match
|
279
|
-
|
280
|
-
|
280
|
+
lhs_payload.Reset();
|
281
|
+
if (lhs_valid == count) {
|
282
|
+
lhs_payload.Reference(input);
|
283
|
+
lhs_payload.SetCardinality(input);
|
284
|
+
} else {
|
285
|
+
lhs_payload.Slice(input, lhs_sel, lhs_valid);
|
286
|
+
lhs_payload.SetCardinality(lhs_valid);
|
287
|
+
|
288
|
+
// Flush the ones that can't match
|
289
|
+
fetch_next_left = false;
|
281
290
|
}
|
282
291
|
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
292
|
+
lhs_partition_sink->Sink(lhs_payload);
|
293
|
+
|
294
|
+
return false;
|
295
|
+
}
|
296
|
+
|
297
|
+
OperatorResultType AsOfLocalState::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk) {
|
298
|
+
input.Verify();
|
299
|
+
Sink(input);
|
300
|
+
|
301
|
+
// If there were any unmatchable rows, return them now so we can forget about them.
|
302
|
+
if (!fetch_next_left) {
|
303
|
+
fetch_next_left = true;
|
304
|
+
left_outer.ConstructLeftJoinResult(input, chunk);
|
305
|
+
left_outer.Reset();
|
306
|
+
}
|
307
|
+
|
308
|
+
// Just keep asking for data and buffering it
|
309
|
+
return OperatorResultType::NEED_MORE_INPUT;
|
310
|
+
}
|
311
|
+
|
312
|
+
OperatorResultType PhysicalAsOfJoin::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
|
313
|
+
GlobalOperatorState &gstate, OperatorState &lstate_p) const {
|
314
|
+
auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
|
315
|
+
auto &lstate = lstate_p.Cast<AsOfLocalState>();
|
316
|
+
|
317
|
+
if (gsink.rhs_sink.count == 0) {
|
318
|
+
// empty RHS
|
319
|
+
if (!EmptyResultIfRHSIsEmpty()) {
|
320
|
+
ConstructEmptyJoinResult(join_type, gsink.has_null, input, chunk);
|
321
|
+
return OperatorResultType::NEED_MORE_INPUT;
|
322
|
+
} else {
|
323
|
+
return OperatorResultType::FINISHED;
|
293
324
|
}
|
325
|
+
}
|
326
|
+
|
327
|
+
return lstate.ExecuteInternal(context, input, chunk);
|
328
|
+
}
|
294
329
|
|
295
|
-
|
296
|
-
|
297
|
-
|
330
|
+
//===--------------------------------------------------------------------===//
|
331
|
+
// Source
|
332
|
+
//===--------------------------------------------------------------------===//
|
333
|
+
class AsOfProbeBuffer {
|
334
|
+
public:
|
335
|
+
using Orders = vector<BoundOrderByNode>;
|
336
|
+
|
337
|
+
static bool IsExternal(ClientContext &context) {
|
338
|
+
return ClientConfig::GetConfig(context).force_external;
|
298
339
|
}
|
299
340
|
|
300
|
-
|
301
|
-
lhs_global_state = make_uniq<GlobalSortState>(buffer_manager, lhs_orders, lhs_layout);
|
302
|
-
auto &global_state = *lhs_global_state;
|
303
|
-
LocalSortState local_sort;
|
304
|
-
local_sort.Initialize(*lhs_global_state, buffer_manager);
|
341
|
+
AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin &op);
|
305
342
|
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
343
|
+
public:
|
344
|
+
void ResolveJoin(bool *found_matches, idx_t *matches = nullptr);
|
345
|
+
bool Scanning() const {
|
346
|
+
return lhs_scanner.get();
|
347
|
+
}
|
348
|
+
void BeginLeftScan(hash_t scan_bin);
|
349
|
+
bool NextLeft();
|
350
|
+
void EndScan();
|
351
|
+
|
352
|
+
// resolve joins that output max N elements (SEMI, ANTI, MARK)
|
353
|
+
void ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk);
|
354
|
+
// resolve joins that can potentially output N*M elements (INNER, LEFT, FULL)
|
355
|
+
void ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk);
|
356
|
+
// Chunk may be empty
|
357
|
+
void GetData(ExecutionContext &context, DataChunk &chunk);
|
358
|
+
bool HasMoreData() const {
|
359
|
+
return !fetch_next_left || (lhs_scanner && lhs_scanner->Remaining());
|
360
|
+
}
|
361
|
+
|
362
|
+
ClientContext &context;
|
363
|
+
Allocator &allocator;
|
364
|
+
const PhysicalAsOfJoin &op;
|
365
|
+
BufferManager &buffer_manager;
|
366
|
+
const bool force_external;
|
367
|
+
const idx_t memory_per_thread;
|
368
|
+
Orders lhs_orders;
|
369
|
+
|
370
|
+
// LHS scanning
|
371
|
+
SelectionVector lhs_sel;
|
372
|
+
optional_ptr<PartitionGlobalHashGroup> left_hash;
|
373
|
+
OuterJoinMarker left_outer;
|
374
|
+
unique_ptr<SBIterator> left_itr;
|
375
|
+
unique_ptr<PayloadScanner> lhs_scanner;
|
376
|
+
DataChunk lhs_payload;
|
377
|
+
|
378
|
+
// RHS scanning
|
379
|
+
optional_ptr<PartitionGlobalHashGroup> right_hash;
|
380
|
+
optional_ptr<OuterJoinMarker> right_outer;
|
381
|
+
unique_ptr<SBIterator> right_itr;
|
382
|
+
unique_ptr<PayloadScanner> rhs_scanner;
|
383
|
+
DataChunk rhs_payload;
|
384
|
+
|
385
|
+
idx_t lhs_match_count;
|
386
|
+
bool fetch_next_left;
|
387
|
+
};
|
388
|
+
|
389
|
+
AsOfProbeBuffer::AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin &op)
|
390
|
+
: context(context), allocator(Allocator::Get(context)), op(op),
|
391
|
+
buffer_manager(BufferManager::GetBufferManager(context)), force_external(IsExternal(context)),
|
392
|
+
memory_per_thread(op.GetMaxThreadMemory(context)), left_outer(IsLeftOuterJoin(op.join_type)),
|
393
|
+
fetch_next_left(true) {
|
394
|
+
vector<unique_ptr<BaseStatistics>> partition_stats;
|
395
|
+
Orders partitions; // Not used.
|
396
|
+
PartitionGlobalSinkState::GenerateOrderings(partitions, lhs_orders, op.lhs_partitions, op.lhs_orders,
|
397
|
+
partition_stats);
|
398
|
+
|
399
|
+
// We sort the row numbers of the incoming block, not the rows
|
400
|
+
lhs_payload.Initialize(allocator, op.children[0]->types);
|
401
|
+
rhs_payload.Initialize(allocator, op.children[1]->types);
|
311
402
|
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
403
|
+
lhs_sel.Initialize();
|
404
|
+
left_outer.Initialize(STANDARD_VECTOR_SIZE);
|
405
|
+
}
|
406
|
+
|
407
|
+
void AsOfProbeBuffer::BeginLeftScan(hash_t scan_bin) {
|
408
|
+
auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
|
409
|
+
auto &lhs_sink = *gsink.lhs_sink;
|
410
|
+
const auto left_group = lhs_sink.bin_groups[scan_bin];
|
411
|
+
if (left_group >= lhs_sink.bin_groups.size()) {
|
412
|
+
return;
|
320
413
|
}
|
321
414
|
|
322
|
-
|
323
|
-
|
415
|
+
auto iterator_comp = ExpressionType::INVALID;
|
416
|
+
switch (op.comparison_type) {
|
417
|
+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
418
|
+
iterator_comp = ExpressionType::COMPARE_LESSTHANOREQUALTO;
|
419
|
+
break;
|
420
|
+
case ExpressionType::COMPARE_GREATERTHAN:
|
421
|
+
iterator_comp = ExpressionType::COMPARE_LESSTHAN;
|
422
|
+
break;
|
423
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
424
|
+
iterator_comp = ExpressionType::COMPARE_GREATERTHANOREQUALTO;
|
425
|
+
break;
|
426
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
427
|
+
iterator_comp = ExpressionType::COMPARE_GREATERTHAN;
|
428
|
+
break;
|
429
|
+
default:
|
430
|
+
throw NotImplementedException("Unsupported comparison type for ASOF join");
|
431
|
+
}
|
324
432
|
|
325
|
-
|
326
|
-
|
327
|
-
|
433
|
+
left_hash = lhs_sink.hash_groups[left_group].get();
|
434
|
+
auto &left_sort = *(left_hash->global_sort);
|
435
|
+
lhs_scanner = make_uniq<PayloadScanner>(left_sort, false);
|
436
|
+
left_itr = make_uniq<SBIterator>(left_sort, iterator_comp);
|
437
|
+
|
438
|
+
// We are only probing the corresponding right side bin, which may be empty
|
439
|
+
// If they are empty, we leave the iterator as null so we can emit left matches
|
440
|
+
auto &rhs_sink = gsink.rhs_sink;
|
441
|
+
const auto right_group = rhs_sink.bin_groups[scan_bin];
|
442
|
+
if (right_group < rhs_sink.bin_groups.size()) {
|
443
|
+
right_hash = rhs_sink.hash_groups[right_group].get();
|
444
|
+
right_outer = gsink.right_outers.data() + right_group;
|
445
|
+
auto &right_sort = *(right_hash->global_sort);
|
446
|
+
right_itr = make_uniq<SBIterator>(right_sort, iterator_comp);
|
447
|
+
rhs_scanner = make_uniq<PayloadScanner>(right_sort, false);
|
448
|
+
}
|
328
449
|
}
|
329
450
|
|
330
|
-
|
331
|
-
|
332
|
-
|
451
|
+
bool AsOfProbeBuffer::NextLeft() {
|
452
|
+
if (!HasMoreData()) {
|
453
|
+
return false;
|
454
|
+
}
|
333
455
|
|
334
|
-
|
335
|
-
|
456
|
+
// Scan the next sorted chunk
|
457
|
+
lhs_payload.Reset();
|
458
|
+
left_itr->SetIndex(lhs_scanner->Scanned());
|
459
|
+
lhs_scanner->Scan(lhs_payload);
|
336
460
|
|
337
|
-
|
338
|
-
|
339
|
-
UnifiedVectorFormat bin_unified;
|
340
|
-
bin_vector.ToUnifiedFormat(lhs_valid, bin_unified);
|
341
|
-
const auto bins = UnifiedVectorFormat::GetData<hash_t>(bin_unified);
|
461
|
+
return true;
|
462
|
+
}
|
342
463
|
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
464
|
+
void AsOfProbeBuffer::EndScan() {
|
465
|
+
right_hash = nullptr;
|
466
|
+
right_itr.reset();
|
467
|
+
rhs_scanner.reset();
|
468
|
+
right_outer = nullptr;
|
469
|
+
|
470
|
+
left_hash = nullptr;
|
471
|
+
left_itr.reset();
|
472
|
+
lhs_scanner.reset();
|
473
|
+
}
|
474
|
+
|
475
|
+
void AsOfProbeBuffer::ResolveJoin(bool *found_match, idx_t *matches) {
|
476
|
+
// If there was no right partition, there are no matches
|
349
477
|
lhs_match_count = 0;
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
// No matching partition
|
361
|
-
hash_group = nullptr;
|
362
|
-
right_outer = nullptr;
|
363
|
-
right.reset();
|
364
|
-
continue;
|
365
|
-
}
|
366
|
-
hash_group = global_partition.hash_groups[group_idx].get();
|
367
|
-
right_outer = gsink.right_outers.data() + group_idx;
|
368
|
-
right = make_uniq<SBIterator>(*(hash_group->global_sort), ExpressionType::COMPARE_LESSTHANOREQUALTO);
|
369
|
-
}
|
370
|
-
left.SetIndex(i);
|
478
|
+
left_outer.Reset();
|
479
|
+
if (!right_itr) {
|
480
|
+
return;
|
481
|
+
}
|
482
|
+
|
483
|
+
const auto count = lhs_payload.size();
|
484
|
+
const auto left_base = left_itr->GetIndex();
|
485
|
+
// Searching for right <= left
|
486
|
+
for (idx_t i = 0; i < count; ++i) {
|
487
|
+
left_itr->SetIndex(left_base + i);
|
371
488
|
|
372
489
|
// If right > left, then there is no match
|
373
|
-
if (!
|
490
|
+
if (!right_itr->Compare(*left_itr)) {
|
374
491
|
continue;
|
375
492
|
}
|
376
493
|
|
377
494
|
// Exponential search forward for a non-matching value using radix iterators
|
378
495
|
// (We use exponential search to avoid thrashing the block manager on large probes)
|
379
496
|
idx_t bound = 1;
|
380
|
-
idx_t begin =
|
381
|
-
|
382
|
-
while (
|
383
|
-
if (
|
497
|
+
idx_t begin = right_itr->GetIndex();
|
498
|
+
right_itr->SetIndex(begin + bound);
|
499
|
+
while (right_itr->GetIndex() < right_hash->count) {
|
500
|
+
if (right_itr->Compare(*left_itr)) {
|
384
501
|
// If right <= left, jump ahead
|
385
502
|
bound *= 2;
|
386
|
-
|
503
|
+
right_itr->SetIndex(begin + bound);
|
387
504
|
} else {
|
388
505
|
break;
|
389
506
|
}
|
@@ -392,255 +509,298 @@ void AsOfLocalState::ResolveJoin(DataChunk &input, bool *found_match, std::pair<
|
|
392
509
|
// Binary search for the first non-matching value using radix iterators
|
393
510
|
// The previous value (which we know exists) is the match
|
394
511
|
auto first = begin + bound / 2;
|
395
|
-
auto last = MinValue<idx_t>(begin + bound,
|
512
|
+
auto last = MinValue<idx_t>(begin + bound, right_hash->count);
|
396
513
|
while (first < last) {
|
397
514
|
const auto mid = first + (last - first) / 2;
|
398
|
-
|
399
|
-
if (
|
515
|
+
right_itr->SetIndex(mid);
|
516
|
+
if (right_itr->Compare(*left_itr)) {
|
400
517
|
// If right <= left, new lower bound
|
401
518
|
first = mid + 1;
|
402
519
|
} else {
|
403
520
|
last = mid;
|
404
521
|
}
|
405
522
|
}
|
406
|
-
|
523
|
+
right_itr->SetIndex(--first);
|
407
524
|
|
408
525
|
// Check partitions for strict equality
|
409
|
-
if (
|
526
|
+
if (right_hash->ComparePartitions(*left_itr, *right_itr)) {
|
410
527
|
continue;
|
411
528
|
}
|
412
529
|
|
413
530
|
// Emit match data
|
414
531
|
right_outer->SetMatch(first);
|
415
|
-
left_outer.SetMatch(
|
532
|
+
left_outer.SetMatch(i);
|
416
533
|
if (found_match) {
|
417
|
-
found_match[
|
534
|
+
found_match[i] = true;
|
418
535
|
}
|
419
536
|
if (matches) {
|
420
|
-
matches[
|
537
|
+
matches[i] = first;
|
421
538
|
}
|
422
|
-
|
539
|
+
lhs_sel.set_index(lhs_match_count++, i);
|
423
540
|
}
|
424
541
|
}
|
425
542
|
|
426
543
|
unique_ptr<OperatorState> PhysicalAsOfJoin::GetOperatorState(ExecutionContext &context) const {
|
427
|
-
|
428
|
-
return make_uniq<AsOfLocalState>(context.client, *this, config.force_external);
|
544
|
+
return make_uniq<AsOfLocalState>(context.client, *this);
|
429
545
|
}
|
430
546
|
|
431
|
-
void
|
432
|
-
OperatorState &lstate_p) const {
|
433
|
-
auto &lstate = lstate_p.Cast<AsOfLocalState>();
|
434
|
-
auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
|
435
|
-
|
547
|
+
void AsOfProbeBuffer::ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk) {
|
436
548
|
// perform the actual join
|
437
549
|
bool found_match[STANDARD_VECTOR_SIZE] = {false};
|
438
|
-
|
550
|
+
ResolveJoin(found_match);
|
439
551
|
|
440
552
|
// now construct the result based on the join result
|
441
|
-
switch (join_type) {
|
442
|
-
case JoinType::MARK: {
|
443
|
-
PhysicalJoin::ConstructMarkJoinResult(lstate.lhs_keys, input, chunk, found_match, gsink.has_null);
|
444
|
-
break;
|
445
|
-
}
|
553
|
+
switch (op.join_type) {
|
446
554
|
case JoinType::SEMI:
|
447
|
-
PhysicalJoin::ConstructSemiJoinResult(
|
555
|
+
PhysicalJoin::ConstructSemiJoinResult(lhs_payload, chunk, found_match);
|
448
556
|
break;
|
449
557
|
case JoinType::ANTI:
|
450
|
-
PhysicalJoin::ConstructAntiJoinResult(
|
558
|
+
PhysicalJoin::ConstructAntiJoinResult(lhs_payload, chunk, found_match);
|
451
559
|
break;
|
452
560
|
default:
|
453
561
|
throw NotImplementedException("Unimplemented join type for AsOf join");
|
454
562
|
}
|
455
563
|
}
|
456
564
|
|
457
|
-
|
458
|
-
OperatorState &lstate_p) const {
|
459
|
-
auto &lstate = lstate_p.Cast<AsOfLocalState>();
|
460
|
-
auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
|
461
|
-
|
462
|
-
if (!lstate.fetch_next_left) {
|
463
|
-
lstate.fetch_next_left = true;
|
464
|
-
if (lstate.left_outer.Enabled()) {
|
465
|
-
// left join: before we move to the next chunk, see if we need to output any vectors that didn't
|
466
|
-
// have a match found
|
467
|
-
lstate.left_outer.ConstructLeftJoinResult(input, chunk);
|
468
|
-
lstate.left_outer.Reset();
|
469
|
-
}
|
470
|
-
return OperatorResultType::NEED_MORE_INPUT;
|
471
|
-
}
|
472
|
-
|
565
|
+
void AsOfProbeBuffer::ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk) {
|
473
566
|
// perform the actual join
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
hash_t scan_bin = global_partition.bin_groups.size();
|
481
|
-
optional_ptr<PartitionGlobalHashGroup> hash_group;
|
482
|
-
unique_ptr<PayloadScanner> scanner;
|
483
|
-
for (idx_t i = 0; i < lstate.lhs_match_count; ++i) {
|
484
|
-
const auto idx = lstate.lhs_matched[i];
|
485
|
-
const auto match_bin = matches[idx].first;
|
486
|
-
const auto match_pos = matches[idx].second;
|
487
|
-
if (match_bin != scan_bin) {
|
488
|
-
// Grab the next group
|
489
|
-
const auto group_idx = global_partition.bin_groups[match_bin];
|
490
|
-
hash_group = global_partition.hash_groups[group_idx].get();
|
491
|
-
scan_bin = match_bin;
|
492
|
-
scanner = make_uniq<PayloadScanner>(*hash_group->global_sort, false);
|
493
|
-
lstate.group_payload.Reset();
|
494
|
-
}
|
567
|
+
idx_t matches[STANDARD_VECTOR_SIZE];
|
568
|
+
ResolveJoin(nullptr, matches);
|
569
|
+
|
570
|
+
for (idx_t i = 0; i < lhs_match_count; ++i) {
|
571
|
+
const auto idx = lhs_sel[i];
|
572
|
+
const auto match_pos = matches[idx];
|
495
573
|
// Skip to the range containing the match
|
496
|
-
while (match_pos >=
|
497
|
-
|
498
|
-
|
574
|
+
while (match_pos >= rhs_scanner->Scanned()) {
|
575
|
+
rhs_payload.Reset();
|
576
|
+
rhs_scanner->Scan(rhs_payload);
|
499
577
|
}
|
500
578
|
// Append the individual values
|
501
579
|
// TODO: Batch the copies
|
502
|
-
const auto source_offset = match_pos - (
|
503
|
-
for (
|
504
|
-
const auto rhs_idx = right_projection_map[col_idx];
|
505
|
-
auto &source =
|
506
|
-
auto &target = chunk.data[
|
580
|
+
const auto source_offset = match_pos - (rhs_scanner->Scanned() - rhs_payload.size());
|
581
|
+
for (column_t col_idx = 0; col_idx < op.right_projection_map.size(); ++col_idx) {
|
582
|
+
const auto rhs_idx = op.right_projection_map[col_idx];
|
583
|
+
auto &source = rhs_payload.data[rhs_idx];
|
584
|
+
auto &target = chunk.data[lhs_payload.ColumnCount() + col_idx];
|
507
585
|
VectorOperations::Copy(source, target, source_offset + 1, source_offset, i);
|
508
586
|
}
|
509
587
|
}
|
510
588
|
|
511
|
-
// Slice the
|
512
|
-
|
513
|
-
|
514
|
-
// If we are doing a left join, come back for the NULLs
|
515
|
-
if (lstate.left_outer.Enabled()) {
|
516
|
-
lstate.fetch_next_left = false;
|
517
|
-
return OperatorResultType::HAVE_MORE_OUTPUT;
|
589
|
+
// Slice the left payload into the result
|
590
|
+
for (column_t i = 0; i < lhs_payload.ColumnCount(); ++i) {
|
591
|
+
chunk.data[i].Slice(lhs_payload.data[i], lhs_sel, lhs_match_count);
|
518
592
|
}
|
593
|
+
chunk.SetCardinality(lhs_match_count);
|
519
594
|
|
520
|
-
|
595
|
+
// If we are doing a left join, come back for the NULLs
|
596
|
+
fetch_next_left = !left_outer.Enabled();
|
521
597
|
}
|
522
598
|
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
} else {
|
533
|
-
return OperatorResultType::FINISHED;
|
599
|
+
void AsOfProbeBuffer::GetData(ExecutionContext &context, DataChunk &chunk) {
|
600
|
+
// Handle dangling left join results from current chunk
|
601
|
+
if (!fetch_next_left) {
|
602
|
+
fetch_next_left = true;
|
603
|
+
if (left_outer.Enabled()) {
|
604
|
+
// left join: before we move to the next chunk, see if we need to output any vectors that didn't
|
605
|
+
// have a match found
|
606
|
+
left_outer.ConstructLeftJoinResult(lhs_payload, chunk);
|
607
|
+
left_outer.Reset();
|
534
608
|
}
|
609
|
+
return;
|
535
610
|
}
|
536
611
|
|
537
|
-
|
538
|
-
|
612
|
+
// Stop if there is no more data
|
613
|
+
if (!NextLeft()) {
|
614
|
+
return;
|
615
|
+
}
|
616
|
+
|
617
|
+
switch (op.join_type) {
|
539
618
|
case JoinType::SEMI:
|
540
619
|
case JoinType::ANTI:
|
541
620
|
case JoinType::MARK:
|
542
621
|
// simple joins can have max STANDARD_VECTOR_SIZE matches per chunk
|
543
|
-
ResolveSimpleJoin(context,
|
544
|
-
|
622
|
+
ResolveSimpleJoin(context, chunk);
|
623
|
+
break;
|
545
624
|
case JoinType::LEFT:
|
546
625
|
case JoinType::INNER:
|
547
626
|
case JoinType::RIGHT:
|
548
627
|
case JoinType::OUTER:
|
549
|
-
|
628
|
+
ResolveComplexJoin(context, chunk);
|
629
|
+
break;
|
550
630
|
default:
|
551
631
|
throw NotImplementedException("Unimplemented type for as-of join!");
|
552
632
|
}
|
553
633
|
}
|
554
634
|
|
555
|
-
//===--------------------------------------------------------------------===//
|
556
|
-
// Source
|
557
|
-
//===--------------------------------------------------------------------===//
|
558
635
|
class AsOfGlobalSourceState : public GlobalSourceState {
|
559
636
|
public:
|
560
|
-
explicit AsOfGlobalSourceState(
|
637
|
+
explicit AsOfGlobalSourceState(AsOfGlobalSinkState &gsink_p)
|
638
|
+
: gsink(gsink_p), next_combine(0), combined(0), merged(0), mergers(0), next_left(0), flushed(0), next_right(0) {
|
561
639
|
}
|
562
640
|
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
public:
|
568
|
-
idx_t MaxThreads() override {
|
569
|
-
// If there is only one partition, we have to process it on one thread.
|
570
|
-
if (!gsink.grouping_data) {
|
571
|
-
return 1;
|
641
|
+
PartitionGlobalMergeStates &GetMergeStates() {
|
642
|
+
lock_guard<mutex> guard(lock);
|
643
|
+
if (!merge_states) {
|
644
|
+
merge_states = make_uniq<PartitionGlobalMergeStates>(*gsink.lhs_sink);
|
572
645
|
}
|
646
|
+
return *merge_states;
|
647
|
+
}
|
573
648
|
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
649
|
+
AsOfGlobalSinkState &gsink;
|
650
|
+
//! The next buffer to combine
|
651
|
+
atomic<size_t> next_combine;
|
652
|
+
//! The number of combined buffers
|
653
|
+
atomic<size_t> combined;
|
654
|
+
//! The number of completed left-partition merge calls
|
655
|
+
atomic<size_t> merged;
|
656
|
+
//! The number of local source states taking part in the merge
|
657
|
+
atomic<size_t> mergers;
|
658
|
+
//! The next buffer to flush
|
659
|
+
atomic<size_t> next_left;
|
660
|
+
//! The number of flushed buffers
|
661
|
+
atomic<size_t> flushed;
|
662
|
+
//! The right outer output read position.
|
663
|
+
atomic<idx_t> next_right;
|
664
|
+
//! The merge handler
|
665
|
+
mutex lock;
|
666
|
+
unique_ptr<PartitionGlobalMergeStates> merge_states;
|
578
667
|
|
579
|
-
|
668
|
+
public:
|
669
|
+
idx_t MaxThreads() override {
|
670
|
+
return gsink.lhs_buffers.size();
|
580
671
|
}
|
581
672
|
};
|
582
673
|
|
583
674
|
unique_ptr<GlobalSourceState> PhysicalAsOfJoin::GetGlobalSourceState(ClientContext &context) const {
|
584
675
|
auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
|
585
|
-
return make_uniq<AsOfGlobalSourceState>(gsink
|
676
|
+
return make_uniq<AsOfGlobalSourceState>(gsink);
|
586
677
|
}
|
587
678
|
|
588
679
|
class AsOfLocalSourceState : public LocalSourceState {
|
589
680
|
public:
|
590
681
|
using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
|
591
682
|
|
592
|
-
|
683
|
+
AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op);
|
684
|
+
|
685
|
+
void CombineLeftPartitions();
|
686
|
+
void MergeLeftPartitions();
|
593
687
|
|
594
|
-
idx_t
|
688
|
+
idx_t BeginRightScan(const idx_t hash_bin);
|
595
689
|
|
596
|
-
|
690
|
+
AsOfGlobalSourceState &gsource;
|
691
|
+
|
692
|
+
//! The left side partition being probed
|
693
|
+
AsOfProbeBuffer probe_buffer;
|
597
694
|
|
598
695
|
//! The read partition
|
599
696
|
idx_t hash_bin;
|
600
697
|
HashGroupPtr hash_group;
|
601
|
-
|
602
698
|
//! The read cursor
|
603
699
|
unique_ptr<PayloadScanner> scanner;
|
604
|
-
//! Buffer for the inputs
|
605
|
-
DataChunk input_chunk;
|
606
700
|
//! Pointer to the matches
|
607
|
-
const bool *found_match;
|
701
|
+
const bool *found_match = {};
|
608
702
|
};
|
609
703
|
|
610
|
-
AsOfLocalSourceState::AsOfLocalSourceState(
|
611
|
-
|
704
|
+
AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op)
|
705
|
+
: gsource(gsource), probe_buffer(gsource.gsink.lhs_sink->context, op) {
|
706
|
+
gsource.mergers++;
|
707
|
+
}
|
708
|
+
|
709
|
+
void AsOfLocalSourceState::CombineLeftPartitions() {
|
710
|
+
const auto buffer_count = gsource.gsink.lhs_buffers.size();
|
711
|
+
while (gsource.combined < buffer_count) {
|
712
|
+
const auto next_combine = gsource.next_combine++;
|
713
|
+
if (next_combine < buffer_count) {
|
714
|
+
gsource.gsink.lhs_buffers[next_combine]->Combine();
|
715
|
+
++gsource.combined;
|
716
|
+
} else {
|
717
|
+
std::this_thread::yield();
|
718
|
+
}
|
719
|
+
}
|
720
|
+
}
|
721
|
+
|
722
|
+
void AsOfLocalSourceState::MergeLeftPartitions() {
|
723
|
+
PartitionGlobalMergeStates::Callback local_callback;
|
724
|
+
PartitionLocalMergeState local_merge;
|
725
|
+
gsource.GetMergeStates().ExecuteTask(local_merge, local_callback);
|
726
|
+
gsource.merged++;
|
727
|
+
while (gsource.merged < gsource.mergers) {
|
728
|
+
std::this_thread::yield();
|
729
|
+
}
|
612
730
|
}
|
613
731
|
|
614
|
-
idx_t AsOfLocalSourceState::
|
615
|
-
// Get rid of any stale data
|
732
|
+
idx_t AsOfLocalSourceState::BeginRightScan(const idx_t hash_bin_p) {
|
616
733
|
hash_bin = hash_bin_p;
|
617
734
|
|
618
|
-
hash_group = std::move(
|
735
|
+
hash_group = std::move(gsource.gsink.rhs_sink.hash_groups[hash_bin]);
|
619
736
|
scanner = make_uniq<PayloadScanner>(*hash_group->global_sort);
|
620
|
-
found_match =
|
737
|
+
found_match = gsource.gsink.right_outers[hash_bin].GetMatches();
|
621
738
|
|
622
739
|
return scanner->Remaining();
|
623
740
|
}
|
624
741
|
|
625
742
|
unique_ptr<LocalSourceState> PhysicalAsOfJoin::GetLocalSourceState(ExecutionContext &context,
|
626
743
|
GlobalSourceState &gstate) const {
|
627
|
-
auto &
|
628
|
-
return make_uniq<AsOfLocalSourceState>(
|
744
|
+
auto &gsource = gstate.Cast<AsOfGlobalSourceState>();
|
745
|
+
return make_uniq<AsOfLocalSourceState>(gsource, *this);
|
629
746
|
}
|
630
747
|
|
631
748
|
SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk &chunk,
|
632
749
|
OperatorSourceInput &input) const {
|
633
|
-
D_ASSERT(IsRightOuterJoin(join_type));
|
634
|
-
|
635
750
|
auto &gsource = input.global_state.Cast<AsOfGlobalSourceState>();
|
636
751
|
auto &lsource = input.local_state.Cast<AsOfLocalSourceState>();
|
637
|
-
auto &
|
752
|
+
auto &rhs_sink = gsource.gsink.rhs_sink;
|
753
|
+
|
754
|
+
// Step 1: Combine the partitions
|
755
|
+
lsource.CombineLeftPartitions();
|
756
|
+
|
757
|
+
// Step 2: Sort on all threads
|
758
|
+
lsource.MergeLeftPartitions();
|
759
|
+
|
760
|
+
// Step 3: Join the partitions
|
761
|
+
auto &lhs_sink = *gsource.gsink.lhs_sink;
|
762
|
+
auto &partitions = lhs_sink.grouping_data->GetPartitions();
|
763
|
+
const auto left_bins = partitions.size();
|
764
|
+
while (gsource.flushed < left_bins) {
|
765
|
+
// Make sure we have something to flush
|
766
|
+
if (!lsource.probe_buffer.Scanning()) {
|
767
|
+
const auto left_bin = gsource.next_left++;
|
768
|
+
if (left_bin < left_bins) {
|
769
|
+
// More to flush
|
770
|
+
lsource.probe_buffer.BeginLeftScan(left_bin);
|
771
|
+
} else if (!IsRightOuterJoin(join_type)) {
|
772
|
+
return SourceResultType::FINISHED;
|
773
|
+
} else {
|
774
|
+
// Wait for all threads to finish
|
775
|
+
// TODO: How to implement a spin wait correctly?
|
776
|
+
// Returning BLOCKED seems to hang the system.
|
777
|
+
std::this_thread::yield();
|
778
|
+
continue;
|
779
|
+
}
|
780
|
+
}
|
781
|
+
|
782
|
+
lsource.probe_buffer.GetData(context, chunk);
|
783
|
+
if (chunk.size()) {
|
784
|
+
return SourceResultType::HAVE_MORE_OUTPUT;
|
785
|
+
} else if (lsource.probe_buffer.HasMoreData()) {
|
786
|
+
// Join the next partition
|
787
|
+
continue;
|
788
|
+
} else {
|
789
|
+
lsource.probe_buffer.EndScan();
|
790
|
+
gsource.flushed++;
|
791
|
+
}
|
792
|
+
}
|
793
|
+
|
794
|
+
// Step 4: Emit right join matches
|
795
|
+
if (!IsRightOuterJoin(join_type)) {
|
796
|
+
return SourceResultType::FINISHED;
|
797
|
+
}
|
638
798
|
|
639
|
-
auto &hash_groups =
|
640
|
-
const auto
|
799
|
+
auto &hash_groups = rhs_sink.hash_groups;
|
800
|
+
const auto right_groups = hash_groups.size();
|
641
801
|
|
642
802
|
DataChunk rhs_chunk;
|
643
|
-
rhs_chunk.Initialize(Allocator::Get(context.client),
|
803
|
+
rhs_chunk.Initialize(Allocator::Get(context.client), rhs_sink.payload_types);
|
644
804
|
SelectionVector rsel(STANDARD_VECTOR_SIZE);
|
645
805
|
|
646
806
|
while (chunk.size() == 0) {
|
@@ -648,17 +808,17 @@ SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk
|
|
648
808
|
while (!lsource.scanner || !lsource.scanner->Remaining()) {
|
649
809
|
lsource.scanner.reset();
|
650
810
|
lsource.hash_group.reset();
|
651
|
-
auto hash_bin = gsource.
|
652
|
-
if (hash_bin >=
|
811
|
+
auto hash_bin = gsource.next_right++;
|
812
|
+
if (hash_bin >= right_groups) {
|
653
813
|
return SourceResultType::FINISHED;
|
654
814
|
}
|
655
815
|
|
656
|
-
for (; hash_bin < hash_groups.size(); hash_bin = gsource.
|
816
|
+
for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_right++) {
|
657
817
|
if (hash_groups[hash_bin]) {
|
658
818
|
break;
|
659
819
|
}
|
660
820
|
}
|
661
|
-
lsource.
|
821
|
+
lsource.BeginRightScan(hash_bin);
|
662
822
|
}
|
663
823
|
const auto rhs_position = lsource.scanner->Scanned();
|
664
824
|
lsource.scanner->Scan(rhs_chunk);
|