duckdb 0.8.2-dev145.0 → 0.8.2-dev1493.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +42 -5
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +2 -2
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +30 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +3 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
- package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +64 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +5 -0
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +8 -0
- package/src/statement.cpp +10 -3
- package/test/test_all_types.test.ts +233 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -13,6 +13,8 @@
|
|
13
13
|
#include "duckdb/parallel/event.hpp"
|
14
14
|
#include "duckdb/parallel/thread_context.hpp"
|
15
15
|
|
16
|
+
#include <thread>
|
17
|
+
|
16
18
|
namespace duckdb {
|
17
19
|
|
18
20
|
PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<PhysicalOperator> left,
|
@@ -67,21 +69,32 @@ PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<Physica
|
|
67
69
|
class AsOfGlobalSinkState : public GlobalSinkState {
|
68
70
|
public:
|
69
71
|
AsOfGlobalSinkState(ClientContext &context, const PhysicalAsOfJoin &op)
|
70
|
-
:
|
71
|
-
op.estimated_cardinality),
|
72
|
+
: rhs_sink(context, op.rhs_partitions, op.rhs_orders, op.children[1]->types, {}, op.estimated_cardinality),
|
72
73
|
is_outer(IsRightOuterJoin(op.join_type)), has_null(false) {
|
73
74
|
}
|
74
75
|
|
75
76
|
idx_t Count() const {
|
76
|
-
return
|
77
|
+
return rhs_sink.count;
|
78
|
+
}
|
79
|
+
|
80
|
+
PartitionLocalSinkState *RegisterBuffer(ClientContext &context) {
|
81
|
+
lock_guard<mutex> guard(lock);
|
82
|
+
lhs_buffers.emplace_back(make_uniq<PartitionLocalSinkState>(context, *lhs_sink));
|
83
|
+
return lhs_buffers.back().get();
|
77
84
|
}
|
78
85
|
|
79
|
-
PartitionGlobalSinkState
|
86
|
+
PartitionGlobalSinkState rhs_sink;
|
80
87
|
|
81
88
|
// One per partition
|
82
89
|
const bool is_outer;
|
83
90
|
vector<OuterJoinMarker> right_outers;
|
84
91
|
bool has_null;
|
92
|
+
|
93
|
+
// Left side buffering
|
94
|
+
unique_ptr<PartitionGlobalSinkState> lhs_sink;
|
95
|
+
|
96
|
+
mutex lock;
|
97
|
+
vector<unique_ptr<PartitionLocalSinkState>> lhs_buffers;
|
85
98
|
};
|
86
99
|
|
87
100
|
class AsOfLocalSinkState : public LocalSinkState {
|
@@ -108,7 +121,7 @@ unique_ptr<GlobalSinkState> PhysicalAsOfJoin::GetGlobalSinkState(ClientContext &
|
|
108
121
|
unique_ptr<LocalSinkState> PhysicalAsOfJoin::GetLocalSinkState(ExecutionContext &context) const {
|
109
122
|
// We only sink the RHS
|
110
123
|
auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
|
111
|
-
return make_uniq<AsOfLocalSinkState>(context.client, gsink.
|
124
|
+
return make_uniq<AsOfLocalSinkState>(context.client, gsink.rhs_sink);
|
112
125
|
}
|
113
126
|
|
114
127
|
SinkResultType PhysicalAsOfJoin::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
|
@@ -131,15 +144,21 @@ SinkFinalizeType PhysicalAsOfJoin::Finalize(Pipeline &pipeline, Event &event, Cl
|
|
131
144
|
GlobalSinkState &gstate_p) const {
|
132
145
|
auto &gstate = gstate_p.Cast<AsOfGlobalSinkState>();
|
133
146
|
|
147
|
+
// The data is all in so we can initialise the left partitioning.
|
148
|
+
const vector<unique_ptr<BaseStatistics>> partitions_stats;
|
149
|
+
gstate.lhs_sink = make_uniq<PartitionGlobalSinkState>(context, lhs_partitions, lhs_orders, children[0]->types,
|
150
|
+
partitions_stats, 0);
|
151
|
+
gstate.lhs_sink->SyncPartitioning(gstate.rhs_sink);
|
152
|
+
|
134
153
|
// Find the first group to sort
|
135
|
-
auto &groups = gstate.
|
154
|
+
auto &groups = gstate.rhs_sink.grouping_data->GetPartitions();
|
136
155
|
if (groups.empty() && EmptyResultIfRHSIsEmpty()) {
|
137
156
|
// Empty input!
|
138
157
|
return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
|
139
158
|
}
|
140
159
|
|
141
160
|
// Schedule all the sorts for maximum thread utilisation
|
142
|
-
auto new_event = make_shared<PartitionMergeEvent>(gstate.
|
161
|
+
auto new_event = make_shared<PartitionMergeEvent>(gstate.rhs_sink, pipeline);
|
143
162
|
event.InsertEvent(std::move(new_event));
|
144
163
|
|
145
164
|
return SinkFinalizeType::READY;
|
@@ -152,10 +171,10 @@ class AsOfGlobalState : public GlobalOperatorState {
|
|
152
171
|
public:
|
153
172
|
explicit AsOfGlobalState(AsOfGlobalSinkState &gsink) {
|
154
173
|
// for FULL/RIGHT OUTER JOIN, initialize right_outers to false for every tuple
|
155
|
-
auto &
|
174
|
+
auto &rhs_partition = gsink.rhs_sink;
|
156
175
|
auto &right_outers = gsink.right_outers;
|
157
|
-
right_outers.reserve(
|
158
|
-
for (const auto &hash_group :
|
176
|
+
right_outers.reserve(rhs_partition.hash_groups.size());
|
177
|
+
for (const auto &hash_group : rhs_partition.hash_groups) {
|
159
178
|
right_outers.emplace_back(OuterJoinMarker(gsink.is_outer));
|
160
179
|
right_outers.back().Initialize(hash_group->count);
|
161
180
|
}
|
@@ -169,79 +188,47 @@ unique_ptr<GlobalOperatorState> PhysicalAsOfJoin::GetGlobalOperatorState(ClientC
|
|
169
188
|
|
170
189
|
class AsOfLocalState : public CachingOperatorState {
|
171
190
|
public:
|
172
|
-
|
173
|
-
|
191
|
+
AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op)
|
192
|
+
: context(context), allocator(Allocator::Get(context)), op(op), lhs_executor(context),
|
193
|
+
left_outer(IsLeftOuterJoin(op.join_type)), fetch_next_left(true) {
|
194
|
+
lhs_keys.Initialize(allocator, op.join_key_types);
|
195
|
+
for (const auto &cond : op.conditions) {
|
196
|
+
lhs_executor.AddExpression(*cond.left);
|
197
|
+
}
|
174
198
|
|
175
|
-
|
199
|
+
lhs_payload.Initialize(allocator, op.children[0]->types);
|
200
|
+
lhs_sel.Initialize();
|
201
|
+
left_outer.Initialize(STANDARD_VECTOR_SIZE);
|
176
202
|
|
177
|
-
|
178
|
-
|
203
|
+
auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
|
204
|
+
lhs_partition_sink = gsink.RegisterBuffer(context);
|
205
|
+
}
|
179
206
|
|
180
|
-
|
207
|
+
bool Sink(DataChunk &input);
|
208
|
+
OperatorResultType ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk);
|
181
209
|
|
182
210
|
ClientContext &context;
|
183
211
|
Allocator &allocator;
|
184
212
|
const PhysicalAsOfJoin &op;
|
185
|
-
BufferManager &buffer_manager;
|
186
|
-
const bool force_external;
|
187
|
-
Orders lhs_orders;
|
188
213
|
|
189
|
-
// LHS sorting
|
190
214
|
ExpressionExecutor lhs_executor;
|
191
215
|
DataChunk lhs_keys;
|
192
216
|
ValidityMask lhs_valid_mask;
|
193
217
|
SelectionVector lhs_sel;
|
194
|
-
|
195
|
-
RowLayout lhs_layout;
|
196
|
-
unique_ptr<GlobalSortState> lhs_global_state;
|
197
|
-
DataChunk lhs_sorted;
|
198
|
-
|
199
|
-
// LHS binning
|
200
|
-
Vector hash_vector;
|
201
|
-
Vector bin_vector;
|
218
|
+
DataChunk lhs_payload;
|
202
219
|
|
203
|
-
// Output
|
204
|
-
idx_t lhs_match_count;
|
205
|
-
SelectionVector lhs_matched;
|
206
220
|
OuterJoinMarker left_outer;
|
207
221
|
bool fetch_next_left;
|
208
|
-
DataChunk group_payload;
|
209
|
-
DataChunk rhs_payload;
|
210
|
-
};
|
211
|
-
|
212
|
-
AsOfLocalState::AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op, bool force_external)
|
213
|
-
: context(context), allocator(Allocator::Get(context)), op(op),
|
214
|
-
buffer_manager(BufferManager::GetBufferManager(context)), force_external(force_external), lhs_executor(context),
|
215
|
-
hash_vector(LogicalType::HASH), bin_vector(LogicalType::HASH), left_outer(IsLeftOuterJoin(op.join_type)),
|
216
|
-
fetch_next_left(true) {
|
217
|
-
vector<unique_ptr<BaseStatistics>> partition_stats;
|
218
|
-
Orders partitions; // Not used.
|
219
|
-
PartitionGlobalSinkState::GenerateOrderings(partitions, lhs_orders, op.lhs_partitions, op.lhs_orders,
|
220
|
-
partition_stats);
|
221
222
|
|
222
|
-
|
223
|
-
|
224
|
-
lhs_sorted.Initialize(allocator, lhs_layout.GetTypes());
|
225
|
-
|
226
|
-
lhs_keys.Initialize(allocator, op.join_key_types);
|
227
|
-
for (const auto &cond : op.conditions) {
|
228
|
-
lhs_executor.AddExpression(*cond.left);
|
229
|
-
}
|
230
|
-
|
231
|
-
group_payload.Initialize(allocator, op.children[1]->types);
|
232
|
-
rhs_payload.Initialize(allocator, op.children[1]->types);
|
233
|
-
|
234
|
-
lhs_matched.Initialize();
|
235
|
-
lhs_sel.Initialize();
|
236
|
-
left_outer.Initialize(STANDARD_VECTOR_SIZE);
|
237
|
-
}
|
223
|
+
optional_ptr<PartitionLocalSinkState> lhs_partition_sink;
|
224
|
+
};
|
238
225
|
|
239
|
-
|
226
|
+
bool AsOfLocalState::Sink(DataChunk &input) {
|
240
227
|
// Compute the join keys
|
241
228
|
lhs_keys.Reset();
|
242
229
|
lhs_executor.Execute(input, lhs_keys);
|
243
230
|
|
244
|
-
//
|
231
|
+
// Combine the NULLs
|
245
232
|
const auto count = input.size();
|
246
233
|
lhs_valid_mask.Reset();
|
247
234
|
for (auto col_idx : op.null_sensitive) {
|
@@ -251,17 +238,19 @@ void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
|
|
251
238
|
lhs_valid_mask.Combine(unified.validity, count);
|
252
239
|
}
|
253
240
|
|
254
|
-
// Convert the mask to a selection vector
|
255
|
-
//
|
256
|
-
lhs_valid = 0;
|
241
|
+
// Convert the mask to a selection vector
|
242
|
+
// and mark all the rows that cannot match for early return.
|
243
|
+
idx_t lhs_valid = 0;
|
257
244
|
const auto entry_count = lhs_valid_mask.EntryCount(count);
|
258
245
|
idx_t base_idx = 0;
|
246
|
+
left_outer.Reset();
|
259
247
|
for (idx_t entry_idx = 0; entry_idx < entry_count;) {
|
260
248
|
const auto validity_entry = lhs_valid_mask.GetValidityEntry(entry_idx++);
|
261
249
|
const auto next = MinValue<idx_t>(base_idx + ValidityMask::BITS_PER_VALUE, count);
|
262
250
|
if (ValidityMask::AllValid(validity_entry)) {
|
263
251
|
for (; base_idx < next; ++base_idx) {
|
264
252
|
lhs_sel.set_index(lhs_valid++, base_idx);
|
253
|
+
left_outer.SetMatch(base_idx);
|
265
254
|
}
|
266
255
|
} else if (ValidityMask::NoneValid(validity_entry)) {
|
267
256
|
base_idx = next;
|
@@ -270,120 +259,219 @@ void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
|
|
270
259
|
for (; base_idx < next; ++base_idx) {
|
271
260
|
if (ValidityMask::RowIsValid(validity_entry, base_idx - start)) {
|
272
261
|
lhs_sel.set_index(lhs_valid++, base_idx);
|
262
|
+
left_outer.SetMatch(base_idx);
|
273
263
|
}
|
274
264
|
}
|
275
265
|
}
|
276
266
|
}
|
277
267
|
|
278
268
|
// Slice the keys to the ones we can match
|
279
|
-
|
280
|
-
|
269
|
+
lhs_payload.Reset();
|
270
|
+
if (lhs_valid == count) {
|
271
|
+
lhs_payload.Reference(input);
|
272
|
+
lhs_payload.SetCardinality(input);
|
273
|
+
} else {
|
274
|
+
lhs_payload.Slice(input, lhs_sel, lhs_valid);
|
275
|
+
lhs_payload.SetCardinality(lhs_valid);
|
276
|
+
|
277
|
+
// Flush the ones that can't match
|
278
|
+
fetch_next_left = false;
|
281
279
|
}
|
282
280
|
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
281
|
+
lhs_partition_sink->Sink(lhs_payload);
|
282
|
+
|
283
|
+
return false;
|
284
|
+
}
|
285
|
+
|
286
|
+
OperatorResultType AsOfLocalState::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk) {
|
287
|
+
input.Verify();
|
288
|
+
Sink(input);
|
289
|
+
|
290
|
+
// If there were any unmatchable rows, return them now so we can forget about them.
|
291
|
+
if (!fetch_next_left) {
|
292
|
+
fetch_next_left = true;
|
293
|
+
left_outer.ConstructLeftJoinResult(input, chunk);
|
294
|
+
left_outer.Reset();
|
295
|
+
}
|
296
|
+
|
297
|
+
// Just keep asking for data and buffering it
|
298
|
+
return OperatorResultType::NEED_MORE_INPUT;
|
299
|
+
}
|
300
|
+
|
301
|
+
OperatorResultType PhysicalAsOfJoin::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
|
302
|
+
GlobalOperatorState &gstate, OperatorState &lstate_p) const {
|
303
|
+
auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
|
304
|
+
auto &lstate = lstate_p.Cast<AsOfLocalState>();
|
305
|
+
|
306
|
+
if (gsink.rhs_sink.count == 0) {
|
307
|
+
// empty RHS
|
308
|
+
if (!EmptyResultIfRHSIsEmpty()) {
|
309
|
+
ConstructEmptyJoinResult(join_type, gsink.has_null, input, chunk);
|
310
|
+
return OperatorResultType::NEED_MORE_INPUT;
|
311
|
+
} else {
|
312
|
+
return OperatorResultType::FINISHED;
|
293
313
|
}
|
314
|
+
}
|
294
315
|
|
295
|
-
|
296
|
-
|
297
|
-
|
316
|
+
return lstate.ExecuteInternal(context, input, chunk);
|
317
|
+
}
|
318
|
+
|
319
|
+
//===--------------------------------------------------------------------===//
|
320
|
+
// Source
|
321
|
+
//===--------------------------------------------------------------------===//
|
322
|
+
class AsOfProbeBuffer {
|
323
|
+
public:
|
324
|
+
using Orders = vector<BoundOrderByNode>;
|
325
|
+
|
326
|
+
static bool IsExternal(ClientContext &context) {
|
327
|
+
return ClientConfig::GetConfig(context).force_external;
|
328
|
+
}
|
329
|
+
|
330
|
+
AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin &op);
|
331
|
+
|
332
|
+
public:
|
333
|
+
void ResolveJoin(bool *found_matches, idx_t *matches = nullptr);
|
334
|
+
bool Scanning() const {
|
335
|
+
return lhs_scanner.get();
|
298
336
|
}
|
337
|
+
void BeginLeftScan(hash_t scan_bin);
|
338
|
+
bool NextLeft();
|
339
|
+
void EndScan();
|
340
|
+
|
341
|
+
// resolve joins that output max N elements (SEMI, ANTI, MARK)
|
342
|
+
void ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk);
|
343
|
+
// resolve joins that can potentially output N*M elements (INNER, LEFT, FULL)
|
344
|
+
void ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk);
|
345
|
+
// Chunk may be empty
|
346
|
+
void GetData(ExecutionContext &context, DataChunk &chunk);
|
347
|
+
bool HasMoreData() const {
|
348
|
+
return !fetch_next_left || (lhs_scanner && lhs_scanner->Remaining());
|
349
|
+
}
|
350
|
+
|
351
|
+
ClientContext &context;
|
352
|
+
Allocator &allocator;
|
353
|
+
const PhysicalAsOfJoin &op;
|
354
|
+
BufferManager &buffer_manager;
|
355
|
+
const bool force_external;
|
356
|
+
const idx_t memory_per_thread;
|
357
|
+
Orders lhs_orders;
|
358
|
+
|
359
|
+
// LHS scanning
|
360
|
+
SelectionVector lhs_sel;
|
361
|
+
optional_ptr<PartitionGlobalHashGroup> left_hash;
|
362
|
+
OuterJoinMarker left_outer;
|
363
|
+
unique_ptr<SBIterator> left_itr;
|
364
|
+
unique_ptr<PayloadScanner> lhs_scanner;
|
365
|
+
DataChunk lhs_payload;
|
366
|
+
|
367
|
+
// RHS scanning
|
368
|
+
optional_ptr<PartitionGlobalHashGroup> right_hash;
|
369
|
+
optional_ptr<OuterJoinMarker> right_outer;
|
370
|
+
unique_ptr<SBIterator> right_itr;
|
371
|
+
unique_ptr<PayloadScanner> rhs_scanner;
|
372
|
+
DataChunk rhs_payload;
|
373
|
+
|
374
|
+
idx_t lhs_match_count;
|
375
|
+
bool fetch_next_left;
|
376
|
+
};
|
377
|
+
|
378
|
+
AsOfProbeBuffer::AsOfProbeBuffer(ClientContext &context, const PhysicalAsOfJoin &op)
|
379
|
+
: context(context), allocator(Allocator::Get(context)), op(op),
|
380
|
+
buffer_manager(BufferManager::GetBufferManager(context)), force_external(IsExternal(context)),
|
381
|
+
memory_per_thread(op.GetMaxThreadMemory(context)), left_outer(IsLeftOuterJoin(op.join_type)),
|
382
|
+
fetch_next_left(true) {
|
383
|
+
vector<unique_ptr<BaseStatistics>> partition_stats;
|
384
|
+
Orders partitions; // Not used.
|
385
|
+
PartitionGlobalSinkState::GenerateOrderings(partitions, lhs_orders, op.lhs_partitions, op.lhs_orders,
|
386
|
+
partition_stats);
|
387
|
+
|
388
|
+
// We sort the row numbers of the incoming block, not the rows
|
389
|
+
lhs_payload.Initialize(allocator, op.children[0]->types);
|
390
|
+
rhs_payload.Initialize(allocator, op.children[1]->types);
|
391
|
+
|
392
|
+
lhs_sel.Initialize();
|
393
|
+
left_outer.Initialize(STANDARD_VECTOR_SIZE);
|
394
|
+
}
|
299
395
|
|
300
|
-
|
301
|
-
|
302
|
-
auto &
|
303
|
-
|
304
|
-
|
396
|
+
void AsOfProbeBuffer::BeginLeftScan(hash_t scan_bin) {
|
397
|
+
auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
|
398
|
+
auto &lhs_sink = *gsink.lhs_sink;
|
399
|
+
const auto left_group = lhs_sink.bin_groups[scan_bin];
|
400
|
+
if (left_group >= lhs_sink.bin_groups.size()) {
|
401
|
+
return;
|
402
|
+
}
|
305
403
|
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
404
|
+
left_hash = lhs_sink.hash_groups[left_group].get();
|
405
|
+
auto &left_sort = *(left_hash->global_sort);
|
406
|
+
lhs_scanner = make_uniq<PayloadScanner>(left_sort, false);
|
407
|
+
left_itr = make_uniq<SBIterator>(left_sort, ExpressionType::COMPARE_LESSTHANOREQUALTO);
|
408
|
+
|
409
|
+
// We are only probing the corresponding right side bin, which may be empty
|
410
|
+
// If they are empty, we leave the iterator as null so we can emit left matches
|
411
|
+
auto &rhs_sink = gsink.rhs_sink;
|
412
|
+
const auto right_group = rhs_sink.bin_groups[scan_bin];
|
413
|
+
if (right_group < rhs_sink.bin_groups.size()) {
|
414
|
+
right_hash = rhs_sink.hash_groups[right_group].get();
|
415
|
+
right_outer = gsink.right_outers.data() + right_group;
|
416
|
+
auto &right_sort = *(right_hash->global_sort);
|
417
|
+
right_itr = make_uniq<SBIterator>(right_sort, ExpressionType::COMPARE_LESSTHANOREQUALTO);
|
418
|
+
rhs_scanner = make_uniq<PayloadScanner>(right_sort, false);
|
419
|
+
}
|
420
|
+
}
|
311
421
|
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
global_state.PrepareMergePhase();
|
316
|
-
while (global_state.sorted_blocks.size() > 1) {
|
317
|
-
MergeSorter merge_sorter(*lhs_global_state, buffer_manager);
|
318
|
-
merge_sorter.PerformInMergeRound();
|
319
|
-
global_state.CompleteMergeRound();
|
422
|
+
bool AsOfProbeBuffer::NextLeft() {
|
423
|
+
if (!HasMoreData()) {
|
424
|
+
return false;
|
320
425
|
}
|
321
426
|
|
322
|
-
//
|
323
|
-
|
427
|
+
// Scan the next sorted chunk
|
428
|
+
lhs_payload.Reset();
|
429
|
+
left_itr->SetIndex(lhs_scanner->Scanned());
|
430
|
+
lhs_scanner->Scan(lhs_payload);
|
324
431
|
|
325
|
-
|
326
|
-
lhs_sorted.Reset();
|
327
|
-
scanner->Scan(lhs_sorted);
|
432
|
+
return true;
|
328
433
|
}
|
329
434
|
|
330
|
-
void
|
331
|
-
|
332
|
-
|
435
|
+
void AsOfProbeBuffer::EndScan() {
|
436
|
+
right_hash = nullptr;
|
437
|
+
right_itr.reset();
|
438
|
+
rhs_scanner.reset();
|
439
|
+
right_outer = nullptr;
|
333
440
|
|
334
|
-
|
335
|
-
|
441
|
+
left_hash = nullptr;
|
442
|
+
left_itr.reset();
|
443
|
+
lhs_scanner.reset();
|
444
|
+
}
|
336
445
|
|
337
|
-
|
338
|
-
//
|
339
|
-
|
340
|
-
|
341
|
-
|
446
|
+
void AsOfProbeBuffer::ResolveJoin(bool *found_match, idx_t *matches) {
|
447
|
+
// If there was no right partition, there are no matches
|
448
|
+
lhs_match_count = 0;
|
449
|
+
left_outer.Reset();
|
450
|
+
if (!right_itr) {
|
451
|
+
return;
|
452
|
+
}
|
342
453
|
|
343
|
-
|
344
|
-
|
345
|
-
optional_ptr<OuterJoinMarker> right_outer;
|
454
|
+
const auto count = lhs_payload.size();
|
455
|
+
const auto left_base = left_itr->GetIndex();
|
346
456
|
// Searching for right <= left
|
347
|
-
|
348
|
-
|
349
|
-
lhs_match_count = 0;
|
350
|
-
const auto sorted_sel = FlatVector::GetData<sel_t>(lhs_sorted.data[0]);
|
351
|
-
for (idx_t i = 0; i < lhs_valid; ++i) {
|
352
|
-
// idx is the index in the input; i is the index in the sorted keys
|
353
|
-
const auto idx = sorted_sel[i];
|
354
|
-
const auto curr_bin = bins[bin_unified.sel->get_index(idx)];
|
355
|
-
if (!hash_group || curr_bin != prev_bin) {
|
356
|
-
// Grab the next group
|
357
|
-
prev_bin = curr_bin;
|
358
|
-
const auto group_idx = global_partition.bin_groups[curr_bin];
|
359
|
-
if (group_idx >= global_partition.hash_groups.size()) {
|
360
|
-
// No matching partition
|
361
|
-
hash_group = nullptr;
|
362
|
-
right_outer = nullptr;
|
363
|
-
right.reset();
|
364
|
-
continue;
|
365
|
-
}
|
366
|
-
hash_group = global_partition.hash_groups[group_idx].get();
|
367
|
-
right_outer = gsink.right_outers.data() + group_idx;
|
368
|
-
right = make_uniq<SBIterator>(*(hash_group->global_sort), ExpressionType::COMPARE_LESSTHANOREQUALTO);
|
369
|
-
}
|
370
|
-
left.SetIndex(i);
|
457
|
+
for (idx_t i = 0; i < count; ++i) {
|
458
|
+
left_itr->SetIndex(left_base + i);
|
371
459
|
|
372
460
|
// If right > left, then there is no match
|
373
|
-
if (!
|
461
|
+
if (!right_itr->Compare(*left_itr)) {
|
374
462
|
continue;
|
375
463
|
}
|
376
464
|
|
377
465
|
// Exponential search forward for a non-matching value using radix iterators
|
378
466
|
// (We use exponential search to avoid thrashing the block manager on large probes)
|
379
467
|
idx_t bound = 1;
|
380
|
-
idx_t begin =
|
381
|
-
|
382
|
-
while (
|
383
|
-
if (
|
468
|
+
idx_t begin = right_itr->GetIndex();
|
469
|
+
right_itr->SetIndex(begin + bound);
|
470
|
+
while (right_itr->GetIndex() < right_hash->count) {
|
471
|
+
if (right_itr->Compare(*left_itr)) {
|
384
472
|
// If right <= left, jump ahead
|
385
473
|
bound *= 2;
|
386
|
-
|
474
|
+
right_itr->SetIndex(begin + bound);
|
387
475
|
} else {
|
388
476
|
break;
|
389
477
|
}
|
@@ -392,255 +480,298 @@ void AsOfLocalState::ResolveJoin(DataChunk &input, bool *found_match, std::pair<
|
|
392
480
|
// Binary search for the first non-matching value using radix iterators
|
393
481
|
// The previous value (which we know exists) is the match
|
394
482
|
auto first = begin + bound / 2;
|
395
|
-
auto last = MinValue<idx_t>(begin + bound,
|
483
|
+
auto last = MinValue<idx_t>(begin + bound, right_hash->count);
|
396
484
|
while (first < last) {
|
397
485
|
const auto mid = first + (last - first) / 2;
|
398
|
-
|
399
|
-
if (
|
486
|
+
right_itr->SetIndex(mid);
|
487
|
+
if (right_itr->Compare(*left_itr)) {
|
400
488
|
// If right <= left, new lower bound
|
401
489
|
first = mid + 1;
|
402
490
|
} else {
|
403
491
|
last = mid;
|
404
492
|
}
|
405
493
|
}
|
406
|
-
|
494
|
+
right_itr->SetIndex(--first);
|
407
495
|
|
408
496
|
// Check partitions for strict equality
|
409
|
-
if (
|
497
|
+
if (right_hash->ComparePartitions(*left_itr, *right_itr)) {
|
410
498
|
continue;
|
411
499
|
}
|
412
500
|
|
413
501
|
// Emit match data
|
414
502
|
right_outer->SetMatch(first);
|
415
|
-
left_outer.SetMatch(
|
503
|
+
left_outer.SetMatch(i);
|
416
504
|
if (found_match) {
|
417
|
-
found_match[
|
505
|
+
found_match[i] = true;
|
418
506
|
}
|
419
507
|
if (matches) {
|
420
|
-
matches[
|
508
|
+
matches[i] = first;
|
421
509
|
}
|
422
|
-
|
510
|
+
lhs_sel.set_index(lhs_match_count++, i);
|
423
511
|
}
|
424
512
|
}
|
425
513
|
|
426
514
|
unique_ptr<OperatorState> PhysicalAsOfJoin::GetOperatorState(ExecutionContext &context) const {
|
427
|
-
|
428
|
-
return make_uniq<AsOfLocalState>(context.client, *this, config.force_external);
|
515
|
+
return make_uniq<AsOfLocalState>(context.client, *this);
|
429
516
|
}
|
430
517
|
|
431
|
-
void
|
432
|
-
OperatorState &lstate_p) const {
|
433
|
-
auto &lstate = lstate_p.Cast<AsOfLocalState>();
|
434
|
-
auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
|
435
|
-
|
518
|
+
void AsOfProbeBuffer::ResolveSimpleJoin(ExecutionContext &context, DataChunk &chunk) {
|
436
519
|
// perform the actual join
|
437
520
|
bool found_match[STANDARD_VECTOR_SIZE] = {false};
|
438
|
-
|
521
|
+
ResolveJoin(found_match);
|
439
522
|
|
440
523
|
// now construct the result based on the join result
|
441
|
-
switch (join_type) {
|
442
|
-
case JoinType::MARK: {
|
443
|
-
PhysicalJoin::ConstructMarkJoinResult(lstate.lhs_keys, input, chunk, found_match, gsink.has_null);
|
444
|
-
break;
|
445
|
-
}
|
524
|
+
switch (op.join_type) {
|
446
525
|
case JoinType::SEMI:
|
447
|
-
PhysicalJoin::ConstructSemiJoinResult(
|
526
|
+
PhysicalJoin::ConstructSemiJoinResult(lhs_payload, chunk, found_match);
|
448
527
|
break;
|
449
528
|
case JoinType::ANTI:
|
450
|
-
PhysicalJoin::ConstructAntiJoinResult(
|
529
|
+
PhysicalJoin::ConstructAntiJoinResult(lhs_payload, chunk, found_match);
|
451
530
|
break;
|
452
531
|
default:
|
453
532
|
throw NotImplementedException("Unimplemented join type for AsOf join");
|
454
533
|
}
|
455
534
|
}
|
456
535
|
|
457
|
-
|
458
|
-
OperatorState &lstate_p) const {
|
459
|
-
auto &lstate = lstate_p.Cast<AsOfLocalState>();
|
460
|
-
auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
|
461
|
-
|
462
|
-
if (!lstate.fetch_next_left) {
|
463
|
-
lstate.fetch_next_left = true;
|
464
|
-
if (lstate.left_outer.Enabled()) {
|
465
|
-
// left join: before we move to the next chunk, see if we need to output any vectors that didn't
|
466
|
-
// have a match found
|
467
|
-
lstate.left_outer.ConstructLeftJoinResult(input, chunk);
|
468
|
-
lstate.left_outer.Reset();
|
469
|
-
}
|
470
|
-
return OperatorResultType::NEED_MORE_INPUT;
|
471
|
-
}
|
472
|
-
|
536
|
+
void AsOfProbeBuffer::ResolveComplexJoin(ExecutionContext &context, DataChunk &chunk) {
|
473
537
|
// perform the actual join
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
hash_t scan_bin = global_partition.bin_groups.size();
|
481
|
-
optional_ptr<PartitionGlobalHashGroup> hash_group;
|
482
|
-
unique_ptr<PayloadScanner> scanner;
|
483
|
-
for (idx_t i = 0; i < lstate.lhs_match_count; ++i) {
|
484
|
-
const auto idx = lstate.lhs_matched[i];
|
485
|
-
const auto match_bin = matches[idx].first;
|
486
|
-
const auto match_pos = matches[idx].second;
|
487
|
-
if (match_bin != scan_bin) {
|
488
|
-
// Grab the next group
|
489
|
-
const auto group_idx = global_partition.bin_groups[match_bin];
|
490
|
-
hash_group = global_partition.hash_groups[group_idx].get();
|
491
|
-
scan_bin = match_bin;
|
492
|
-
scanner = make_uniq<PayloadScanner>(*hash_group->global_sort, false);
|
493
|
-
lstate.group_payload.Reset();
|
494
|
-
}
|
538
|
+
idx_t matches[STANDARD_VECTOR_SIZE];
|
539
|
+
ResolveJoin(nullptr, matches);
|
540
|
+
|
541
|
+
for (idx_t i = 0; i < lhs_match_count; ++i) {
|
542
|
+
const auto idx = lhs_sel[i];
|
543
|
+
const auto match_pos = matches[idx];
|
495
544
|
// Skip to the range containing the match
|
496
|
-
while (match_pos >=
|
497
|
-
|
498
|
-
|
545
|
+
while (match_pos >= rhs_scanner->Scanned()) {
|
546
|
+
rhs_payload.Reset();
|
547
|
+
rhs_scanner->Scan(rhs_payload);
|
499
548
|
}
|
500
549
|
// Append the individual values
|
501
550
|
// TODO: Batch the copies
|
502
|
-
const auto source_offset = match_pos - (
|
503
|
-
for (
|
504
|
-
const auto rhs_idx = right_projection_map[col_idx];
|
505
|
-
auto &source =
|
506
|
-
auto &target = chunk.data[
|
551
|
+
const auto source_offset = match_pos - (rhs_scanner->Scanned() - rhs_payload.size());
|
552
|
+
for (column_t col_idx = 0; col_idx < op.right_projection_map.size(); ++col_idx) {
|
553
|
+
const auto rhs_idx = op.right_projection_map[col_idx];
|
554
|
+
auto &source = rhs_payload.data[rhs_idx];
|
555
|
+
auto &target = chunk.data[lhs_payload.ColumnCount() + col_idx];
|
507
556
|
VectorOperations::Copy(source, target, source_offset + 1, source_offset, i);
|
508
557
|
}
|
509
558
|
}
|
510
559
|
|
511
|
-
// Slice the
|
512
|
-
|
513
|
-
|
514
|
-
// If we are doing a left join, come back for the NULLs
|
515
|
-
if (lstate.left_outer.Enabled()) {
|
516
|
-
lstate.fetch_next_left = false;
|
517
|
-
return OperatorResultType::HAVE_MORE_OUTPUT;
|
560
|
+
// Slice the left payload into the result
|
561
|
+
for (column_t i = 0; i < lhs_payload.ColumnCount(); ++i) {
|
562
|
+
chunk.data[i].Slice(lhs_payload.data[i], lhs_sel, lhs_match_count);
|
518
563
|
}
|
564
|
+
chunk.SetCardinality(lhs_match_count);
|
519
565
|
|
520
|
-
|
566
|
+
// If we are doing a left join, come back for the NULLs
|
567
|
+
fetch_next_left = !left_outer.Enabled();
|
521
568
|
}
|
522
569
|
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
} else {
|
533
|
-
return OperatorResultType::FINISHED;
|
570
|
+
void AsOfProbeBuffer::GetData(ExecutionContext &context, DataChunk &chunk) {
|
571
|
+
// Handle dangling left join results from current chunk
|
572
|
+
if (!fetch_next_left) {
|
573
|
+
fetch_next_left = true;
|
574
|
+
if (left_outer.Enabled()) {
|
575
|
+
// left join: before we move to the next chunk, see if we need to output any vectors that didn't
|
576
|
+
// have a match found
|
577
|
+
left_outer.ConstructLeftJoinResult(lhs_payload, chunk);
|
578
|
+
left_outer.Reset();
|
534
579
|
}
|
580
|
+
return;
|
535
581
|
}
|
536
582
|
|
537
|
-
|
538
|
-
|
583
|
+
// Stop if there is no more data
|
584
|
+
if (!NextLeft()) {
|
585
|
+
return;
|
586
|
+
}
|
587
|
+
|
588
|
+
switch (op.join_type) {
|
539
589
|
case JoinType::SEMI:
|
540
590
|
case JoinType::ANTI:
|
541
591
|
case JoinType::MARK:
|
542
592
|
// simple joins can have max STANDARD_VECTOR_SIZE matches per chunk
|
543
|
-
ResolveSimpleJoin(context,
|
544
|
-
|
593
|
+
ResolveSimpleJoin(context, chunk);
|
594
|
+
break;
|
545
595
|
case JoinType::LEFT:
|
546
596
|
case JoinType::INNER:
|
547
597
|
case JoinType::RIGHT:
|
548
598
|
case JoinType::OUTER:
|
549
|
-
|
599
|
+
ResolveComplexJoin(context, chunk);
|
600
|
+
break;
|
550
601
|
default:
|
551
602
|
throw NotImplementedException("Unimplemented type for as-of join!");
|
552
603
|
}
|
553
604
|
}
|
554
605
|
|
555
|
-
//===--------------------------------------------------------------------===//
|
556
|
-
// Source
|
557
|
-
//===--------------------------------------------------------------------===//
|
558
606
|
class AsOfGlobalSourceState : public GlobalSourceState {
|
559
607
|
public:
|
560
|
-
explicit AsOfGlobalSourceState(
|
608
|
+
explicit AsOfGlobalSourceState(AsOfGlobalSinkState &gsink_p)
|
609
|
+
: gsink(gsink_p), next_combine(0), combined(0), merged(0), mergers(0), next_left(0), flushed(0), next_right(0) {
|
561
610
|
}
|
562
611
|
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
public:
|
568
|
-
idx_t MaxThreads() override {
|
569
|
-
// If there is only one partition, we have to process it on one thread.
|
570
|
-
if (!gsink.grouping_data) {
|
571
|
-
return 1;
|
612
|
+
PartitionGlobalMergeStates &GetMergeStates() {
|
613
|
+
lock_guard<mutex> guard(lock);
|
614
|
+
if (!merge_states) {
|
615
|
+
merge_states = make_uniq<PartitionGlobalMergeStates>(*gsink.lhs_sink);
|
572
616
|
}
|
617
|
+
return *merge_states;
|
618
|
+
}
|
573
619
|
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
620
|
+
AsOfGlobalSinkState &gsink;
|
621
|
+
//! The next buffer to combine
|
622
|
+
atomic<size_t> next_combine;
|
623
|
+
//! The number of combined buffers
|
624
|
+
atomic<size_t> combined;
|
625
|
+
//! The number of combined buffers
|
626
|
+
atomic<size_t> merged;
|
627
|
+
//! The number of combined buffers
|
628
|
+
atomic<size_t> mergers;
|
629
|
+
//! The next buffer to flush
|
630
|
+
atomic<size_t> next_left;
|
631
|
+
//! The number of flushed buffers
|
632
|
+
atomic<size_t> flushed;
|
633
|
+
//! The right outer output read position.
|
634
|
+
atomic<idx_t> next_right;
|
635
|
+
//! The merge handler
|
636
|
+
mutex lock;
|
637
|
+
unique_ptr<PartitionGlobalMergeStates> merge_states;
|
578
638
|
|
579
|
-
|
639
|
+
public:
|
640
|
+
idx_t MaxThreads() override {
|
641
|
+
return gsink.lhs_buffers.size();
|
580
642
|
}
|
581
643
|
};
|
582
644
|
|
583
645
|
unique_ptr<GlobalSourceState> PhysicalAsOfJoin::GetGlobalSourceState(ClientContext &context) const {
|
584
646
|
auto &gsink = sink_state->Cast<AsOfGlobalSinkState>();
|
585
|
-
return make_uniq<AsOfGlobalSourceState>(gsink
|
647
|
+
return make_uniq<AsOfGlobalSourceState>(gsink);
|
586
648
|
}
|
587
649
|
|
588
650
|
class AsOfLocalSourceState : public LocalSourceState {
|
589
651
|
public:
|
590
652
|
using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
|
591
653
|
|
592
|
-
|
654
|
+
AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op);
|
655
|
+
|
656
|
+
void CombineLeftPartitions();
|
657
|
+
void MergeLeftPartitions();
|
658
|
+
|
659
|
+
idx_t BeginRightScan(const idx_t hash_bin);
|
593
660
|
|
594
|
-
|
661
|
+
AsOfGlobalSourceState &gsource;
|
595
662
|
|
596
|
-
|
663
|
+
//! The left side partition being probed
|
664
|
+
AsOfProbeBuffer probe_buffer;
|
597
665
|
|
598
666
|
//! The read partition
|
599
667
|
idx_t hash_bin;
|
600
668
|
HashGroupPtr hash_group;
|
601
|
-
|
602
669
|
//! The read cursor
|
603
670
|
unique_ptr<PayloadScanner> scanner;
|
604
|
-
//! Buffer for the inputs
|
605
|
-
DataChunk input_chunk;
|
606
671
|
//! Pointer to the matches
|
607
|
-
const bool *found_match;
|
672
|
+
const bool *found_match = {};
|
608
673
|
};
|
609
674
|
|
610
|
-
AsOfLocalSourceState::AsOfLocalSourceState(
|
611
|
-
|
675
|
+
AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op)
|
676
|
+
: gsource(gsource), probe_buffer(gsource.gsink.lhs_sink->context, op) {
|
677
|
+
gsource.mergers++;
|
678
|
+
}
|
679
|
+
|
680
|
+
void AsOfLocalSourceState::CombineLeftPartitions() {
|
681
|
+
const auto buffer_count = gsource.gsink.lhs_buffers.size();
|
682
|
+
while (gsource.combined < buffer_count) {
|
683
|
+
const auto next_combine = gsource.next_combine++;
|
684
|
+
if (next_combine < buffer_count) {
|
685
|
+
gsource.gsink.lhs_buffers[next_combine]->Combine();
|
686
|
+
++gsource.combined;
|
687
|
+
} else {
|
688
|
+
std::this_thread::yield();
|
689
|
+
}
|
690
|
+
}
|
691
|
+
}
|
692
|
+
|
693
|
+
void AsOfLocalSourceState::MergeLeftPartitions() {
|
694
|
+
PartitionGlobalMergeStates::Callback local_callback;
|
695
|
+
PartitionLocalMergeState local_merge;
|
696
|
+
gsource.GetMergeStates().ExecuteTask(local_merge, local_callback);
|
697
|
+
gsource.merged++;
|
698
|
+
while (gsource.merged < gsource.mergers) {
|
699
|
+
std::this_thread::yield();
|
700
|
+
}
|
612
701
|
}
|
613
702
|
|
614
|
-
idx_t AsOfLocalSourceState::
|
615
|
-
// Get rid of any stale data
|
703
|
+
idx_t AsOfLocalSourceState::BeginRightScan(const idx_t hash_bin_p) {
|
616
704
|
hash_bin = hash_bin_p;
|
617
705
|
|
618
|
-
hash_group = std::move(
|
706
|
+
hash_group = std::move(gsource.gsink.rhs_sink.hash_groups[hash_bin]);
|
619
707
|
scanner = make_uniq<PayloadScanner>(*hash_group->global_sort);
|
620
|
-
found_match =
|
708
|
+
found_match = gsource.gsink.right_outers[hash_bin].GetMatches();
|
621
709
|
|
622
710
|
return scanner->Remaining();
|
623
711
|
}
|
624
712
|
|
625
713
|
unique_ptr<LocalSourceState> PhysicalAsOfJoin::GetLocalSourceState(ExecutionContext &context,
|
626
714
|
GlobalSourceState &gstate) const {
|
627
|
-
auto &
|
628
|
-
return make_uniq<AsOfLocalSourceState>(
|
715
|
+
auto &gsource = gstate.Cast<AsOfGlobalSourceState>();
|
716
|
+
return make_uniq<AsOfLocalSourceState>(gsource, *this);
|
629
717
|
}
|
630
718
|
|
631
719
|
SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk &chunk,
|
632
720
|
OperatorSourceInput &input) const {
|
633
|
-
D_ASSERT(IsRightOuterJoin(join_type));
|
634
|
-
|
635
721
|
auto &gsource = input.global_state.Cast<AsOfGlobalSourceState>();
|
636
722
|
auto &lsource = input.local_state.Cast<AsOfLocalSourceState>();
|
637
|
-
auto &
|
723
|
+
auto &rhs_sink = gsource.gsink.rhs_sink;
|
724
|
+
|
725
|
+
// Step 1: Combine the partitions
|
726
|
+
lsource.CombineLeftPartitions();
|
727
|
+
|
728
|
+
// Step 2: Sort on all threads
|
729
|
+
lsource.MergeLeftPartitions();
|
730
|
+
|
731
|
+
// Step 3: Join the partitions
|
732
|
+
auto &lhs_sink = *gsource.gsink.lhs_sink;
|
733
|
+
auto &partitions = lhs_sink.grouping_data->GetPartitions();
|
734
|
+
const auto left_bins = partitions.size();
|
735
|
+
while (gsource.flushed < left_bins) {
|
736
|
+
// Make sure we have something to flush
|
737
|
+
if (!lsource.probe_buffer.Scanning()) {
|
738
|
+
const auto left_bin = gsource.next_left++;
|
739
|
+
if (left_bin < left_bins) {
|
740
|
+
// More to flush
|
741
|
+
lsource.probe_buffer.BeginLeftScan(left_bin);
|
742
|
+
} else if (!IsRightOuterJoin(join_type)) {
|
743
|
+
return SourceResultType::FINISHED;
|
744
|
+
} else {
|
745
|
+
// Wait for all threads to finish
|
746
|
+
// TODO: How to implement a spin wait correctly?
|
747
|
+
// Returning BLOCKED seems to hang the system.
|
748
|
+
std::this_thread::yield();
|
749
|
+
continue;
|
750
|
+
}
|
751
|
+
}
|
752
|
+
|
753
|
+
lsource.probe_buffer.GetData(context, chunk);
|
754
|
+
if (chunk.size()) {
|
755
|
+
return SourceResultType::HAVE_MORE_OUTPUT;
|
756
|
+
} else if (lsource.probe_buffer.HasMoreData()) {
|
757
|
+
// Join the next partition
|
758
|
+
continue;
|
759
|
+
} else {
|
760
|
+
lsource.probe_buffer.EndScan();
|
761
|
+
gsource.flushed++;
|
762
|
+
}
|
763
|
+
}
|
764
|
+
|
765
|
+
// Step 4: Emit right join matches
|
766
|
+
if (!IsRightOuterJoin(join_type)) {
|
767
|
+
return SourceResultType::FINISHED;
|
768
|
+
}
|
638
769
|
|
639
|
-
auto &hash_groups =
|
640
|
-
const auto
|
770
|
+
auto &hash_groups = rhs_sink.hash_groups;
|
771
|
+
const auto right_groups = hash_groups.size();
|
641
772
|
|
642
773
|
DataChunk rhs_chunk;
|
643
|
-
rhs_chunk.Initialize(Allocator::Get(context.client),
|
774
|
+
rhs_chunk.Initialize(Allocator::Get(context.client), rhs_sink.payload_types);
|
644
775
|
SelectionVector rsel(STANDARD_VECTOR_SIZE);
|
645
776
|
|
646
777
|
while (chunk.size() == 0) {
|
@@ -648,17 +779,17 @@ SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk
|
|
648
779
|
while (!lsource.scanner || !lsource.scanner->Remaining()) {
|
649
780
|
lsource.scanner.reset();
|
650
781
|
lsource.hash_group.reset();
|
651
|
-
auto hash_bin = gsource.
|
652
|
-
if (hash_bin >=
|
782
|
+
auto hash_bin = gsource.next_right++;
|
783
|
+
if (hash_bin >= right_groups) {
|
653
784
|
return SourceResultType::FINISHED;
|
654
785
|
}
|
655
786
|
|
656
|
-
for (; hash_bin < hash_groups.size(); hash_bin = gsource.
|
787
|
+
for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_right++) {
|
657
788
|
if (hash_groups[hash_bin]) {
|
658
789
|
break;
|
659
790
|
}
|
660
791
|
}
|
661
|
-
lsource.
|
792
|
+
lsource.BeginRightScan(hash_bin);
|
662
793
|
}
|
663
794
|
const auto rhs_position = lsource.scanner->Scanned();
|
664
795
|
lsource.scanner->Scan(rhs_chunk);
|