duckdb 1.0.1-dev22.0 → 1.0.1-dev27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/NodeJS.yml +1 -1
- package/binding.gyp +41 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +4 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +6 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +13 -2
- package/src/duckdb/extension/icu/icu-strptime.cpp +6 -6
- package/src/duckdb/extension/icu/icu-table-range.cpp +92 -73
- package/src/duckdb/extension/icu/icu-timebucket.cpp +12 -2
- package/src/duckdb/extension/icu/icu-timezone.cpp +3 -3
- package/src/duckdb/extension/icu/icu_extension.cpp +61 -9
- package/src/duckdb/extension/json/include/json_executors.hpp +20 -23
- package/src/duckdb/extension/json/include/json_functions.hpp +4 -0
- package/src/duckdb/extension/json/include/json_scan.hpp +6 -2
- package/src/duckdb/extension/json/include/json_structure.hpp +12 -9
- package/src/duckdb/extension/json/json_common.cpp +66 -10
- package/src/duckdb/extension/json/json_extension.cpp +13 -5
- package/src/duckdb/extension/json/json_functions/json_array_length.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -4
- package/src/duckdb/extension/json/json_functions/json_exists.cpp +32 -0
- package/src/duckdb/extension/json/json_functions/json_extract.cpp +2 -2
- package/src/duckdb/extension/json/json_functions/json_keys.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_pretty.cpp +32 -0
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +5 -1
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +305 -94
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_type.cpp +3 -3
- package/src/duckdb/extension/json/json_functions/json_value.cpp +42 -0
- package/src/duckdb/extension/json/json_functions/read_json.cpp +16 -2
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -2
- package/src/duckdb/extension/json/json_functions.cpp +5 -1
- package/src/duckdb/extension/json/json_scan.cpp +13 -12
- package/src/duckdb/extension/json/serialize_json.cpp +5 -3
- package/src/duckdb/extension/parquet/column_reader.cpp +206 -43
- package/src/duckdb/extension/parquet/column_writer.cpp +133 -62
- package/src/duckdb/extension/parquet/geo_parquet.cpp +391 -0
- package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +16 -5
- package/src/duckdb/extension/parquet/include/column_reader.hpp +37 -12
- package/src/duckdb/extension/parquet/include/column_writer.hpp +10 -11
- package/src/duckdb/extension/parquet/include/expression_column_reader.hpp +52 -0
- package/src/duckdb/extension/parquet/include/geo_parquet.hpp +139 -0
- package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +13 -8
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -0
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +7 -3
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +55 -8
- package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_rle_bp_encoder.hpp +1 -1
- package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +8 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +21 -7
- package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +33 -11
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +5 -2
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +48 -14
- package/src/duckdb/extension/parquet/parquet_crypto.cpp +109 -61
- package/src/duckdb/extension/parquet/parquet_extension.cpp +305 -72
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -4
- package/src/duckdb/extension/parquet/parquet_reader.cpp +151 -40
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +50 -16
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +42 -1
- package/src/duckdb/extension/parquet/parquet_writer.cpp +67 -75
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +3 -1
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +5 -1
- package/src/duckdb/src/catalog/catalog.cpp +14 -16
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +14 -11
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +39 -19
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +92 -78
- package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +10 -2
- package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +10 -3
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +3 -3
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +7 -7
- package/src/duckdb/src/catalog/catalog_entry.cpp +6 -3
- package/src/duckdb/src/catalog/catalog_set.cpp +14 -19
- package/src/duckdb/src/catalog/default/default_functions.cpp +179 -166
- package/src/duckdb/src/catalog/default/default_generator.cpp +24 -0
- package/src/duckdb/src/catalog/default/default_schemas.cpp +4 -3
- package/src/duckdb/src/catalog/default/default_table_functions.cpp +148 -0
- package/src/duckdb/src/catalog/default/default_views.cpp +7 -3
- package/src/duckdb/src/catalog/duck_catalog.cpp +7 -1
- package/src/duckdb/src/common/adbc/adbc.cpp +120 -58
- package/src/duckdb/src/common/allocator.cpp +71 -6
- package/src/duckdb/src/common/arrow/appender/bool_data.cpp +8 -7
- package/src/duckdb/src/common/arrow/appender/fixed_size_list_data.cpp +1 -1
- package/src/duckdb/src/common/arrow/appender/union_data.cpp +4 -5
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +55 -21
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +85 -10
- package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +142 -0
- package/src/duckdb/src/common/arrow/arrow_query_result.cpp +56 -0
- package/src/duckdb/src/common/arrow/physical_arrow_batch_collector.cpp +37 -0
- package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +128 -0
- package/src/duckdb/src/common/arrow/schema_metadata.cpp +101 -0
- package/src/duckdb/src/common/cgroups.cpp +189 -0
- package/src/duckdb/src/common/compressed_file_system.cpp +6 -3
- package/src/duckdb/src/common/encryption_state.cpp +38 -0
- package/src/duckdb/src/common/enum_util.cpp +682 -14
- package/src/duckdb/src/common/enums/file_compression_type.cpp +24 -0
- package/src/duckdb/src/common/enums/metric_type.cpp +208 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +8 -2
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/enums/relation_type.cpp +4 -0
- package/src/duckdb/src/common/enums/statement_type.cpp +15 -0
- package/src/duckdb/src/common/error_data.cpp +22 -20
- package/src/duckdb/src/common/exception/binder_exception.cpp +5 -0
- package/src/duckdb/src/common/exception.cpp +11 -1
- package/src/duckdb/src/common/extra_type_info.cpp +3 -0
- package/src/duckdb/src/common/file_buffer.cpp +1 -1
- package/src/duckdb/src/common/file_system.cpp +25 -3
- package/src/duckdb/src/common/filename_pattern.cpp +1 -0
- package/src/duckdb/src/common/fsst.cpp +15 -14
- package/src/duckdb/src/common/gzip_file_system.cpp +3 -1
- package/src/duckdb/src/common/hive_partitioning.cpp +103 -43
- package/src/duckdb/src/common/http_util.cpp +25 -0
- package/src/duckdb/src/common/local_file_system.cpp +48 -27
- package/src/duckdb/src/common/multi_file_list.cpp +113 -22
- package/src/duckdb/src/common/multi_file_reader.cpp +59 -58
- package/src/duckdb/src/common/operator/cast_operators.cpp +133 -34
- package/src/duckdb/src/common/operator/string_cast.cpp +42 -11
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +2 -2
- package/src/duckdb/src/common/progress_bar/terminal_progress_bar_display.cpp +1 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +31 -21
- package/src/duckdb/src/common/random_engine.cpp +4 -0
- package/src/duckdb/src/common/re2_regex.cpp +47 -12
- package/src/duckdb/src/common/render_tree.cpp +243 -0
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +58 -5
- package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +79 -43
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
- package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +6 -4
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +18 -9
- package/src/duckdb/src/common/serializer/memory_stream.cpp +1 -0
- package/src/duckdb/src/common/sort/partition_state.cpp +33 -18
- package/src/duckdb/src/common/sort/radix_sort.cpp +22 -15
- package/src/duckdb/src/common/sort/sort_state.cpp +19 -16
- package/src/duckdb/src/common/sort/sorted_block.cpp +11 -10
- package/src/duckdb/src/common/string_util.cpp +167 -10
- package/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp +108 -0
- package/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp +267 -0
- package/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp +116 -0
- package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +482 -0
- package/src/duckdb/src/common/tree_renderer/tree_renderer.cpp +12 -0
- package/src/duckdb/src/common/tree_renderer.cpp +16 -508
- package/src/duckdb/src/common/types/batched_data_collection.cpp +78 -9
- package/src/duckdb/src/common/types/bit.cpp +24 -22
- package/src/duckdb/src/common/types/blob.cpp +15 -11
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -9
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +3 -4
- package/src/duckdb/src/common/types/column/column_data_consumer.cpp +2 -2
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +70 -21
- package/src/duckdb/src/common/types/data_chunk.cpp +10 -1
- package/src/duckdb/src/common/types/date.cpp +8 -19
- package/src/duckdb/src/common/types/decimal.cpp +3 -2
- package/src/duckdb/src/common/types/hugeint.cpp +11 -3
- package/src/duckdb/src/common/types/hyperloglog.cpp +212 -227
- package/src/duckdb/src/common/types/interval.cpp +1 -1
- package/src/duckdb/src/common/types/list_segment.cpp +83 -49
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +22 -83
- package/src/duckdb/src/common/types/row/row_data_collection.cpp +2 -2
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +20 -4
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +28 -7
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +29 -14
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +152 -102
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +4 -1
- package/src/duckdb/src/common/types/selection_vector.cpp +17 -1
- package/src/duckdb/src/common/types/time.cpp +62 -31
- package/src/duckdb/src/common/types/timestamp.cpp +70 -12
- package/src/duckdb/src/common/types/uuid.cpp +1 -1
- package/src/duckdb/src/common/types/validity_mask.cpp +40 -5
- package/src/duckdb/src/common/types/value.cpp +50 -8
- package/src/duckdb/src/common/types/varint.cpp +295 -0
- package/src/duckdb/src/common/types/vector.cpp +165 -54
- package/src/duckdb/src/common/types/vector_buffer.cpp +5 -4
- package/src/duckdb/src/common/types.cpp +106 -26
- package/src/duckdb/src/common/vector_operations/vector_copy.cpp +13 -25
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +6 -0
- package/src/duckdb/src/common/virtual_file_system.cpp +3 -3
- package/src/duckdb/src/core_functions/aggregate/distributive/approx_count.cpp +35 -82
- package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +283 -46
- package/src/duckdb/src/core_functions/aggregate/distributive/bitagg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +3 -2
- package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +226 -338
- package/src/duckdb/src/core_functions/aggregate/distributive/sum.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/approx_top_k.cpp +388 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +63 -21
- package/src/duckdb/src/core_functions/aggregate/holistic/mad.cpp +330 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +136 -97
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +601 -1485
- package/src/duckdb/src/core_functions/aggregate/nested/binned_histogram.cpp +405 -0
- package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +136 -165
- package/src/duckdb/src/core_functions/function_list.cpp +35 -8
- package/src/duckdb/src/core_functions/lambda_functions.cpp +5 -7
- package/src/duckdb/src/core_functions/scalar/array/array_functions.cpp +172 -198
- package/src/duckdb/src/core_functions/scalar/blob/create_sort_key.cpp +341 -54
- package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +89 -29
- package/src/duckdb/src/core_functions/scalar/date/date_trunc.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +133 -71
- package/src/duckdb/src/core_functions/scalar/date/to_interval.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/enum/enum_functions.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/generic/can_implicitly_cast.cpp +40 -0
- package/src/duckdb/src/core_functions/scalar/generic/error.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/generic/least.cpp +161 -58
- package/src/duckdb/src/core_functions/scalar/generic/typeof.cpp +13 -0
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +59 -75
- package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +93 -40
- package/src/duckdb/src/core_functions/scalar/list/list_has_any_or_all.cpp +227 -0
- package/src/duckdb/src/core_functions/scalar/list/list_reduce.cpp +20 -19
- package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +0 -2
- package/src/duckdb/src/core_functions/scalar/list/list_value.cpp +106 -8
- package/src/duckdb/src/core_functions/scalar/map/map_contains.cpp +56 -0
- package/src/duckdb/src/core_functions/scalar/map/map_extract.cpp +73 -118
- package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +98 -2
- package/src/duckdb/src/core_functions/scalar/operators/bitwise.cpp +1 -2
- package/src/duckdb/src/core_functions/scalar/random/setseed.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/string/bar.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/string/hex.cpp +5 -1
- package/src/duckdb/src/core_functions/scalar/string/md5.cpp +10 -37
- package/src/duckdb/src/core_functions/scalar/string/printf.cpp +18 -2
- package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +45 -0
- package/src/duckdb/src/core_functions/scalar/string/reverse.cpp +4 -5
- package/src/duckdb/src/core_functions/scalar/string/sha1.cpp +35 -0
- package/src/duckdb/src/core_functions/scalar/string/sha256.cpp +5 -2
- package/src/duckdb/src/core_functions/scalar/string/url_encode.cpp +49 -0
- package/src/duckdb/src/core_functions/scalar/struct/struct_pack.cpp +1 -2
- package/src/duckdb/src/core_functions/scalar/union/union_extract.cpp +4 -2
- package/src/duckdb/src/execution/adaptive_filter.cpp +30 -11
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +13 -18
- package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +4 -9
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +683 -670
- package/src/duckdb/src/execution/index/art/art_key.cpp +121 -38
- package/src/duckdb/src/execution/index/art/base_leaf.cpp +168 -0
- package/src/duckdb/src/execution/index/art/base_node.cpp +163 -0
- package/src/duckdb/src/execution/index/art/iterator.cpp +148 -77
- package/src/duckdb/src/execution/index/art/leaf.cpp +159 -263
- package/src/duckdb/src/execution/index/art/node.cpp +493 -247
- package/src/duckdb/src/execution/index/art/node256.cpp +31 -91
- package/src/duckdb/src/execution/index/art/node256_leaf.cpp +71 -0
- package/src/duckdb/src/execution/index/art/node48.cpp +75 -143
- package/src/duckdb/src/execution/index/art/prefix.cpp +424 -244
- package/src/duckdb/src/execution/index/bound_index.cpp +7 -1
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +22 -18
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +22 -73
- package/src/duckdb/src/execution/join_hashtable.cpp +637 -179
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +15 -10
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +13 -8
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +525 -132
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +147 -138
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +531 -312
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +4 -3
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +9 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +13 -17
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +60 -16
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +105 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +24 -24
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +25 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +275 -112
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +106 -11
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +253 -115
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +93 -52
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +116 -76
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +29 -14
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +70 -26
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +81 -60
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +88 -50
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +161 -51
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +59 -17
- package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +5 -5
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +0 -21
- package/src/duckdb/src/execution/operator/helper/physical_buffered_batch_collector.cpp +109 -0
- package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +5 -13
- package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_load.cpp +12 -4
- package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +0 -16
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +4 -2
- package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +5 -0
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +3 -1
- package/src/duckdb/src/execution/operator/helper/physical_set_variable.cpp +39 -0
- package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +4 -2
- package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +16 -5
- package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +5 -4
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +59 -21
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +7 -4
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +333 -176
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +57 -34
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +16 -8
- package/src/duckdb/src/execution/operator/join/physical_left_delim_join.cpp +10 -4
- package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +2 -5
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +3 -3
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_right_delim_join.cpp +7 -2
- package/src/duckdb/src/execution/operator/order/physical_order.cpp +17 -12
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +12 -9
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +35 -17
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +17 -11
- package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +5 -1
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +156 -47
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +10 -2
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +1 -3
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -2
- package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +13 -6
- package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +22 -3
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +19 -3
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +37 -22
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +77 -21
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +27 -55
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +41 -44
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +4 -6
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +4 -6
- package/src/duckdb/src/execution/operator/set/physical_union.cpp +18 -4
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +3 -2
- package/src/duckdb/src/execution/physical_operator.cpp +45 -4
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -7
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +8 -3
- package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +13 -6
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +111 -19
- package/src/duckdb/src/execution/physical_plan/plan_limit.cpp +19 -2
- package/src/duckdb/src/execution/physical_plan/plan_set.cpp +9 -0
- package/src/duckdb/src/execution/physical_plan/plan_window.cpp +3 -1
- package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -3
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +49 -49
- package/src/duckdb/src/execution/reservoir_sample.cpp +2 -2
- package/src/duckdb/src/execution/window_executor.cpp +556 -318
- package/src/duckdb/src/execution/window_segment_tree.cpp +1058 -485
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +5 -5
- package/src/duckdb/src/function/aggregate/distributive/first.cpp +92 -95
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +10 -9
- package/src/duckdb/src/function/aggregate_function.cpp +8 -0
- package/src/duckdb/src/function/cast/cast_function_set.cpp +10 -1
- package/src/duckdb/src/function/cast/decimal_cast.cpp +10 -1
- package/src/duckdb/src/function/cast/default_casts.cpp +2 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +3 -0
- package/src/duckdb/src/function/cast/string_cast.cpp +8 -5
- package/src/duckdb/src/function/cast/time_casts.cpp +2 -2
- package/src/duckdb/src/function/cast/union_casts.cpp +1 -1
- package/src/duckdb/src/function/cast/varint_casts.cpp +283 -0
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +3 -1
- package/src/duckdb/src/function/cast_rules.cpp +104 -15
- package/src/duckdb/src/function/compression_config.cpp +35 -33
- package/src/duckdb/src/function/copy_function.cpp +27 -0
- package/src/duckdb/src/function/function_binder.cpp +39 -11
- package/src/duckdb/src/function/macro_function.cpp +75 -32
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +10 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -0
- package/src/duckdb/src/function/scalar/generic/binning.cpp +507 -0
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +58 -0
- package/src/duckdb/src/function/scalar/generic_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +33 -47
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +70 -143
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +93 -84
- package/src/duckdb/src/function/scalar/list/list_zip.cpp +3 -0
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +24 -11
- package/src/duckdb/src/function/scalar/sequence/nextval.cpp +4 -4
- package/src/duckdb/src/function/scalar/strftime_format.cpp +196 -57
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +9 -7
- package/src/duckdb/src/function/scalar/string/concat.cpp +239 -123
- package/src/duckdb/src/function/scalar/string/concat_ws.cpp +149 -0
- package/src/duckdb/src/function/scalar/string/contains.cpp +18 -7
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/substring.cpp +6 -11
- package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +7 -3
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
- package/src/duckdb/src/function/scalar_function.cpp +5 -2
- package/src/duckdb/src/function/scalar_macro_function.cpp +2 -2
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +20 -39
- package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +135 -0
- package/src/duckdb/src/function/table/arrow.cpp +194 -52
- package/src/duckdb/src/function/table/arrow_conversion.cpp +212 -69
- package/src/duckdb/src/function/table/copy_csv.cpp +43 -14
- package/src/duckdb/src/function/table/query_function.cpp +80 -0
- package/src/duckdb/src/function/table/range.cpp +222 -142
- package/src/duckdb/src/function/table/read_csv.cpp +25 -13
- package/src/duckdb/src/function/table/sniff_csv.cpp +55 -35
- package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +141 -129
- package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +25 -14
- package/src/duckdb/src/function/table/system/duckdb_functions.cpp +20 -14
- package/src/duckdb/src/function/table/system/duckdb_indexes.cpp +15 -1
- package/src/duckdb/src/function/table/system/duckdb_variables.cpp +84 -0
- package/src/duckdb/src/function/table/system/test_all_types.cpp +1 -0
- package/src/duckdb/src/function/table/system/test_vector_types.cpp +33 -3
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +45 -22
- package/src/duckdb/src/function/table/unnest.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -4
- package/src/duckdb/src/function/table_function.cpp +5 -4
- package/src/duckdb/src/function/table_macro_function.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +8 -4
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +5 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +3 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +3 -4
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +5 -5
- package/src/duckdb/src/include/duckdb/catalog/default/builtin_types/types.hpp +2 -1
- package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +4 -5
- package/src/duckdb/src/include/duckdb/catalog/default/default_generator.hpp +4 -5
- package/src/duckdb/src/include/duckdb/catalog/default/default_schemas.hpp +2 -1
- package/src/duckdb/src/include/duckdb/catalog/default/default_table_functions.hpp +47 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +9 -1
- package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +120 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +37 -11
- package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +9 -8
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +92 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +26 -4
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +90 -11
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_buffer.hpp +8 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_merge_event.hpp +62 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +52 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/arrow/physical_arrow_batch_collector.hpp +30 -0
- package/src/duckdb/src/include/duckdb/common/arrow/physical_arrow_collector.hpp +65 -0
- package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +43 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +18 -16
- package/src/duckdb/src/include/duckdb/common/cgroups.hpp +30 -0
- package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +8 -1
- package/src/duckdb/src/include/duckdb/common/encryption_state.hpp +48 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +88 -0
- package/src/duckdb/src/include/duckdb/common/enums/checkpoint_type.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/copy_overwrite_mode.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/enums/destroy_buffer_upon.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/explain_format.hpp +17 -0
- package/src/duckdb/src/include/duckdb/common/enums/file_compression_type.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +88 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enums/set_scope.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +23 -2
- package/src/duckdb/src/include/duckdb/common/enums/stream_execution_result.hpp +25 -0
- package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/error_data.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/exception/binder_exception.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +20 -2
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +12 -0
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/file_open_flags.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +160 -96
- package/src/duckdb/src/include/duckdb/common/fsst.hpp +9 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -8
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +16 -7
- package/src/duckdb/src/include/duckdb/common/http_util.hpp +19 -0
- package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +19 -6
- package/src/duckdb/src/include/duckdb/common/limits.hpp +9 -2
- package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +38 -6
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +9 -2
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +82 -50
- package/src/duckdb/src/include/duckdb/common/operator/abs.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +23 -1
- package/src/duckdb/src/include/duckdb/common/operator/double_cast_operator.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/operator/integer_cast_operator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +10 -5
- package/src/duckdb/src/include/duckdb/common/optionally_owned_ptr.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/owning_string_map.hpp +155 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -3
- package/src/duckdb/src/include/duckdb/common/platform.hpp +58 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +172 -27
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/random_engine.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/render_tree.hpp +77 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +12 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +5 -3
- package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +15 -7
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_data.hpp +245 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +10 -11
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +12 -6
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +37 -7
- package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +44 -0
- package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +44 -0
- package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +44 -0
- package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +119 -0
- package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +9 -123
- package/src/duckdb/src/include/duckdb/common/type_visitor.hpp +96 -0
- package/src/duckdb/src/include/duckdb/common/typedefs.hpp +11 -1
- package/src/duckdb/src/include/duckdb/common/types/arrow_string_view_type.hpp +84 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +36 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +4 -2
- package/src/duckdb/src/include/duckdb/common/types/column/partitioned_column_data.hpp +52 -0
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +0 -3
- package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +65 -0
- package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +49 -40
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +4 -3
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/types/time.hpp +11 -6
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +103 -12
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +12 -3
- package/src/duckdb/src/include/duckdb/common/types/varint.hpp +107 -0
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/types.hpp +6 -39
- package/src/duckdb/src/include/duckdb/common/union_by_name.hpp +42 -10
- package/src/duckdb/src/include/duckdb/common/vector_operations/generic_executor.hpp +29 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +0 -7
- package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/winapi.hpp +8 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +8 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/stddev.hpp +8 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +4 -2
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/histogram_helpers.hpp +99 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +16 -7
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +396 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +10 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_helpers.hpp +65 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_sort_tree.hpp +349 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_state.hpp +300 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/sort_key_helpers.hpp +55 -0
- package/src/duckdb/src/include/duckdb/core_functions/array_kernels.hpp +107 -0
- package/src/duckdb/src/include/duckdb/core_functions/create_sort_key.hpp +55 -0
- package/src/duckdb/src/include/duckdb/core_functions/lambda_functions.hpp +1 -2
- package/src/duckdb/src/include/duckdb/core_functions/scalar/array_functions.hpp +24 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +9 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +80 -8
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +9 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +54 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +30 -21
- package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +25 -14
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +2 -48
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +25 -2
- package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +102 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +94 -101
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +43 -25
- package/src/duckdb/src/include/duckdb/execution/index/art/base_leaf.hpp +109 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/base_node.hpp +140 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +43 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +41 -52
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +133 -74
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +46 -29
- package/src/duckdb/src/include/duckdb/execution/index/art/node256_leaf.hpp +53 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +52 -35
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +96 -57
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +9 -4
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +48 -10
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +114 -36
- package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +158 -67
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/aggregate_object.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_streaming_window.hpp +19 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +81 -23
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +27 -8
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +31 -22
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +48 -5
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +7 -3
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +22 -12
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +35 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +81 -39
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +18 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +9 -7
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/header_value.hpp +26 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +6 -9
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/skip_scanner.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +5 -3
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +36 -19
- package/src/duckdb/src/include/duckdb/execution/operator/filter/physical_filter.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +53 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_collector.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +6 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +18 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +6 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set_variable.hpp +43 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +59 -0
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_comparison_join.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +5 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_left_delim_join.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_right_delim_join.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/batch_memory_manager.hpp +5 -37
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +9 -3
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +8 -6
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +21 -6
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +137 -110
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +57 -126
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +21 -4
- package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +10 -10
- package/src/duckdb/src/include/duckdb/function/compression_function.hpp +37 -7
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +24 -11
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +41 -1
- package/src/duckdb/src/include/duckdb/function/macro_function.hpp +15 -5
- package/src/duckdb/src/include/duckdb/function/pragma/pragma_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/replacement_scan.hpp +20 -4
- package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +6 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +77 -109
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +6 -3
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +25 -12
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +9 -8
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +38 -4
- package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -57
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +142 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_datetime_type.hpp +18 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +7 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_variable_size_type.hpp +10 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/range.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -1
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +5 -5
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +14 -2
- package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/appender.hpp +14 -4
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +25 -7
- package/src/duckdb/src/include/duckdb/main/buffered_data/batched_buffered_data.hpp +79 -0
- package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +10 -20
- package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +11 -12
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/capi/cast/generic.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/capi/cast/utils.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +809 -0
- package/src/duckdb/src/include/duckdb/main/chunk_scan_state/batched_data_collection.hpp +35 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +68 -2
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +30 -22
- package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +79 -1
- package/src/duckdb/src/include/duckdb/main/client_properties.hpp +9 -3
- package/src/duckdb/src/include/duckdb/main/config.hpp +55 -7
- package/src/duckdb/src/include/duckdb/main/connection.hpp +5 -1
- package/src/duckdb/src/include/duckdb/main/database.hpp +16 -5
- package/src/duckdb/src/include/duckdb/main/database_manager.hpp +9 -8
- package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +21 -6
- package/src/duckdb/src/include/duckdb/main/extension.hpp +20 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +29 -23
- package/src/duckdb/src/include/duckdb/main/extension_install_info.hpp +6 -0
- package/src/duckdb/src/include/duckdb/main/extension_util.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +5 -6
- package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +2 -5
- package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +87 -0
- package/src/duckdb/src/include/duckdb/main/profiling_node.hpp +60 -0
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +72 -34
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/relation/delim_get_relation.hpp +30 -0
- package/src/duckdb/src/include/duckdb/main/relation/explain_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation/materialized_relation.hpp +1 -4
- package/src/duckdb/src/include/duckdb/main/relation/query_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/read_json_relation.hpp +6 -0
- package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/relation.hpp +7 -4
- package/src/duckdb/src/include/duckdb/main/secret/default_secrets.hpp +36 -0
- package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +108 -0
- package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +14 -4
- package/src/duckdb/src/include/duckdb/main/settings.hpp +227 -3
- package/src/duckdb/src/include/duckdb/main/stream_query_result.hpp +8 -0
- package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +51 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +7 -0
- package/src/duckdb/src/include/duckdb/optimizer/cte_filter_pusher.hpp +46 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +7 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +31 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +51 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +1 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +17 -5
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +15 -13
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +9 -4
- package/src/duckdb/src/include/duckdb/optimizer/limit_pushdown.hpp +25 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/optimizer/rule/join_dependent_filter.hpp +37 -0
- package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +6 -1
- package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +54 -2
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +27 -8
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline_prepare_finish_event.hpp +25 -0
- package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +63 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +10 -1
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +11 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +9 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_expression_iterator.hpp +13 -6
- package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -3
- package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/statement/explain_statement.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/statement/set_statement.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/statement/transaction_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +0 -2
- package/src/duckdb/src/include/duckdb/parser/tableref/column_data_ref.hpp +9 -7
- package/src/duckdb/src/include/duckdb/parser/tableref/delimgetref.hpp +37 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +0 -2
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +0 -2
- package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +0 -1
- package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +17 -9
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -14
- package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_between_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_conjunction_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_constant_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_expanded_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_operator_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_reference_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression/bound_unnest_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/column_alias_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +6 -5
- package/src/duckdb/src/include/duckdb/planner/expression_binder/where_binder.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +19 -11
- package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/filter/struct_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_any_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +6 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +10 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +4 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +15 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +24 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_delimgetref.hpp +26 -0
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_joinref.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_subqueryref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_table_function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/tableref/list.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +48 -3
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +21 -7
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +65 -51
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +14 -5
- package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +0 -4
- package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +3 -4
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alp/algorithm/alp.hpp +4 -4
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +6 -4
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +19 -17
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_scan.hpp +3 -4
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_utils.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +13 -11
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +19 -19
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp +3 -4
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +10 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +15 -0
- package/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +14 -10
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +6 -8
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +7 -4
- package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -7
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +29 -4
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +22 -7
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +15 -2
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +8 -2
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +5 -16
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +51 -13
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +6 -3
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +29 -19
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +23 -7
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +27 -18
- package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +6 -3
- package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +5 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +77 -6
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +23 -11
- package/src/duckdb/src/include/duckdb/storage/table/data_table_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +18 -4
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +89 -14
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/temporary_memory_manager.hpp +33 -15
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +9 -9
- package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +3 -1
- package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +4 -16
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +27 -4
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +11 -0
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +6 -2
- package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +5 -5
- package/src/duckdb/src/include/duckdb/transaction/transaction_context.hpp +6 -2
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +5 -3
- package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +48 -0
- package/src/duckdb/src/include/duckdb.h +1779 -739
- package/src/duckdb/src/include/duckdb_extension.h +921 -0
- package/src/duckdb/src/main/appender.cpp +53 -7
- package/src/duckdb/src/main/attached_database.cpp +87 -17
- package/src/duckdb/src/main/buffered_data/batched_buffered_data.cpp +226 -0
- package/src/duckdb/src/main/buffered_data/buffered_data.cpp +35 -0
- package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +48 -23
- package/src/duckdb/src/main/capi/aggregate_function-c.cpp +327 -0
- package/src/duckdb/src/main/capi/appender-c.cpp +18 -0
- package/src/duckdb/src/main/capi/cast/utils-c.cpp +2 -2
- package/src/duckdb/src/main/capi/cast_function-c.cpp +210 -0
- package/src/duckdb/src/main/capi/config-c.cpp +3 -3
- package/src/duckdb/src/main/capi/data_chunk-c.cpp +18 -7
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +223 -24
- package/src/duckdb/src/main/capi/helper-c.cpp +51 -11
- package/src/duckdb/src/main/capi/logical_types-c.cpp +105 -46
- package/src/duckdb/src/main/capi/pending-c.cpp +7 -6
- package/src/duckdb/src/main/capi/prepared-c.cpp +18 -7
- package/src/duckdb/src/main/capi/profiling_info-c.cpp +84 -0
- package/src/duckdb/src/main/capi/result-c.cpp +139 -37
- package/src/duckdb/src/main/capi/scalar_function-c.cpp +269 -0
- package/src/duckdb/src/main/capi/table_description-c.cpp +82 -0
- package/src/duckdb/src/main/capi/table_function-c.cpp +161 -95
- package/src/duckdb/src/main/capi/value-c.cpp +2 -2
- package/src/duckdb/src/main/chunk_scan_state/batched_data_collection.cpp +57 -0
- package/src/duckdb/src/main/client_config.cpp +17 -0
- package/src/duckdb/src/main/client_context.cpp +67 -52
- package/src/duckdb/src/main/client_data.cpp +3 -3
- package/src/duckdb/src/main/config.cpp +120 -62
- package/src/duckdb/src/main/connection.cpp +14 -2
- package/src/duckdb/src/main/database.cpp +96 -35
- package/src/duckdb/src/main/database_manager.cpp +25 -23
- package/src/duckdb/src/main/database_path_and_type.cpp +2 -2
- package/src/duckdb/src/main/db_instance_cache.cpp +54 -19
- package/src/duckdb/src/main/extension/extension_helper.cpp +47 -42
- package/src/duckdb/src/main/extension/extension_install.cpp +155 -87
- package/src/duckdb/src/main/extension/extension_load.cpp +180 -26
- package/src/duckdb/src/main/extension/extension_util.cpp +8 -0
- package/src/duckdb/src/main/extension.cpp +72 -5
- package/src/duckdb/src/main/pending_query_result.cpp +20 -12
- package/src/duckdb/src/main/prepared_statement.cpp +6 -6
- package/src/duckdb/src/main/prepared_statement_data.cpp +28 -17
- package/src/duckdb/src/main/profiling_info.cpp +196 -0
- package/src/duckdb/src/main/query_profiler.cpp +413 -224
- package/src/duckdb/src/main/query_result.cpp +1 -1
- package/src/duckdb/src/main/relation/create_table_relation.cpp +4 -2
- package/src/duckdb/src/main/relation/create_view_relation.cpp +0 -6
- package/src/duckdb/src/main/relation/delim_get_relation.cpp +44 -0
- package/src/duckdb/src/main/relation/explain_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -0
- package/src/duckdb/src/main/relation/limit_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/materialized_relation.cpp +3 -3
- package/src/duckdb/src/main/relation/query_relation.cpp +42 -15
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +7 -14
- package/src/duckdb/src/main/relation/read_json_relation.cpp +20 -0
- package/src/duckdb/src/main/relation/setop_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/table_function_relation.cpp +6 -0
- package/src/duckdb/src/main/relation/view_relation.cpp +10 -0
- package/src/duckdb/src/main/relation.cpp +12 -8
- package/src/duckdb/src/main/secret/default_secrets.cpp +108 -0
- package/src/duckdb/src/main/secret/secret.cpp +145 -2
- package/src/duckdb/src/main/secret/secret_manager.cpp +85 -35
- package/src/duckdb/src/main/secret/secret_storage.cpp +29 -17
- package/src/duckdb/src/main/settings/settings.cpp +503 -11
- package/src/duckdb/src/main/stream_query_result.cpp +75 -2
- package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +248 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +28 -6
- package/src/duckdb/src/optimizer/compressed_materialization/compress_comparison_join.cpp +152 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +11 -1
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +3 -0
- package/src/duckdb/src/optimizer/cte_filter_pusher.cpp +117 -0
- package/src/duckdb/src/optimizer/filter_combiner.cpp +30 -9
- package/src/duckdb/src/optimizer/filter_pullup.cpp +54 -2
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +71 -3
- package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +154 -0
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +245 -114
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +42 -20
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +6 -2
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +32 -10
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +97 -131
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +265 -51
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +21 -17
- package/src/duckdb/src/optimizer/limit_pushdown.cpp +42 -0
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -8
- package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +17 -17
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +22 -4
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +1 -18
- package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +6 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +4 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +91 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +21 -25
- package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +1 -0
- package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +3 -0
- package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +8 -2
- package/src/duckdb/src/optimizer/rule/join_dependent_filter.cpp +135 -0
- package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +1 -1
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +6 -1
- package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +7 -6
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -1
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +46 -7
- package/src/duckdb/src/parallel/executor.cpp +129 -51
- package/src/duckdb/src/parallel/executor_task.cpp +16 -3
- package/src/duckdb/src/parallel/meta_pipeline.cpp +98 -29
- package/src/duckdb/src/parallel/pipeline.cpp +17 -3
- package/src/duckdb/src/parallel/pipeline_executor.cpp +14 -2
- package/src/duckdb/src/parallel/pipeline_prepare_finish_event.cpp +34 -0
- package/src/duckdb/src/parallel/task_executor.cpp +84 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +94 -16
- package/src/duckdb/src/parallel/thread_context.cpp +1 -1
- package/src/duckdb/src/parser/expression/function_expression.cpp +14 -0
- package/src/duckdb/src/parser/expression/star_expression.cpp +35 -2
- package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +5 -1
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +17 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +37 -28
- package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +44 -2
- package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +21 -1
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +29 -25
- package/src/duckdb/src/parser/parser.cpp +41 -1
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +1 -0
- package/src/duckdb/src/parser/statement/explain_statement.cpp +28 -13
- package/src/duckdb/src/parser/statement/relation_statement.cpp +5 -0
- package/src/duckdb/src/parser/statement/set_statement.cpp +4 -2
- package/src/duckdb/src/parser/statement/transaction_statement.cpp +3 -3
- package/src/duckdb/src/parser/tableref/column_data_ref.cpp +1 -27
- package/src/duckdb/src/parser/tableref/delimgetref.cpp +30 -0
- package/src/duckdb/src/parser/tableref/joinref.cpp +4 -0
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +35 -29
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +32 -32
- package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +2 -1
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +17 -0
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +5 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +36 -34
- package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +30 -14
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +2 -1
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +27 -19
- package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +31 -28
- package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +25 -27
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +53 -42
- package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +6 -6
- package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_explain.cpp +38 -3
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +1 -2
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_select.cpp +26 -21
- package/src/duckdb/src/parser/transform/statement/transform_set.cpp +8 -8
- package/src/duckdb/src/parser/transform/statement/transform_show.cpp +5 -2
- package/src/duckdb/src/parser/transform/statement/transform_show_select.cpp +6 -4
- package/src/duckdb/src/parser/transform/statement/transform_transaction.cpp +27 -6
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +8 -9
- package/src/duckdb/src/parser/transform/statement/transform_upsert.cpp +11 -12
- package/src/duckdb/src/parser/transform/statement/transform_vacuum.cpp +3 -3
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +16 -10
- package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +11 -7
- package/src/duckdb/src/planner/bind_context.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +22 -7
- package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +3 -2
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +11 -4
- package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +9 -54
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -5
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -27
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +7 -7
- package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +9 -2
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +26 -7
- package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +5 -0
- package/src/duckdb/src/planner/binder/expression/bind_unpacked_star_expression.cpp +91 -0
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -8
- package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +6 -10
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +14 -10
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +3 -3
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +46 -7
- package/src/duckdb/src/planner/binder/statement/bind_call.cpp +13 -20
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +105 -13
- package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -3
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +75 -55
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +5 -4
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +24 -8
- package/src/duckdb/src/planner/binder/statement/bind_explain.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +5 -105
- package/src/duckdb/src/planner/binder/statement/bind_extension.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +109 -41
- package/src/duckdb/src/planner/binder/statement/bind_set.cpp +23 -7
- package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +4 -1
- package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +17 -3
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -4
- package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +8 -6
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +55 -42
- package/src/duckdb/src/planner/binder/tableref/bind_column_data_ref.cpp +3 -2
- package/src/duckdb/src/planner/binder/tableref/bind_delimgetref.cpp +16 -0
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +31 -1
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +6 -0
- package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +2 -0
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +106 -46
- package/src/duckdb/src/planner/binder/tableref/plan_delimgetref.cpp +11 -0
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +15 -2
- package/src/duckdb/src/planner/binder/tableref/plan_table_function.cpp +4 -0
- package/src/duckdb/src/planner/binder.cpp +172 -15
- package/src/duckdb/src/planner/collation_binding.cpp +99 -0
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -4
- package/src/duckdb/src/planner/expression/bound_between_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_case_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +14 -12
- package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_comparison_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_conjunction_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_constant_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_expanded_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -2
- package/src/duckdb/src/planner/expression/bound_lambda_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_lambdaref_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_operator_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_subquery_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_unnest_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +6 -6
- package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +7 -0
- package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/group_binder.cpp +26 -22
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +7 -1
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/insert_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +61 -43
- package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/returning_binder.cpp +3 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +10 -3
- package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/where_binder.cpp +9 -2
- package/src/duckdb/src/planner/expression_binder.cpp +121 -21
- package/src/duckdb/src/planner/expression_iterator.cpp +26 -1
- package/src/duckdb/src/planner/filter/conjunction_filter.cpp +33 -0
- package/src/duckdb/src/planner/filter/constant_filter.cpp +15 -0
- package/src/duckdb/src/planner/filter/null_filter.cpp +22 -0
- package/src/duckdb/src/planner/filter/struct_filter.cpp +16 -0
- package/src/duckdb/src/planner/logical_operator.cpp +24 -7
- package/src/duckdb/src/planner/operator/logical_aggregate.cpp +13 -7
- package/src/duckdb/src/planner/operator/logical_any_join.cpp +5 -2
- package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +13 -5
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +64 -8
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +7 -0
- package/src/duckdb/src/planner/operator/logical_distinct.cpp +6 -5
- package/src/duckdb/src/planner/operator/logical_get.cpp +60 -18
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +7 -0
- package/src/duckdb/src/planner/operator/logical_order.cpp +7 -4
- package/src/duckdb/src/planner/operator/logical_top_n.cpp +2 -2
- package/src/duckdb/src/planner/operator/logical_vacuum.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +2 -3
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +27 -10
- package/src/duckdb/src/planner/table_filter.cpp +51 -0
- package/src/duckdb/src/storage/arena_allocator.cpp +28 -10
- package/src/duckdb/src/storage/block.cpp +3 -2
- package/src/duckdb/src/storage/buffer/block_handle.cpp +29 -14
- package/src/duckdb/src/storage/buffer/block_manager.cpp +6 -5
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +1 -1
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +264 -125
- package/src/duckdb/src/storage/buffer_manager.cpp +5 -1
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +0 -6
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +26 -3
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +21 -9
- package/src/duckdb/src/storage/checkpoint_manager.cpp +49 -24
- package/src/duckdb/src/storage/compression/alp/alp.cpp +6 -11
- package/src/duckdb/src/storage/compression/alprd.cpp +5 -9
- package/src/duckdb/src/storage/compression/bitpacking.cpp +35 -31
- package/src/duckdb/src/storage/compression/chimp/chimp.cpp +6 -8
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +71 -58
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +15 -13
- package/src/duckdb/src/storage/compression/fsst.cpp +66 -53
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +4 -5
- package/src/duckdb/src/storage/compression/patas.cpp +6 -17
- package/src/duckdb/src/storage/compression/rle.cpp +20 -18
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +71 -52
- package/src/duckdb/src/storage/compression/uncompressed.cpp +2 -2
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +8 -7
- package/src/duckdb/src/storage/data_pointer.cpp +22 -0
- package/src/duckdb/src/storage/data_table.cpp +41 -12
- package/src/duckdb/src/storage/local_storage.cpp +22 -8
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +33 -17
- package/src/duckdb/src/storage/metadata/metadata_reader.cpp +4 -4
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +3 -3
- package/src/duckdb/src/storage/partial_block_manager.cpp +19 -8
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +11 -8
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_extension_install_info.cpp +2 -0
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +19 -5
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +21 -1
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +4 -2
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +2 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +8 -4
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +4 -4
- package/src/duckdb/src/storage/single_file_block_manager.cpp +170 -34
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +221 -64
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +4 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +36 -26
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +4 -15
- package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -8
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +2 -1
- package/src/duckdb/src/storage/storage_info.cpp +34 -9
- package/src/duckdb/src/storage/storage_manager.cpp +147 -74
- package/src/duckdb/src/storage/table/array_column_data.cpp +37 -17
- package/src/duckdb/src/storage/table/chunk_info.cpp +38 -0
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -6
- package/src/duckdb/src/storage/table/column_data.cpp +252 -31
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -12
- package/src/duckdb/src/storage/table/column_segment.cpp +63 -34
- package/src/duckdb/src/storage/table/list_column_data.cpp +34 -15
- package/src/duckdb/src/storage/table/row_group.cpp +228 -120
- package/src/duckdb/src/storage/table/row_group_collection.cpp +122 -120
- package/src/duckdb/src/storage/table/row_version_manager.cpp +27 -1
- package/src/duckdb/src/storage/table/scan_state.cpp +101 -18
- package/src/duckdb/src/storage/table/standard_column_data.cpp +20 -34
- package/src/duckdb/src/storage/table/struct_column_data.cpp +39 -42
- package/src/duckdb/src/storage/table/table_statistics.cpp +2 -1
- package/src/duckdb/src/storage/table/update_segment.cpp +9 -8
- package/src/duckdb/src/storage/table/validity_column_data.cpp +2 -2
- package/src/duckdb/src/storage/table_index_list.cpp +8 -7
- package/src/duckdb/src/storage/temporary_file_manager.cpp +11 -9
- package/src/duckdb/src/storage/temporary_memory_manager.cpp +227 -39
- package/src/duckdb/src/storage/wal_replay.cpp +68 -28
- package/src/duckdb/src/storage/write_ahead_log.cpp +56 -47
- package/src/duckdb/src/transaction/cleanup_state.cpp +9 -1
- package/src/duckdb/src/transaction/commit_state.cpp +7 -170
- package/src/duckdb/src/transaction/duck_transaction.cpp +87 -19
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +65 -10
- package/src/duckdb/src/transaction/meta_transaction.cpp +18 -3
- package/src/duckdb/src/transaction/transaction_context.cpp +21 -17
- package/src/duckdb/src/transaction/undo_buffer.cpp +20 -14
- package/src/duckdb/src/transaction/wal_write_state.cpp +292 -0
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +0 -1
- package/src/duckdb/third_party/brotli/common/brotli_constants.h +204 -0
- package/src/duckdb/third_party/brotli/common/brotli_platform.h +543 -0
- package/src/duckdb/third_party/brotli/common/constants.cpp +17 -0
- package/src/duckdb/third_party/brotli/common/context.cpp +156 -0
- package/src/duckdb/third_party/brotli/common/context.h +110 -0
- package/src/duckdb/third_party/brotli/common/dictionary.cpp +5912 -0
- package/src/duckdb/third_party/brotli/common/dictionary.h +60 -0
- package/src/duckdb/third_party/brotli/common/platform.cpp +24 -0
- package/src/duckdb/third_party/brotli/common/shared_dictionary.cpp +517 -0
- package/src/duckdb/third_party/brotli/common/shared_dictionary_internal.h +71 -0
- package/src/duckdb/third_party/brotli/common/transform.cpp +287 -0
- package/src/duckdb/third_party/brotli/common/transform.h +77 -0
- package/src/duckdb/third_party/brotli/common/version.h +51 -0
- package/src/duckdb/third_party/brotli/dec/bit_reader.cpp +74 -0
- package/src/duckdb/third_party/brotli/dec/bit_reader.h +419 -0
- package/src/duckdb/third_party/brotli/dec/decode.cpp +2758 -0
- package/src/duckdb/third_party/brotli/dec/huffman.cpp +338 -0
- package/src/duckdb/third_party/brotli/dec/huffman.h +118 -0
- package/src/duckdb/third_party/brotli/dec/prefix.h +733 -0
- package/src/duckdb/third_party/brotli/dec/state.cpp +178 -0
- package/src/duckdb/third_party/brotli/dec/state.h +386 -0
- package/src/duckdb/third_party/brotli/enc/backward_references.cpp +3775 -0
- package/src/duckdb/third_party/brotli/enc/backward_references.h +36 -0
- package/src/duckdb/third_party/brotli/enc/backward_references_hq.cpp +935 -0
- package/src/duckdb/third_party/brotli/enc/backward_references_hq.h +92 -0
- package/src/duckdb/third_party/brotli/enc/bit_cost.cpp +410 -0
- package/src/duckdb/third_party/brotli/enc/bit_cost.h +60 -0
- package/src/duckdb/third_party/brotli/enc/block_splitter.cpp +1653 -0
- package/src/duckdb/third_party/brotli/enc/block_splitter.h +48 -0
- package/src/duckdb/third_party/brotli/enc/brotli_bit_stream.cpp +1431 -0
- package/src/duckdb/third_party/brotli/enc/brotli_bit_stream.h +85 -0
- package/src/duckdb/third_party/brotli/enc/brotli_hash.h +4352 -0
- package/src/duckdb/third_party/brotli/enc/brotli_params.h +47 -0
- package/src/duckdb/third_party/brotli/enc/cluster.cpp +1025 -0
- package/src/duckdb/third_party/brotli/enc/cluster.h +1017 -0
- package/src/duckdb/third_party/brotli/enc/command.cpp +24 -0
- package/src/duckdb/third_party/brotli/enc/command.h +187 -0
- package/src/duckdb/third_party/brotli/enc/compound_dictionary.cpp +209 -0
- package/src/duckdb/third_party/brotli/enc/compound_dictionary.h +75 -0
- package/src/duckdb/third_party/brotli/enc/compress_fragment.cpp +796 -0
- package/src/duckdb/third_party/brotli/enc/compress_fragment.h +82 -0
- package/src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.cpp +653 -0
- package/src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.h +68 -0
- package/src/duckdb/third_party/brotli/enc/dictionary_hash.cpp +1844 -0
- package/src/duckdb/third_party/brotli/enc/dictionary_hash.h +21 -0
- package/src/duckdb/third_party/brotli/enc/encode.cpp +1990 -0
- package/src/duckdb/third_party/brotli/enc/encoder_dict.cpp +636 -0
- package/src/duckdb/third_party/brotli/enc/encoder_dict.h +153 -0
- package/src/duckdb/third_party/brotli/enc/entropy_encode.cpp +500 -0
- package/src/duckdb/third_party/brotli/enc/entropy_encode.h +119 -0
- package/src/duckdb/third_party/brotli/enc/entropy_encode_static.h +538 -0
- package/src/duckdb/third_party/brotli/enc/fast_log.cpp +101 -0
- package/src/duckdb/third_party/brotli/enc/fast_log.h +63 -0
- package/src/duckdb/third_party/brotli/enc/find_match_length.h +68 -0
- package/src/duckdb/third_party/brotli/enc/histogram.cpp +96 -0
- package/src/duckdb/third_party/brotli/enc/histogram.h +210 -0
- package/src/duckdb/third_party/brotli/enc/literal_cost.cpp +176 -0
- package/src/duckdb/third_party/brotli/enc/literal_cost.h +28 -0
- package/src/duckdb/third_party/brotli/enc/memory.cpp +190 -0
- package/src/duckdb/third_party/brotli/enc/memory.h +127 -0
- package/src/duckdb/third_party/brotli/enc/metablock.cpp +1225 -0
- package/src/duckdb/third_party/brotli/enc/metablock.h +102 -0
- package/src/duckdb/third_party/brotli/enc/prefix.h +50 -0
- package/src/duckdb/third_party/brotli/enc/quality.h +202 -0
- package/src/duckdb/third_party/brotli/enc/ringbuffer.h +164 -0
- package/src/duckdb/third_party/brotli/enc/state.h +106 -0
- package/src/duckdb/third_party/brotli/enc/static_dict.cpp +538 -0
- package/src/duckdb/third_party/brotli/enc/static_dict.h +37 -0
- package/src/duckdb/third_party/brotli/enc/static_dict_lut.h +5862 -0
- package/src/duckdb/third_party/brotli/enc/utf8_util.cpp +81 -0
- package/src/duckdb/third_party/brotli/enc/utf8_util.h +29 -0
- package/src/duckdb/third_party/brotli/enc/write_bits.h +84 -0
- package/src/duckdb/third_party/brotli/include/brotli/decode.h +405 -0
- package/src/duckdb/third_party/brotli/include/brotli/encode.h +489 -0
- package/src/duckdb/third_party/brotli/include/brotli/port.h +238 -0
- package/src/duckdb/third_party/brotli/include/brotli/shared_dictionary.h +96 -0
- package/src/duckdb/third_party/brotli/include/brotli/types.h +83 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +20 -4
- package/src/duckdb/third_party/fmt/include/fmt/format.h +54 -10
- package/src/duckdb/third_party/fsst/fsst.h +2 -2
- package/src/duckdb/third_party/fsst/libfsst.hpp +2 -2
- package/src/duckdb/third_party/httplib/httplib.hpp +6763 -5580
- package/src/duckdb/third_party/hyperloglog/hyperloglog.cpp +13 -30
- package/src/duckdb/third_party/hyperloglog/hyperloglog.hpp +8 -2
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +22 -9
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1041 -554
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +21605 -21752
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +538 -299
- package/src/duckdb/third_party/mbedtls/include/mbedtls/mbedtls_config.h +1 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +36 -12
- package/src/duckdb/third_party/mbedtls/library/md.cpp +6 -6
- package/src/duckdb/third_party/mbedtls/library/sha1.cpp +2 -0
- package/src/duckdb/third_party/mbedtls/library/sha256.cpp +3 -0
- package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +99 -47
- package/src/duckdb/third_party/pcg/pcg_extras.hpp +1 -1
- package/src/duckdb/third_party/re2/re2/prog.cc +2 -2
- package/src/duckdb/third_party/snappy/snappy-internal.h +398 -0
- package/src/duckdb/third_party/snappy/snappy-sinksource.cc +111 -9
- package/src/duckdb/third_party/snappy/snappy-sinksource.h +158 -0
- package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +523 -3
- package/src/duckdb/third_party/snappy/snappy-stubs-public.h +34 -1
- package/src/duckdb/third_party/snappy/snappy.cc +2626 -0
- package/src/duckdb/third_party/snappy/snappy.h +223 -0
- package/src/duckdb/third_party/snappy/snappy_version.hpp +11 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc.hpp +69 -101
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +53 -0
- package/src/duckdb/third_party/utf8proc/utf8proc.cpp +627 -678
- package/src/duckdb/third_party/utf8proc/utf8proc_data.cpp +15008 -12868
- package/src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp +185 -29
- package/src/duckdb/ub_extension_json_json_functions.cpp +6 -0
- package/src/duckdb/ub_src_catalog_default.cpp +4 -0
- package/src/duckdb/ub_src_common.cpp +7 -1
- package/src/duckdb/ub_src_common_arrow.cpp +10 -0
- package/src/duckdb/ub_src_common_enums.cpp +2 -0
- package/src/duckdb/ub_src_common_tree_renderer.cpp +10 -0
- package/src/duckdb/ub_src_common_types.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +4 -0
- package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +2 -4
- package/src/duckdb/ub_src_core_functions_scalar_map.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +4 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +5 -3
- package/src/duckdb/ub_src_execution_operator_csv_scanner_scanner.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_helper.cpp +4 -0
- package/src/duckdb/ub_src_function.cpp +4 -0
- package/src/duckdb/ub_src_function_cast.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_generic.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +0 -2
- package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_function_table.cpp +2 -0
- package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/src/duckdb/ub_src_main.cpp +4 -0
- package/src/duckdb/ub_src_main_buffered_data.cpp +4 -0
- package/src/duckdb/ub_src_main_capi.cpp +10 -0
- package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
- package/src/duckdb/ub_src_main_relation.cpp +2 -0
- package/src/duckdb/ub_src_main_secret.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +8 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +2 -0
- package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
- package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
- package/src/duckdb/ub_src_parallel.cpp +4 -0
- package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
- package/src/duckdb/ub_src_planner.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_tableref.cpp +4 -0
- package/src/duckdb/ub_src_storage_statistics.cpp +0 -2
- package/src/duckdb/ub_src_transaction.cpp +2 -0
- package/test/columns.test.ts +1 -1
- package/test/prepare.test.ts +1 -1
- package/test/test_all_types.test.ts +1 -1
@@ -1,26 +1,42 @@
|
|
1
1
|
#include "duckdb/execution/join_hashtable.hpp"
|
2
2
|
|
3
3
|
#include "duckdb/common/exception.hpp"
|
4
|
-
#include "duckdb/common/
|
5
|
-
#include "duckdb/common/types/column/column_data_collection_segment.hpp"
|
4
|
+
#include "duckdb/common/radix_partitioning.hpp"
|
6
5
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
6
|
+
#include "duckdb/execution/ht_entry.hpp"
|
7
7
|
#include "duckdb/main/client_context.hpp"
|
8
8
|
#include "duckdb/storage/buffer_manager.hpp"
|
9
9
|
|
10
10
|
namespace duckdb {
|
11
|
-
|
12
11
|
using ValidityBytes = JoinHashTable::ValidityBytes;
|
13
12
|
using ScanStructure = JoinHashTable::ScanStructure;
|
14
13
|
using ProbeSpill = JoinHashTable::ProbeSpill;
|
15
14
|
using ProbeSpillLocalState = JoinHashTable::ProbeSpillLocalAppendState;
|
16
15
|
|
17
|
-
JoinHashTable::
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
finalized(false), has_null(false), radix_bits(INITIAL_RADIX_BITS), partition_start(0), partition_end(0) {
|
16
|
+
JoinHashTable::SharedState::SharedState()
|
17
|
+
: rhs_row_locations(LogicalType::POINTER), salt_match_sel(STANDARD_VECTOR_SIZE),
|
18
|
+
key_no_match_sel(STANDARD_VECTOR_SIZE) {
|
19
|
+
}
|
22
20
|
|
23
|
-
|
21
|
+
JoinHashTable::ProbeState::ProbeState()
|
22
|
+
: SharedState(), salt_v(LogicalType::UBIGINT), ht_offsets_v(LogicalType::UBIGINT),
|
23
|
+
ht_offsets_dense_v(LogicalType::UBIGINT), non_empty_sel(STANDARD_VECTOR_SIZE) {
|
24
|
+
}
|
25
|
+
|
26
|
+
JoinHashTable::InsertState::InsertState(const JoinHashTable &ht)
|
27
|
+
: SharedState(), remaining_sel(STANDARD_VECTOR_SIZE), key_match_sel(STANDARD_VECTOR_SIZE) {
|
28
|
+
ht.data_collection->InitializeChunk(lhs_data, ht.equality_predicate_columns);
|
29
|
+
ht.data_collection->InitializeChunkState(chunk_state, ht.equality_predicate_columns);
|
30
|
+
}
|
31
|
+
|
32
|
+
JoinHashTable::JoinHashTable(ClientContext &context, const vector<JoinCondition> &conditions_p,
|
33
|
+
vector<LogicalType> btypes, JoinType type_p, const vector<idx_t> &output_columns_p)
|
34
|
+
: buffer_manager(BufferManager::GetBufferManager(context)), conditions(conditions_p),
|
35
|
+
build_types(std::move(btypes)), output_columns(output_columns_p), entry_size(0), tuple_size(0),
|
36
|
+
vfound(Value::BOOLEAN(false)), join_type(type_p), finalized(false), has_null(false),
|
37
|
+
radix_bits(INITIAL_RADIX_BITS), partition_start(0), partition_end(0) {
|
38
|
+
for (idx_t i = 0; i < conditions.size(); ++i) {
|
39
|
+
auto &condition = conditions[i];
|
24
40
|
D_ASSERT(condition.left->return_type == condition.right->return_type);
|
25
41
|
auto type = condition.left->return_type;
|
26
42
|
if (condition.comparison == ExpressionType::COMPARE_EQUAL ||
|
@@ -30,9 +46,15 @@ JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector<JoinC
|
|
30
46
|
// and that all other conditions are at the back
|
31
47
|
D_ASSERT(equality_types.size() == condition_types.size());
|
32
48
|
equality_types.push_back(type);
|
49
|
+
equality_predicates.push_back(condition.comparison);
|
50
|
+
equality_predicate_columns.push_back(i);
|
51
|
+
|
52
|
+
} else {
|
53
|
+
// all non-equality conditions are at the back
|
54
|
+
non_equality_predicates.push_back(condition.comparison);
|
55
|
+
non_equality_predicate_columns.push_back(i);
|
33
56
|
}
|
34
57
|
|
35
|
-
predicates.push_back(condition.comparison);
|
36
58
|
null_values_are_equal.push_back(condition.comparison == ExpressionType::COMPARE_DISTINCT_FROM ||
|
37
59
|
condition.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM);
|
38
60
|
|
@@ -51,8 +73,24 @@ JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector<JoinC
|
|
51
73
|
}
|
52
74
|
layout_types.emplace_back(LogicalType::HASH);
|
53
75
|
layout.Initialize(layout_types, false);
|
54
|
-
|
55
|
-
|
76
|
+
|
77
|
+
// Initialize the row matcher that are used for filtering during the probing only if there are non-equality
|
78
|
+
if (!non_equality_predicates.empty()) {
|
79
|
+
|
80
|
+
row_matcher_probe = unique_ptr<RowMatcher>(new RowMatcher());
|
81
|
+
row_matcher_probe_no_match_sel = unique_ptr<RowMatcher>(new RowMatcher());
|
82
|
+
|
83
|
+
row_matcher_probe->Initialize(false, layout, non_equality_predicates, non_equality_predicate_columns);
|
84
|
+
row_matcher_probe_no_match_sel->Initialize(true, layout, non_equality_predicates,
|
85
|
+
non_equality_predicate_columns);
|
86
|
+
|
87
|
+
needs_chain_matcher = true;
|
88
|
+
} else {
|
89
|
+
needs_chain_matcher = false;
|
90
|
+
}
|
91
|
+
|
92
|
+
chains_longer_than_one = false;
|
93
|
+
row_matcher_build.Initialize(true, layout, equality_predicates);
|
56
94
|
|
57
95
|
const auto &offsets = layout.GetOffsets();
|
58
96
|
tuple_size = offsets[condition_types.size() + build_types.size()];
|
@@ -62,6 +100,14 @@ JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector<JoinC
|
|
62
100
|
data_collection = make_uniq<TupleDataCollection>(buffer_manager, layout);
|
63
101
|
sink_collection =
|
64
102
|
make_uniq<RadixPartitionedTupleData>(buffer_manager, layout, radix_bits, layout.ColumnCount() - 1);
|
103
|
+
|
104
|
+
dead_end = make_unsafe_uniq_array_uninitialized<data_t>(layout.GetRowWidth());
|
105
|
+
memset(dead_end.get(), 0, layout.GetRowWidth());
|
106
|
+
|
107
|
+
if (join_type == JoinType::SINGLE) {
|
108
|
+
auto &config = ClientConfig::GetConfig(context);
|
109
|
+
single_join_error_on_multiple_rows = config.scalar_subquery_error_on_multiple_rows;
|
110
|
+
}
|
65
111
|
}
|
66
112
|
|
67
113
|
JoinHashTable::~JoinHashTable() {
|
@@ -86,32 +132,176 @@ void JoinHashTable::Merge(JoinHashTable &other) {
|
|
86
132
|
sink_collection->Combine(*other.sink_collection);
|
87
133
|
}
|
88
134
|
|
89
|
-
void
|
90
|
-
if (
|
91
|
-
D_ASSERT(!ConstantVector::IsNull(
|
92
|
-
auto indices = ConstantVector::GetData<hash_t>(
|
93
|
-
|
135
|
+
static void ApplyBitmaskAndGetSaltBuild(Vector &hashes_v, const idx_t &count, const idx_t &bitmask) {
|
136
|
+
if (hashes_v.GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
137
|
+
D_ASSERT(!ConstantVector::IsNull(hashes_v));
|
138
|
+
auto indices = ConstantVector::GetData<hash_t>(hashes_v);
|
139
|
+
hash_t salt = ht_entry_t::ExtractSaltWithNulls(*indices);
|
140
|
+
idx_t offset = *indices & bitmask;
|
141
|
+
*indices = offset | salt;
|
142
|
+
hashes_v.Flatten(count);
|
94
143
|
} else {
|
95
|
-
|
96
|
-
auto
|
144
|
+
hashes_v.Flatten(count);
|
145
|
+
auto hashes = FlatVector::GetData<hash_t>(hashes_v);
|
97
146
|
for (idx_t i = 0; i < count; i++) {
|
98
|
-
|
147
|
+
idx_t salt = ht_entry_t::ExtractSaltWithNulls(hashes[i]);
|
148
|
+
idx_t offset = hashes[i] & bitmask;
|
149
|
+
hashes[i] = offset | salt;
|
99
150
|
}
|
100
151
|
}
|
101
152
|
}
|
102
153
|
|
103
|
-
|
104
|
-
|
105
|
-
|
154
|
+
//! Gets a pointer to the entry in the HT for each of the hashes_v using linear probing. Will update the key_match_sel
|
155
|
+
//! vector and the count argument to the number and position of the matches
|
156
|
+
template <bool USE_SALTS>
|
157
|
+
static inline void GetRowPointersInternal(DataChunk &keys, TupleDataChunkState &key_state,
|
158
|
+
JoinHashTable::ProbeState &state, Vector &hashes_v,
|
159
|
+
const SelectionVector &sel, idx_t &count, JoinHashTable *ht,
|
160
|
+
ht_entry_t *entries, Vector &pointers_result_v, SelectionVector &match_sel) {
|
161
|
+
UnifiedVectorFormat hashes_v_unified;
|
162
|
+
hashes_v.ToUnifiedFormat(count, hashes_v_unified);
|
163
|
+
|
164
|
+
auto hashes = UnifiedVectorFormat::GetData<hash_t>(hashes_v_unified);
|
165
|
+
auto salts = FlatVector::GetData<hash_t>(state.salt_v);
|
166
|
+
|
167
|
+
auto ht_offsets = FlatVector::GetData<idx_t>(state.ht_offsets_v);
|
168
|
+
auto ht_offsets_dense = FlatVector::GetData<idx_t>(state.ht_offsets_dense_v);
|
106
169
|
|
107
|
-
|
108
|
-
|
109
|
-
|
170
|
+
idx_t non_empty_count = 0;
|
171
|
+
|
172
|
+
// first, filter out the empty rows and calculate the offset
|
173
|
+
for (idx_t i = 0; i < count; i++) {
|
174
|
+
const auto row_index = sel.get_index(i);
|
175
|
+
auto uvf_index = hashes_v_unified.sel->get_index(row_index);
|
176
|
+
auto ht_offset = hashes[uvf_index] & ht->bitmask;
|
177
|
+
ht_offsets_dense[i] = ht_offset;
|
178
|
+
ht_offsets[row_index] = ht_offset;
|
179
|
+
}
|
180
|
+
|
181
|
+
// have a dense loop to have as few instructions as possible while producing cache misses as this is the
|
182
|
+
// first location where we access the big entries array
|
110
183
|
for (idx_t i = 0; i < count; i++) {
|
111
|
-
|
112
|
-
auto
|
113
|
-
|
114
|
-
|
184
|
+
idx_t ht_offset = ht_offsets_dense[i];
|
185
|
+
auto &entry = entries[ht_offset];
|
186
|
+
bool occupied = entry.IsOccupied();
|
187
|
+
state.non_empty_sel.set_index(non_empty_count, i);
|
188
|
+
non_empty_count += occupied;
|
189
|
+
}
|
190
|
+
|
191
|
+
for (idx_t i = 0; i < non_empty_count; i++) {
|
192
|
+
// transform the dense index to the actual index in the sel vector
|
193
|
+
idx_t dense_index = state.non_empty_sel.get_index(i);
|
194
|
+
const auto row_index = sel.get_index(dense_index);
|
195
|
+
state.non_empty_sel.set_index(i, row_index);
|
196
|
+
|
197
|
+
if (USE_SALTS) {
|
198
|
+
auto uvf_index = hashes_v_unified.sel->get_index(row_index);
|
199
|
+
auto hash = hashes[uvf_index];
|
200
|
+
hash_t row_salt = ht_entry_t::ExtractSalt(hash);
|
201
|
+
salts[row_index] = row_salt;
|
202
|
+
}
|
203
|
+
}
|
204
|
+
|
205
|
+
auto pointers_result = FlatVector::GetData<data_ptr_t>(pointers_result_v);
|
206
|
+
auto row_ptr_insert_to = FlatVector::GetData<data_ptr_t>(state.rhs_row_locations);
|
207
|
+
|
208
|
+
const SelectionVector *remaining_sel = &state.non_empty_sel;
|
209
|
+
idx_t remaining_count = non_empty_count;
|
210
|
+
|
211
|
+
idx_t &match_count = count;
|
212
|
+
match_count = 0;
|
213
|
+
|
214
|
+
while (remaining_count > 0) {
|
215
|
+
idx_t salt_match_count = 0;
|
216
|
+
idx_t key_no_match_count = 0;
|
217
|
+
|
218
|
+
// for each entry, linear probing until
|
219
|
+
// a) an empty entry is found -> return nullptr (do nothing, as vector is zeroed)
|
220
|
+
// b) an entry is found where the salt matches -> need to compare the keys
|
221
|
+
for (idx_t i = 0; i < remaining_count; i++) {
|
222
|
+
const auto row_index = remaining_sel->get_index(i);
|
223
|
+
|
224
|
+
idx_t &ht_offset = ht_offsets[row_index];
|
225
|
+
bool occupied;
|
226
|
+
ht_entry_t entry;
|
227
|
+
|
228
|
+
if (USE_SALTS) {
|
229
|
+
hash_t row_salt = salts[row_index];
|
230
|
+
// increment the ht_offset of the entry as long as next entry is occupied and salt does not match
|
231
|
+
while (true) {
|
232
|
+
entry = entries[ht_offset];
|
233
|
+
occupied = entry.IsOccupied();
|
234
|
+
bool salt_match = entry.GetSalt() == row_salt;
|
235
|
+
|
236
|
+
// condition for incrementing the ht_offset: occupied and row_salt does not match -> move to next
|
237
|
+
// entry
|
238
|
+
if (!occupied || salt_match) {
|
239
|
+
break;
|
240
|
+
}
|
241
|
+
|
242
|
+
IncrementAndWrap(ht_offset, ht->bitmask);
|
243
|
+
}
|
244
|
+
} else {
|
245
|
+
entry = entries[ht_offset];
|
246
|
+
occupied = entry.IsOccupied();
|
247
|
+
}
|
248
|
+
|
249
|
+
// the entries we need to process in the next iteration are the ones that are occupied and the row_salt
|
250
|
+
// does not match, the ones that are empty need no further processing
|
251
|
+
state.salt_match_sel.set_index(salt_match_count, row_index);
|
252
|
+
salt_match_count += occupied;
|
253
|
+
|
254
|
+
// entry might be empty, so the pointer in the entry is nullptr, but this does not matter as the row
|
255
|
+
// will not be compared anyway as with an empty entry we are already done
|
256
|
+
row_ptr_insert_to[row_index] = entry.GetPointerOrNull();
|
257
|
+
}
|
258
|
+
|
259
|
+
if (salt_match_count != 0) {
|
260
|
+
// Perform row comparisons, after function call salt_match_sel will point to the keys that match
|
261
|
+
idx_t key_match_count = ht->row_matcher_build.Match(keys, key_state.vector_data, state.salt_match_sel,
|
262
|
+
salt_match_count, ht->layout, state.rhs_row_locations,
|
263
|
+
&state.key_no_match_sel, key_no_match_count);
|
264
|
+
|
265
|
+
D_ASSERT(key_match_count + key_no_match_count == salt_match_count);
|
266
|
+
|
267
|
+
// Set a pointer to the matching row
|
268
|
+
for (idx_t i = 0; i < key_match_count; i++) {
|
269
|
+
const auto row_index = state.salt_match_sel.get_index(i);
|
270
|
+
pointers_result[row_index] = row_ptr_insert_to[row_index];
|
271
|
+
|
272
|
+
match_sel.set_index(match_count, row_index);
|
273
|
+
match_count++;
|
274
|
+
}
|
275
|
+
|
276
|
+
// Linear probing: each of the entries that do not match move to the next entry in the HT
|
277
|
+
for (idx_t i = 0; i < key_no_match_count; i++) {
|
278
|
+
const auto row_index = state.key_no_match_sel.get_index(i);
|
279
|
+
auto &ht_offset = ht_offsets[row_index];
|
280
|
+
|
281
|
+
IncrementAndWrap(ht_offset, ht->bitmask);
|
282
|
+
}
|
283
|
+
}
|
284
|
+
|
285
|
+
remaining_sel = &state.key_no_match_sel;
|
286
|
+
remaining_count = key_no_match_count;
|
287
|
+
}
|
288
|
+
}
|
289
|
+
|
290
|
+
inline bool JoinHashTable::UseSalt() const {
|
291
|
+
// only use salt for large hash tables and if there is only one equality condition as otherwise
|
292
|
+
// we potentially need to compare multiple keys
|
293
|
+
return this->capacity > USE_SALT_THRESHOLD && this->equality_predicate_columns.size() == 1;
|
294
|
+
}
|
295
|
+
|
296
|
+
void JoinHashTable::GetRowPointers(DataChunk &keys, TupleDataChunkState &key_state, ProbeState &state, Vector &hashes_v,
|
297
|
+
const SelectionVector &sel, idx_t &count, Vector &pointers_result_v,
|
298
|
+
SelectionVector &match_sel) {
|
299
|
+
if (UseSalt()) {
|
300
|
+
GetRowPointersInternal<true>(keys, key_state, state, hashes_v, sel, count, this, entries, pointers_result_v,
|
301
|
+
match_sel);
|
302
|
+
} else {
|
303
|
+
GetRowPointersInternal<false>(keys, key_state, state, hashes_v, sel, count, this, entries, pointers_result_v,
|
304
|
+
match_sel);
|
115
305
|
}
|
116
306
|
}
|
117
307
|
|
@@ -224,84 +414,295 @@ idx_t JoinHashTable::PrepareKeys(DataChunk &keys, vector<TupleDataVectorFormat>
|
|
224
414
|
// figure out which keys are NULL, and create a selection vector out of them
|
225
415
|
current_sel = FlatVector::IncrementalSelectionVector();
|
226
416
|
idx_t added_count = keys.size();
|
227
|
-
if (build_side &&
|
417
|
+
if (build_side && PropagatesBuildSide(join_type)) {
|
228
418
|
// in case of a right or full outer join, we cannot remove NULL keys from the build side
|
229
419
|
return added_count;
|
230
420
|
}
|
231
421
|
|
232
422
|
for (idx_t col_idx = 0; col_idx < keys.ColumnCount(); col_idx++) {
|
233
|
-
if (
|
234
|
-
|
235
|
-
if (col_key_data.validity.AllValid()) {
|
236
|
-
continue;
|
237
|
-
}
|
238
|
-
added_count = FilterNullValues(col_key_data, *current_sel, added_count, sel);
|
239
|
-
// null values are NOT equal for this column, filter them out
|
240
|
-
current_sel = &sel;
|
423
|
+
if (null_values_are_equal[col_idx]) {
|
424
|
+
continue;
|
241
425
|
}
|
426
|
+
auto &col_key_data = vector_data[col_idx].unified;
|
427
|
+
if (col_key_data.validity.AllValid()) {
|
428
|
+
continue;
|
429
|
+
}
|
430
|
+
added_count = FilterNullValues(col_key_data, *current_sel, added_count, sel);
|
431
|
+
// null values are NOT equal for this column, filter them out
|
432
|
+
current_sel = &sel;
|
242
433
|
}
|
243
434
|
return added_count;
|
244
435
|
}
|
245
436
|
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
437
|
+
static void StorePointer(const_data_ptr_t pointer, data_ptr_t target) {
|
438
|
+
Store<uint64_t>(cast_pointer_to_uint64(pointer), target);
|
439
|
+
}
|
440
|
+
|
441
|
+
static data_ptr_t LoadPointer(const_data_ptr_t source) {
|
442
|
+
return cast_uint64_to_pointer(Load<uint64_t>(source));
|
443
|
+
}
|
444
|
+
|
445
|
+
//! If we consider to insert into an entry we expct to be empty, if it was filled in the meantime the insert will not
|
446
|
+
//! happen and we need to return the pointer to the to row with which the new entry would have collided. In any other
|
447
|
+
//! case we return a nullptr
|
448
|
+
template <bool PARALLEL, bool EXPECT_EMPTY>
|
449
|
+
static inline data_ptr_t InsertRowToEntry(atomic<ht_entry_t> &entry, const data_ptr_t &row_ptr_to_insert,
|
450
|
+
const hash_t &salt, const idx_t &pointer_offset) {
|
451
|
+
|
452
|
+
if (PARALLEL) {
|
453
|
+
// if we expect the entry to be empty, if the operation fails we need to cancel the whole operation as another
|
454
|
+
// key might have been inserted in the meantime that does not match the current key
|
455
|
+
if (EXPECT_EMPTY) {
|
456
|
+
|
457
|
+
// add nullptr to the end of the list to mark the end
|
458
|
+
StorePointer(nullptr, row_ptr_to_insert + pointer_offset);
|
459
|
+
|
460
|
+
ht_entry_t new_empty_entry = ht_entry_t::GetDesiredEntry(row_ptr_to_insert, salt);
|
461
|
+
ht_entry_t expected_empty_entry = ht_entry_t::GetEmptyEntry();
|
462
|
+
std::atomic_compare_exchange_weak(&entry, &expected_empty_entry, new_empty_entry);
|
463
|
+
|
464
|
+
// if the expected empty entry actually was null, we can just return the pointer, and it will be a nullptr
|
465
|
+
// if the expected entry was filled in the meantime, we need to cancel the operation and will return the
|
466
|
+
// pointer to the next entry
|
467
|
+
return expected_empty_entry.GetPointerOrNull();
|
468
|
+
}
|
469
|
+
|
470
|
+
// if we expect the entry to be full, we know that even if the insert fails the keys still match so we can
|
471
|
+
// just keep trying until we succeed
|
472
|
+
else {
|
473
|
+
ht_entry_t expected_current_entry = entry.load(std::memory_order_relaxed);
|
474
|
+
ht_entry_t desired_new_entry = ht_entry_t::GetDesiredEntry(row_ptr_to_insert, salt);
|
475
|
+
D_ASSERT(expected_current_entry.IsOccupied());
|
476
|
+
|
253
477
|
do {
|
254
|
-
|
255
|
-
|
256
|
-
} while (!std::atomic_compare_exchange_weak(&
|
257
|
-
} else {
|
258
|
-
// set prev in current key to the value (NOTE: this will be nullptr if there is none)
|
259
|
-
Store<data_ptr_t>(pointers[index], key_locations[i] + pointer_offset);
|
478
|
+
data_ptr_t current_row_pointer = expected_current_entry.GetPointer();
|
479
|
+
StorePointer(current_row_pointer, row_ptr_to_insert + pointer_offset);
|
480
|
+
} while (!std::atomic_compare_exchange_weak(&entry, &expected_current_entry, desired_new_entry));
|
260
481
|
|
261
|
-
|
262
|
-
pointers[index] = key_locations[i];
|
482
|
+
return nullptr;
|
263
483
|
}
|
484
|
+
} else {
|
485
|
+
// if we are not in parallel mode, we can just do the operation without any checks
|
486
|
+
ht_entry_t current_entry = entry.load(std::memory_order_relaxed);
|
487
|
+
data_ptr_t current_row_pointer = current_entry.GetPointerOrNull();
|
488
|
+
StorePointer(current_row_pointer, row_ptr_to_insert + pointer_offset);
|
489
|
+
entry = ht_entry_t::GetDesiredEntry(row_ptr_to_insert, salt);
|
490
|
+
return nullptr;
|
491
|
+
}
|
492
|
+
}
|
493
|
+
static inline void PerformKeyComparison(JoinHashTable::InsertState &state, JoinHashTable &ht,
|
494
|
+
const TupleDataCollection &data_collection, Vector &row_locations,
|
495
|
+
const idx_t count, idx_t &key_match_count, idx_t &key_no_match_count) {
|
496
|
+
// Get the data for the rows that need to be compared
|
497
|
+
state.lhs_data.Reset();
|
498
|
+
state.lhs_data.SetCardinality(count); // the right size
|
499
|
+
|
500
|
+
// The target selection vector says where to write the results into the lhs_data, we just want to write
|
501
|
+
// sequentially as otherwise we trigger a bug in the Gather function
|
502
|
+
data_collection.ResetCachedCastVectors(state.chunk_state, ht.equality_predicate_columns);
|
503
|
+
data_collection.Gather(row_locations, state.salt_match_sel, count, ht.equality_predicate_columns, state.lhs_data,
|
504
|
+
*FlatVector::IncrementalSelectionVector(), state.chunk_state.cached_cast_vectors);
|
505
|
+
TupleDataCollection::ToUnifiedFormat(state.chunk_state, state.lhs_data);
|
506
|
+
|
507
|
+
for (idx_t i = 0; i < count; i++) {
|
508
|
+
state.key_match_sel.set_index(i, i);
|
509
|
+
}
|
510
|
+
|
511
|
+
// Perform row comparisons
|
512
|
+
key_match_count =
|
513
|
+
ht.row_matcher_build.Match(state.lhs_data, state.chunk_state.vector_data, state.key_match_sel, count, ht.layout,
|
514
|
+
state.rhs_row_locations, &state.key_no_match_sel, key_no_match_count);
|
515
|
+
|
516
|
+
D_ASSERT(key_match_count + key_no_match_count == count);
|
517
|
+
}
|
518
|
+
|
519
|
+
template <bool PARALLEL>
|
520
|
+
static inline void InsertMatchesAndIncrementMisses(atomic<ht_entry_t> entries[], JoinHashTable::InsertState &state,
|
521
|
+
JoinHashTable &ht, const data_ptr_t lhs_row_locations[],
|
522
|
+
idx_t ht_offsets_and_salts[], const idx_t capacity_mask,
|
523
|
+
const idx_t key_match_count, const idx_t key_no_match_count) {
|
524
|
+
if (key_match_count != 0) {
|
525
|
+
ht.chains_longer_than_one = true;
|
526
|
+
}
|
527
|
+
|
528
|
+
// Insert the rows that match
|
529
|
+
for (idx_t i = 0; i < key_match_count; i++) {
|
530
|
+
const auto need_compare_idx = state.key_match_sel.get_index(i);
|
531
|
+
const auto entry_index = state.salt_match_sel.get_index(need_compare_idx);
|
532
|
+
|
533
|
+
const auto &ht_offset = ht_offsets_and_salts[entry_index] & ht_entry_t::POINTER_MASK;
|
534
|
+
auto &entry = entries[ht_offset];
|
535
|
+
const data_ptr_t row_ptr_to_insert = lhs_row_locations[entry_index];
|
536
|
+
|
537
|
+
const auto salt = ht_offsets_and_salts[entry_index];
|
538
|
+
InsertRowToEntry<PARALLEL, false>(entry, row_ptr_to_insert, salt, ht.pointer_offset);
|
539
|
+
}
|
540
|
+
|
541
|
+
// Linear probing: each of the entries that do not match move to the next entry in the HT
|
542
|
+
for (idx_t i = 0; i < key_no_match_count; i++) {
|
543
|
+
const auto need_compare_idx = state.key_no_match_sel.get_index(i);
|
544
|
+
const auto entry_index = state.salt_match_sel.get_index(need_compare_idx);
|
545
|
+
|
546
|
+
idx_t &ht_offset_and_salt = ht_offsets_and_salts[entry_index];
|
547
|
+
IncrementAndWrap(ht_offset_and_salt, capacity_mask);
|
548
|
+
|
549
|
+
state.remaining_sel.set_index(i, entry_index);
|
264
550
|
}
|
265
551
|
}
|
266
552
|
|
267
|
-
|
268
|
-
|
553
|
+
template <bool PARALLEL>
|
554
|
+
static void InsertHashesLoop(atomic<ht_entry_t> entries[], Vector &row_locations, Vector &hashes_v, const idx_t &count,
|
555
|
+
JoinHashTable::InsertState &state, const TupleDataCollection &data_collection,
|
556
|
+
JoinHashTable &ht) {
|
557
|
+
D_ASSERT(hashes_v.GetType().id() == LogicalType::HASH);
|
558
|
+
ApplyBitmaskAndGetSaltBuild(hashes_v, count, ht.bitmask);
|
559
|
+
|
560
|
+
// the offset for each row to insert
|
561
|
+
const auto ht_offsets_and_salts = FlatVector::GetData<idx_t>(hashes_v);
|
562
|
+
// the row locations of the rows that are already in the hash table
|
563
|
+
const auto rhs_row_locations = FlatVector::GetData<data_ptr_t>(state.rhs_row_locations);
|
564
|
+
// the row locations of the rows that are to be inserted
|
565
|
+
const auto lhs_row_locations = FlatVector::GetData<data_ptr_t>(row_locations);
|
566
|
+
|
567
|
+
// we start off with the entire chunk
|
568
|
+
idx_t remaining_count = count;
|
569
|
+
const auto *remaining_sel = FlatVector::IncrementalSelectionVector();
|
570
|
+
|
571
|
+
if (PropagatesBuildSide(ht.join_type)) {
|
572
|
+
// if we propagate the build side, we may have added rows with NULL keys to the HT
|
573
|
+
// these may need to be filtered out depending on the comparison type (exactly like PrepareKeys does)
|
574
|
+
for (idx_t col_idx = 0; col_idx < ht.conditions.size(); col_idx++) {
|
575
|
+
// if null values are NOT equal for this column we filter them out
|
576
|
+
if (ht.NullValuesAreEqual(col_idx)) {
|
577
|
+
continue;
|
578
|
+
}
|
579
|
+
|
580
|
+
idx_t entry_idx;
|
581
|
+
idx_t idx_in_entry;
|
582
|
+
ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
|
583
|
+
|
584
|
+
idx_t new_remaining_count = 0;
|
585
|
+
for (idx_t i = 0; i < remaining_count; i++) {
|
586
|
+
const auto idx = remaining_sel->get_index(i);
|
587
|
+
if (ValidityBytes(lhs_row_locations[idx]).RowIsValidUnsafe(col_idx)) {
|
588
|
+
state.remaining_sel.set_index(new_remaining_count++, idx);
|
589
|
+
}
|
590
|
+
}
|
591
|
+
remaining_count = new_remaining_count;
|
592
|
+
remaining_sel = &state.remaining_sel;
|
593
|
+
}
|
594
|
+
}
|
595
|
+
|
596
|
+
// use the ht bitmask to make the modulo operation faster but keep the salt bits intact
|
597
|
+
idx_t capacity_mask = ht.bitmask | ht_entry_t::SALT_MASK;
|
598
|
+
while (remaining_count > 0) {
|
599
|
+
idx_t salt_match_count = 0;
|
600
|
+
|
601
|
+
// iterate over each entry to find out whether it belongs to an existing list or will start a new list
|
602
|
+
for (idx_t i = 0; i < remaining_count; i++) {
|
603
|
+
const idx_t row_index = remaining_sel->get_index(i);
|
604
|
+
idx_t &ht_offset_and_salt = ht_offsets_and_salts[row_index];
|
605
|
+
const hash_t salt = ht_entry_t::ExtractSalt(ht_offset_and_salt);
|
606
|
+
|
607
|
+
// increment the ht_offset_and_salt of the entry as long as next entry is occupied and salt does not match
|
608
|
+
idx_t ht_offset;
|
609
|
+
ht_entry_t entry;
|
610
|
+
bool occupied;
|
611
|
+
while (true) {
|
612
|
+
ht_offset = ht_offset_and_salt & ht_entry_t::POINTER_MASK;
|
613
|
+
atomic<ht_entry_t> &atomic_entry = entries[ht_offset];
|
614
|
+
entry = atomic_entry.load(std::memory_order_relaxed);
|
615
|
+
occupied = entry.IsOccupied();
|
616
|
+
|
617
|
+
// condition for incrementing the ht_offset: occupied and row_salt does not match -> move to next entry
|
618
|
+
if (!occupied) {
|
619
|
+
break;
|
620
|
+
}
|
621
|
+
if (entry.GetSalt() == salt) {
|
622
|
+
break;
|
623
|
+
}
|
624
|
+
|
625
|
+
IncrementAndWrap(ht_offset_and_salt, capacity_mask);
|
626
|
+
}
|
627
|
+
|
628
|
+
if (!occupied) { // insert into free
|
629
|
+
auto &atomic_entry = entries[ht_offset];
|
630
|
+
const auto row_ptr_to_insert = lhs_row_locations[row_index];
|
631
|
+
const auto potential_collided_ptr =
|
632
|
+
InsertRowToEntry<PARALLEL, true>(atomic_entry, row_ptr_to_insert, salt, ht.pointer_offset);
|
633
|
+
|
634
|
+
if (PARALLEL) {
|
635
|
+
// if the insertion was not successful, the entry was occupied in the meantime, so we have to
|
636
|
+
// compare the keys and insert the row to the next entry
|
637
|
+
if (potential_collided_ptr) {
|
638
|
+
// if the entry was occupied, we need to compare the keys and insert the row to the next entry
|
639
|
+
// so remember this row and the pointer it collided with for the key comparison below
|
640
|
+
state.salt_match_sel.set_index(salt_match_count, row_index);
|
641
|
+
rhs_row_locations[salt_match_count] = potential_collided_ptr;
|
642
|
+
salt_match_count += 1;
|
643
|
+
}
|
644
|
+
}
|
269
645
|
|
270
|
-
|
271
|
-
|
646
|
+
} else { // compare with full entry
|
647
|
+
state.salt_match_sel.set_index(salt_match_count, row_index);
|
648
|
+
rhs_row_locations[salt_match_count] = entry.GetPointer();
|
649
|
+
salt_match_count += 1;
|
650
|
+
}
|
651
|
+
}
|
272
652
|
|
273
|
-
|
274
|
-
|
653
|
+
// at this step, for all the rows to insert we stepped either until we found an empty entry or an entry with
|
654
|
+
// a matching salt, we now need to compare the keys for the ones that have a matching salt
|
655
|
+
idx_t key_no_match_count = 0;
|
656
|
+
if (salt_match_count != 0) {
|
657
|
+
idx_t key_match_count = 0;
|
658
|
+
PerformKeyComparison(state, ht, data_collection, row_locations, salt_match_count, key_match_count,
|
659
|
+
key_no_match_count);
|
660
|
+
InsertMatchesAndIncrementMisses<PARALLEL>(entries, state, ht, lhs_row_locations, ht_offsets_and_salts,
|
661
|
+
capacity_mask, key_match_count, key_no_match_count);
|
662
|
+
}
|
275
663
|
|
276
|
-
|
277
|
-
|
664
|
+
// update the overall selection vector to only point to the entries that still need to be inserted
|
665
|
+
// as there was no match found for them yet
|
666
|
+
remaining_sel = &state.remaining_sel;
|
667
|
+
remaining_count = key_no_match_count;
|
668
|
+
}
|
669
|
+
}
|
278
670
|
|
671
|
+
void JoinHashTable::InsertHashes(Vector &hashes_v, const idx_t count, TupleDataChunkState &chunk_state,
|
672
|
+
InsertState &insert_state, bool parallel) {
|
673
|
+
auto atomic_entries = reinterpret_cast<atomic<ht_entry_t> *>(this->entries);
|
674
|
+
auto row_locations = chunk_state.row_locations;
|
279
675
|
if (parallel) {
|
280
|
-
InsertHashesLoop<true>(
|
676
|
+
InsertHashesLoop<true>(atomic_entries, row_locations, hashes_v, count, insert_state, *data_collection, *this);
|
281
677
|
} else {
|
282
|
-
InsertHashesLoop<false>(
|
678
|
+
InsertHashesLoop<false>(atomic_entries, row_locations, hashes_v, count, insert_state, *data_collection, *this);
|
283
679
|
}
|
284
680
|
}
|
285
681
|
|
286
682
|
void JoinHashTable::InitializePointerTable() {
|
287
|
-
|
683
|
+
capacity = PointerTableCapacity(Count());
|
288
684
|
D_ASSERT(IsPowerOfTwo(capacity));
|
289
685
|
|
290
686
|
if (hash_map.get()) {
|
291
687
|
// There is already a hash map
|
292
|
-
auto current_capacity = hash_map.GetSize() / sizeof(
|
293
|
-
if (capacity
|
294
|
-
//
|
295
|
-
hash_map = buffer_manager.GetBufferAllocator().Allocate(capacity * sizeof(
|
688
|
+
auto current_capacity = hash_map.GetSize() / sizeof(ht_entry_t);
|
689
|
+
if (capacity > current_capacity) {
|
690
|
+
// Need more space
|
691
|
+
hash_map = buffer_manager.GetBufferAllocator().Allocate(capacity * sizeof(ht_entry_t));
|
692
|
+
entries = reinterpret_cast<ht_entry_t *>(hash_map.get());
|
693
|
+
} else {
|
694
|
+
// Just use the current hash map
|
695
|
+
capacity = current_capacity;
|
296
696
|
}
|
297
697
|
} else {
|
298
698
|
// Allocate a hash map
|
299
|
-
hash_map = buffer_manager.GetBufferAllocator().Allocate(capacity * sizeof(
|
699
|
+
hash_map = buffer_manager.GetBufferAllocator().Allocate(capacity * sizeof(ht_entry_t));
|
700
|
+
entries = reinterpret_cast<ht_entry_t *>(hash_map.get());
|
300
701
|
}
|
301
|
-
D_ASSERT(hash_map.GetSize() == capacity * sizeof(
|
702
|
+
D_ASSERT(hash_map.GetSize() == capacity * sizeof(ht_entry_t));
|
302
703
|
|
303
704
|
// initialize HT with all-zero entries
|
304
|
-
std::fill_n(
|
705
|
+
std::fill_n(entries, capacity, ht_entry_t::GetEmptyEntry());
|
305
706
|
|
306
707
|
bitmask = capacity - 1;
|
307
708
|
}
|
@@ -316,62 +717,63 @@ void JoinHashTable::Finalize(idx_t chunk_idx_from, idx_t chunk_idx_to, bool para
|
|
316
717
|
TupleDataChunkIterator iterator(*data_collection, TupleDataPinProperties::KEEP_EVERYTHING_PINNED, chunk_idx_from,
|
317
718
|
chunk_idx_to, false);
|
318
719
|
const auto row_locations = iterator.GetRowLocations();
|
720
|
+
|
721
|
+
InsertState insert_state(*this);
|
319
722
|
do {
|
320
723
|
const auto count = iterator.GetCurrentChunkCount();
|
321
724
|
for (idx_t i = 0; i < count; i++) {
|
322
725
|
hash_data[i] = Load<hash_t>(row_locations[i] + pointer_offset);
|
323
726
|
}
|
324
|
-
|
727
|
+
TupleDataChunkState &chunk_state = iterator.GetChunkState();
|
728
|
+
|
729
|
+
InsertHashes(hashes, count, chunk_state, insert_state, parallel);
|
325
730
|
} while (iterator.Next());
|
326
731
|
}
|
327
732
|
|
328
|
-
|
329
|
-
|
733
|
+
void JoinHashTable::InitializeScanStructure(ScanStructure &scan_structure, DataChunk &keys,
|
734
|
+
TupleDataChunkState &key_state, const SelectionVector *&current_sel) {
|
330
735
|
D_ASSERT(Count() > 0); // should be handled before
|
331
736
|
D_ASSERT(finalized);
|
332
737
|
|
333
738
|
// set up the scan structure
|
334
|
-
|
335
|
-
|
739
|
+
scan_structure.is_null = false;
|
740
|
+
scan_structure.finished = false;
|
336
741
|
if (join_type != JoinType::INNER) {
|
337
|
-
|
338
|
-
memset(ss->found_match.get(), 0, sizeof(bool) * STANDARD_VECTOR_SIZE);
|
742
|
+
memset(scan_structure.found_match.get(), 0, sizeof(bool) * STANDARD_VECTOR_SIZE);
|
339
743
|
}
|
340
744
|
|
341
745
|
// first prepare the keys for probing
|
342
746
|
TupleDataCollection::ToUnifiedFormat(key_state, keys);
|
343
|
-
|
344
|
-
return ss;
|
747
|
+
scan_structure.count = PrepareKeys(keys, key_state.vector_data, current_sel, scan_structure.sel_vector, false);
|
345
748
|
}
|
346
749
|
|
347
|
-
|
348
|
-
|
750
|
+
void JoinHashTable::Probe(ScanStructure &scan_structure, DataChunk &keys, TupleDataChunkState &key_state,
|
751
|
+
ProbeState &probe_state, optional_ptr<Vector> precomputed_hashes) {
|
349
752
|
const SelectionVector *current_sel;
|
350
|
-
|
351
|
-
if (
|
352
|
-
return
|
753
|
+
InitializeScanStructure(scan_structure, keys, key_state, current_sel);
|
754
|
+
if (scan_structure.count == 0) {
|
755
|
+
return;
|
353
756
|
}
|
354
757
|
|
355
758
|
if (precomputed_hashes) {
|
356
|
-
|
759
|
+
GetRowPointers(keys, key_state, probe_state, *precomputed_hashes, *current_sel, scan_structure.count,
|
760
|
+
scan_structure.pointers, scan_structure.sel_vector);
|
357
761
|
} else {
|
358
|
-
// hash all the keys
|
359
762
|
Vector hashes(LogicalType::HASH);
|
360
|
-
|
763
|
+
// hash all the keys
|
764
|
+
Hash(keys, *current_sel, scan_structure.count, hashes);
|
361
765
|
|
362
766
|
// now initialize the pointers of the scan structure based on the hashes
|
363
|
-
|
767
|
+
GetRowPointers(keys, key_state, probe_state, hashes, *current_sel, scan_structure.count,
|
768
|
+
scan_structure.pointers, scan_structure.sel_vector);
|
364
769
|
}
|
365
|
-
|
366
|
-
// create the selection vector linking to only non-empty entries
|
367
|
-
ss->InitializeSelectionVector(current_sel);
|
368
|
-
|
369
|
-
return ss;
|
370
770
|
}
|
371
771
|
|
372
772
|
ScanStructure::ScanStructure(JoinHashTable &ht_p, TupleDataChunkState &key_state_p)
|
373
|
-
: key_state(key_state_p), pointers(LogicalType::POINTER),
|
374
|
-
|
773
|
+
: key_state(key_state_p), pointers(LogicalType::POINTER), count(0), sel_vector(STANDARD_VECTOR_SIZE),
|
774
|
+
chain_match_sel_vector(STANDARD_VECTOR_SIZE), chain_no_match_sel_vector(STANDARD_VECTOR_SIZE),
|
775
|
+
found_match(make_unsafe_uniq_array_uninitialized<bool>(STANDARD_VECTOR_SIZE)), ht(ht_p), finished(false),
|
776
|
+
is_null(true) {
|
375
777
|
}
|
376
778
|
|
377
779
|
void ScanStructure::Next(DataChunk &keys, DataChunk &left, DataChunk &result) {
|
@@ -381,8 +783,6 @@ void ScanStructure::Next(DataChunk &keys, DataChunk &left, DataChunk &result) {
|
|
381
783
|
switch (ht.join_type) {
|
382
784
|
case JoinType::INNER:
|
383
785
|
case JoinType::RIGHT:
|
384
|
-
case JoinType::RIGHT_ANTI:
|
385
|
-
case JoinType::RIGHT_SEMI:
|
386
786
|
NextInnerJoin(keys, left, result);
|
387
787
|
break;
|
388
788
|
case JoinType::SEMI:
|
@@ -394,6 +794,10 @@ void ScanStructure::Next(DataChunk &keys, DataChunk &left, DataChunk &result) {
|
|
394
794
|
case JoinType::ANTI:
|
395
795
|
NextAntiJoin(keys, left, result);
|
396
796
|
break;
|
797
|
+
case JoinType::RIGHT_ANTI:
|
798
|
+
case JoinType::RIGHT_SEMI:
|
799
|
+
NextRightSemiOrAntiJoin(keys);
|
800
|
+
break;
|
397
801
|
case JoinType::OUTER:
|
398
802
|
case JoinType::LEFT:
|
399
803
|
NextLeftJoin(keys, left, result);
|
@@ -406,7 +810,7 @@ void ScanStructure::Next(DataChunk &keys, DataChunk &left, DataChunk &result) {
|
|
406
810
|
}
|
407
811
|
}
|
408
812
|
|
409
|
-
bool ScanStructure::PointersExhausted() {
|
813
|
+
bool ScanStructure::PointersExhausted() const {
|
410
814
|
// AdvancePointers creates a "new_count" for every pointer advanced during the
|
411
815
|
// previous advance pointers call. If no pointers are advanced, new_count = 0.
|
412
816
|
// count is then set to new_count.
|
@@ -414,20 +818,31 @@ bool ScanStructure::PointersExhausted() {
|
|
414
818
|
}
|
415
819
|
|
416
820
|
idx_t ScanStructure::ResolvePredicates(DataChunk &keys, SelectionVector &match_sel, SelectionVector *no_match_sel) {
|
417
|
-
|
821
|
+
|
822
|
+
// Initialize the match_sel selection vector to the current sel_vector
|
418
823
|
for (idx_t i = 0; i < this->count; ++i) {
|
419
824
|
match_sel.set_index(i, this->sel_vector.get_index(i));
|
420
825
|
}
|
421
|
-
idx_t no_match_count = 0;
|
422
826
|
|
423
|
-
|
424
|
-
|
425
|
-
|
827
|
+
// If there is a matcher for the probing side because of non-equality predicates, use it
|
828
|
+
if (ht.needs_chain_matcher) {
|
829
|
+
idx_t no_match_count = 0;
|
830
|
+
auto &matcher = no_match_sel ? ht.row_matcher_probe_no_match_sel : ht.row_matcher_probe;
|
831
|
+
D_ASSERT(matcher);
|
832
|
+
|
833
|
+
// we need to only use the vectors with the indices of the columns that are used in the probe phase, namely
|
834
|
+
// the non-equality columns
|
835
|
+
return matcher->Match(keys, key_state.vector_data, match_sel, this->count, ht.layout, pointers, no_match_sel,
|
836
|
+
no_match_count, ht.non_equality_predicate_columns);
|
837
|
+
} else {
|
838
|
+
// no match sel is the opposite of match sel
|
839
|
+
return this->count;
|
840
|
+
}
|
426
841
|
}
|
427
842
|
|
428
843
|
idx_t ScanStructure::ScanInnerJoin(DataChunk &keys, SelectionVector &result_vector) {
|
429
844
|
while (true) {
|
430
|
-
// resolve the
|
845
|
+
// resolve the equality_predicates for this set of keys
|
431
846
|
idx_t result_count = ResolvePredicates(keys, result_vector, nullptr);
|
432
847
|
|
433
848
|
// after doing all the comparisons set the found_match vector
|
@@ -448,13 +863,19 @@ idx_t ScanStructure::ScanInnerJoin(DataChunk &keys, SelectionVector &result_vect
|
|
448
863
|
}
|
449
864
|
}
|
450
865
|
|
451
|
-
void ScanStructure::AdvancePointers(const SelectionVector &sel, idx_t sel_count) {
|
866
|
+
void ScanStructure::AdvancePointers(const SelectionVector &sel, const idx_t sel_count) {
|
867
|
+
|
868
|
+
if (!ht.chains_longer_than_one) {
|
869
|
+
this->count = 0;
|
870
|
+
return;
|
871
|
+
}
|
872
|
+
|
452
873
|
// now for all the pointers, we move on to the next set of pointers
|
453
874
|
idx_t new_count = 0;
|
454
875
|
auto ptrs = FlatVector::GetData<data_ptr_t>(this->pointers);
|
455
876
|
for (idx_t i = 0; i < sel_count; i++) {
|
456
877
|
auto idx = sel.get_index(i);
|
457
|
-
ptrs[idx] =
|
878
|
+
ptrs[idx] = LoadPointer(ptrs[idx] + ht.pointer_offset);
|
458
879
|
if (ptrs[idx]) {
|
459
880
|
this->sel_vector.set_index(new_count++, idx);
|
460
881
|
}
|
@@ -462,20 +883,6 @@ void ScanStructure::AdvancePointers(const SelectionVector &sel, idx_t sel_count)
|
|
462
883
|
this->count = new_count;
|
463
884
|
}
|
464
885
|
|
465
|
-
void ScanStructure::InitializeSelectionVector(const SelectionVector *&current_sel) {
|
466
|
-
idx_t non_empty_count = 0;
|
467
|
-
auto ptrs = FlatVector::GetData<data_ptr_t>(pointers);
|
468
|
-
auto cnt = count;
|
469
|
-
for (idx_t i = 0; i < cnt; i++) {
|
470
|
-
const auto idx = current_sel->get_index(i);
|
471
|
-
ptrs[idx] = Load<data_ptr_t>(ptrs[idx]);
|
472
|
-
if (ptrs[idx]) {
|
473
|
-
sel_vector.set_index(non_empty_count++, idx);
|
474
|
-
}
|
475
|
-
}
|
476
|
-
count = non_empty_count;
|
477
|
-
}
|
478
|
-
|
479
886
|
void ScanStructure::AdvancePointers() {
|
480
887
|
AdvancePointers(this->sel_vector, this->count);
|
481
888
|
}
|
@@ -499,17 +906,17 @@ void ScanStructure::NextInnerJoin(DataChunk &keys, DataChunk &left, DataChunk &r
|
|
499
906
|
return;
|
500
907
|
}
|
501
908
|
|
502
|
-
|
909
|
+
idx_t result_count = ScanInnerJoin(keys, chain_match_sel_vector);
|
503
910
|
|
504
|
-
idx_t result_count = ScanInnerJoin(keys, result_vector);
|
505
911
|
if (result_count > 0) {
|
506
912
|
if (PropagatesBuildSide(ht.join_type)) {
|
507
913
|
// full/right outer join: mark join matches as FOUND in the HT
|
508
914
|
auto ptrs = FlatVector::GetData<data_ptr_t>(pointers);
|
509
915
|
for (idx_t i = 0; i < result_count; i++) {
|
510
|
-
auto idx =
|
511
|
-
// NOTE: threadsan reports this as a data race because this can be set concurrently by separate
|
512
|
-
// Technically it is, but it does not matter, since the only value that can be written is
|
916
|
+
auto idx = chain_match_sel_vector.get_index(i);
|
917
|
+
// NOTE: threadsan reports this as a data race because this can be set concurrently by separate
|
918
|
+
// threads. Technically it is, but it does not matter, since the only value that can be written is
|
919
|
+
// "true"
|
513
920
|
Store<bool>(true, ptrs[idx] + ht.tuple_size);
|
514
921
|
}
|
515
922
|
}
|
@@ -518,14 +925,14 @@ void ScanStructure::NextInnerJoin(DataChunk &keys, DataChunk &left, DataChunk &r
|
|
518
925
|
// matches were found
|
519
926
|
// construct the result
|
520
927
|
// on the LHS, we create a slice using the result vector
|
521
|
-
result.Slice(left,
|
928
|
+
result.Slice(left, chain_match_sel_vector, result_count);
|
522
929
|
|
523
930
|
// on the RHS, we need to fetch the data from the hash table
|
524
931
|
for (idx_t i = 0; i < ht.output_columns.size(); i++) {
|
525
932
|
auto &vector = result.data[left.ColumnCount() + i];
|
526
933
|
const auto output_col_idx = ht.output_columns[i];
|
527
934
|
D_ASSERT(vector.GetType() == ht.layout.GetTypes()[output_col_idx]);
|
528
|
-
GatherResult(vector,
|
935
|
+
GatherResult(vector, chain_match_sel_vector, result_count, output_col_idx);
|
529
936
|
}
|
530
937
|
}
|
531
938
|
AdvancePointers();
|
@@ -538,18 +945,19 @@ void ScanStructure::ScanKeyMatches(DataChunk &keys) {
|
|
538
945
|
// we handle the entire chunk in one call to Next().
|
539
946
|
// for every pointer, we keep chasing pointers and doing comparisons.
|
540
947
|
// this results in a boolean array indicating whether or not the tuple has a match
|
541
|
-
|
948
|
+
// Start with the scan selection
|
949
|
+
|
542
950
|
while (this->count > 0) {
|
543
|
-
// resolve the
|
544
|
-
idx_t match_count = ResolvePredicates(keys,
|
951
|
+
// resolve the equality_predicates for the current set of pointers
|
952
|
+
idx_t match_count = ResolvePredicates(keys, chain_match_sel_vector, &chain_no_match_sel_vector);
|
545
953
|
idx_t no_match_count = this->count - match_count;
|
546
954
|
|
547
955
|
// mark each of the matches as found
|
548
956
|
for (idx_t i = 0; i < match_count; i++) {
|
549
|
-
found_match[
|
957
|
+
found_match[chain_match_sel_vector.get_index(i)] = true;
|
550
958
|
}
|
551
959
|
// continue searching for the ones where we did not find a match yet
|
552
|
-
AdvancePointers(
|
960
|
+
AdvancePointers(chain_no_match_sel_vector, no_match_count);
|
553
961
|
}
|
554
962
|
}
|
555
963
|
|
@@ -594,6 +1002,41 @@ void ScanStructure::NextAntiJoin(DataChunk &keys, DataChunk &left, DataChunk &re
|
|
594
1002
|
finished = true;
|
595
1003
|
}
|
596
1004
|
|
1005
|
+
void ScanStructure::NextRightSemiOrAntiJoin(DataChunk &keys) {
|
1006
|
+
const auto ptrs = FlatVector::GetData<data_ptr_t>(pointers);
|
1007
|
+
while (!PointersExhausted()) {
|
1008
|
+
// resolve the equality_predicates for this set of keys
|
1009
|
+
idx_t result_count = ResolvePredicates(keys, chain_match_sel_vector, nullptr);
|
1010
|
+
|
1011
|
+
// for each match, fully follow the chain
|
1012
|
+
for (idx_t i = 0; i < result_count; i++) {
|
1013
|
+
const auto idx = chain_match_sel_vector.get_index(i);
|
1014
|
+
auto &ptr = ptrs[idx];
|
1015
|
+
if (Load<bool>(ptr + ht.tuple_size)) { // Early out: chain has been fully marked as found before
|
1016
|
+
ptr = ht.dead_end.get();
|
1017
|
+
continue;
|
1018
|
+
}
|
1019
|
+
|
1020
|
+
// Fully mark chain as found
|
1021
|
+
while (true) {
|
1022
|
+
// NOTE: threadsan reports this as a data race because this can be set concurrently by separate threads
|
1023
|
+
// Technically it is, but it does not matter, since the only value that can be written is "true"
|
1024
|
+
Store<bool>(true, ptr + ht.tuple_size);
|
1025
|
+
auto next_ptr = LoadPointer(ptr + ht.pointer_offset);
|
1026
|
+
if (!next_ptr) {
|
1027
|
+
break;
|
1028
|
+
}
|
1029
|
+
ptr = next_ptr;
|
1030
|
+
}
|
1031
|
+
}
|
1032
|
+
|
1033
|
+
// check the next set of pointers
|
1034
|
+
AdvancePointers();
|
1035
|
+
}
|
1036
|
+
|
1037
|
+
finished = true;
|
1038
|
+
}
|
1039
|
+
|
597
1040
|
void ScanStructure::ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &child, DataChunk &result) {
|
598
1041
|
// for the initial set of columns we just reference the left side
|
599
1042
|
result.SetCardinality(child);
|
@@ -637,15 +1080,15 @@ void ScanStructure::ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &chi
|
|
637
1080
|
}
|
638
1081
|
}
|
639
1082
|
|
640
|
-
void ScanStructure::NextMarkJoin(DataChunk &keys, DataChunk &
|
641
|
-
D_ASSERT(result.ColumnCount() ==
|
1083
|
+
void ScanStructure::NextMarkJoin(DataChunk &keys, DataChunk &left, DataChunk &result) {
|
1084
|
+
D_ASSERT(result.ColumnCount() == left.ColumnCount() + 1);
|
642
1085
|
D_ASSERT(result.data.back().GetType() == LogicalType::BOOLEAN);
|
643
1086
|
// this method should only be called for a non-empty HT
|
644
1087
|
D_ASSERT(ht.Count() > 0);
|
645
1088
|
|
646
1089
|
ScanKeyMatches(keys);
|
647
1090
|
if (ht.correlated_mark_join_info.correlated_types.empty()) {
|
648
|
-
ConstructMarkJoinResult(keys,
|
1091
|
+
ConstructMarkJoinResult(keys, left, result);
|
649
1092
|
} else {
|
650
1093
|
auto &info = ht.correlated_mark_join_info;
|
651
1094
|
lock_guard<mutex> mj_lock(info.mj_lock);
|
@@ -660,9 +1103,9 @@ void ScanStructure::NextMarkJoin(DataChunk &keys, DataChunk &input, DataChunk &r
|
|
660
1103
|
info.correlated_counts->FetchAggregates(info.group_chunk, info.result_chunk);
|
661
1104
|
|
662
1105
|
// for the initial set of columns we just reference the left side
|
663
|
-
result.SetCardinality(
|
664
|
-
for (idx_t i = 0; i <
|
665
|
-
result.data[i].Reference(
|
1106
|
+
result.SetCardinality(left);
|
1107
|
+
for (idx_t i = 0; i < left.ColumnCount(); i++) {
|
1108
|
+
result.data[i].Reference(left.data[i]);
|
666
1109
|
}
|
667
1110
|
// create the result matching vector
|
668
1111
|
auto &last_key = keys.data.back();
|
@@ -674,16 +1117,16 @@ void ScanStructure::NextMarkJoin(DataChunk &keys, DataChunk &input, DataChunk &r
|
|
674
1117
|
switch (last_key.GetVectorType()) {
|
675
1118
|
case VectorType::CONSTANT_VECTOR:
|
676
1119
|
if (ConstantVector::IsNull(last_key)) {
|
677
|
-
mask.SetAllInvalid(
|
1120
|
+
mask.SetAllInvalid(left.size());
|
678
1121
|
}
|
679
1122
|
break;
|
680
1123
|
case VectorType::FLAT_VECTOR:
|
681
|
-
mask.Copy(FlatVector::Validity(last_key),
|
1124
|
+
mask.Copy(FlatVector::Validity(last_key), left.size());
|
682
1125
|
break;
|
683
1126
|
default: {
|
684
1127
|
UnifiedVectorFormat kdata;
|
685
1128
|
last_key.ToUnifiedFormat(keys.size(), kdata);
|
686
|
-
for (idx_t i = 0; i <
|
1129
|
+
for (idx_t i = 0; i < left.size(); i++) {
|
687
1130
|
auto kidx = kdata.sel->get_index(i);
|
688
1131
|
mask.Set(i, kdata.validity.RowIsValid(kidx));
|
689
1132
|
}
|
@@ -694,7 +1137,7 @@ void ScanStructure::NextMarkJoin(DataChunk &keys, DataChunk &input, DataChunk &r
|
|
694
1137
|
auto count_star = FlatVector::GetData<int64_t>(info.result_chunk.data[0]);
|
695
1138
|
auto count = FlatVector::GetData<int64_t>(info.result_chunk.data[1]);
|
696
1139
|
// set the entries to either true or false based on whether a match was found
|
697
|
-
for (idx_t i = 0; i <
|
1140
|
+
for (idx_t i = 0; i < left.size(); i++) {
|
698
1141
|
D_ASSERT(count_star[i] >= count[i]);
|
699
1142
|
bool_result[i] = found_match ? found_match[i] : false;
|
700
1143
|
if (!bool_result[i] && count_star[i] > count[i]) {
|
@@ -742,39 +1185,40 @@ void ScanStructure::NextLeftJoin(DataChunk &keys, DataChunk &left, DataChunk &re
|
|
742
1185
|
}
|
743
1186
|
}
|
744
1187
|
|
745
|
-
void ScanStructure::NextSingleJoin(DataChunk &keys, DataChunk &
|
1188
|
+
void ScanStructure::NextSingleJoin(DataChunk &keys, DataChunk &left, DataChunk &result) {
|
746
1189
|
// single join
|
747
1190
|
// this join is similar to the semi join except that
|
748
1191
|
// (1) we actually return data from the RHS and
|
749
1192
|
// (2) we return NULL for that data if there is no match
|
1193
|
+
// (3) if single_join_error_on_multiple_rows is set, we need to keep looking for duplicates after fetching
|
750
1194
|
idx_t result_count = 0;
|
751
1195
|
SelectionVector result_sel(STANDARD_VECTOR_SIZE);
|
752
|
-
|
1196
|
+
|
753
1197
|
while (this->count > 0) {
|
754
|
-
// resolve the
|
755
|
-
idx_t match_count = ResolvePredicates(keys,
|
1198
|
+
// resolve the equality_predicates for the current set of pointers
|
1199
|
+
idx_t match_count = ResolvePredicates(keys, chain_match_sel_vector, &chain_no_match_sel_vector);
|
756
1200
|
idx_t no_match_count = this->count - match_count;
|
757
1201
|
|
758
1202
|
// mark each of the matches as found
|
759
1203
|
for (idx_t i = 0; i < match_count; i++) {
|
760
1204
|
// found a match for this index
|
761
|
-
auto index =
|
1205
|
+
auto index = chain_match_sel_vector.get_index(i);
|
762
1206
|
found_match[index] = true;
|
763
1207
|
result_sel.set_index(result_count++, index);
|
764
1208
|
}
|
765
1209
|
// continue searching for the ones where we did not find a match yet
|
766
|
-
AdvancePointers(
|
1210
|
+
AdvancePointers(chain_no_match_sel_vector, no_match_count);
|
767
1211
|
}
|
768
1212
|
// reference the columns of the left side from the result
|
769
|
-
D_ASSERT(
|
770
|
-
for (idx_t i = 0; i <
|
771
|
-
result.data[i].Reference(
|
1213
|
+
D_ASSERT(left.ColumnCount() > 0);
|
1214
|
+
for (idx_t i = 0; i < left.ColumnCount(); i++) {
|
1215
|
+
result.data[i].Reference(left.data[i]);
|
772
1216
|
}
|
773
1217
|
// now fetch the data from the RHS
|
774
1218
|
for (idx_t i = 0; i < ht.output_columns.size(); i++) {
|
775
|
-
auto &vector = result.data[
|
1219
|
+
auto &vector = result.data[left.ColumnCount() + i];
|
776
1220
|
// set NULL entries for every entry that was not found
|
777
|
-
for (idx_t j = 0; j <
|
1221
|
+
for (idx_t j = 0; j < left.size(); j++) {
|
778
1222
|
if (!found_match[j]) {
|
779
1223
|
FlatVector::SetNull(vector, j, true);
|
780
1224
|
}
|
@@ -783,13 +1227,31 @@ void ScanStructure::NextSingleJoin(DataChunk &keys, DataChunk &input, DataChunk
|
|
783
1227
|
D_ASSERT(vector.GetType() == ht.layout.GetTypes()[output_col_idx]);
|
784
1228
|
GatherResult(vector, result_sel, result_sel, result_count, output_col_idx);
|
785
1229
|
}
|
786
|
-
result.SetCardinality(
|
1230
|
+
result.SetCardinality(left.size());
|
787
1231
|
|
788
1232
|
// like the SEMI, ANTI and MARK join types, the SINGLE join only ever does one pass over the HT per input chunk
|
789
1233
|
finished = true;
|
1234
|
+
|
1235
|
+
if (ht.single_join_error_on_multiple_rows && result_count > 0) {
|
1236
|
+
// we need to throw an error if there are multiple rows per key
|
1237
|
+
// advance pointers for those rows
|
1238
|
+
AdvancePointers(result_sel, result_count);
|
1239
|
+
|
1240
|
+
// now resolve the predicates
|
1241
|
+
idx_t match_count = ResolvePredicates(keys, chain_match_sel_vector, nullptr);
|
1242
|
+
if (match_count > 0) {
|
1243
|
+
// we found at least one duplicate row - throw
|
1244
|
+
throw InvalidInputException(
|
1245
|
+
"More than one row returned by a subquery used as an expression - scalar subqueries can only "
|
1246
|
+
"return a single row.\n\nUse \"SET scalar_subquery_error_on_multiple_rows=false\" to revert to "
|
1247
|
+
"previous behavior of returning a random row.");
|
1248
|
+
}
|
1249
|
+
|
1250
|
+
this->count = 0;
|
1251
|
+
}
|
790
1252
|
}
|
791
1253
|
|
792
|
-
void JoinHashTable::ScanFullOuter(JoinHTScanState &state, Vector &addresses, DataChunk &result) {
|
1254
|
+
void JoinHashTable::ScanFullOuter(JoinHTScanState &state, Vector &addresses, DataChunk &result) const {
|
793
1255
|
// scan the HT starting from the current position and check which rows from the build side did not find a match
|
794
1256
|
auto key_locations = FlatVector::GetData<data_ptr_t>(addresses);
|
795
1257
|
idx_t found_entries = 0;
|
@@ -899,7 +1361,7 @@ idx_t JoinHashTable::GetTotalSize(const vector<idx_t> &partition_sizes, const ve
|
|
899
1361
|
return total_size + PointerTableSize(total_count);
|
900
1362
|
}
|
901
1363
|
|
902
|
-
idx_t JoinHashTable::GetTotalSize(vector<unique_ptr<JoinHashTable>> &local_hts, idx_t &max_partition_size,
|
1364
|
+
idx_t JoinHashTable::GetTotalSize(const vector<unique_ptr<JoinHashTable>> &local_hts, idx_t &max_partition_size,
|
903
1365
|
idx_t &max_partition_count) const {
|
904
1366
|
const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
|
905
1367
|
vector<idx_t> partition_sizes(num_partitions, 0);
|
@@ -911,7 +1373,7 @@ idx_t JoinHashTable::GetTotalSize(vector<unique_ptr<JoinHashTable>> &local_hts,
|
|
911
1373
|
return GetTotalSize(partition_sizes, partition_counts, max_partition_size, max_partition_count);
|
912
1374
|
}
|
913
1375
|
|
914
|
-
idx_t JoinHashTable::GetRemainingSize() {
|
1376
|
+
idx_t JoinHashTable::GetRemainingSize() const {
|
915
1377
|
const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
|
916
1378
|
auto &partitions = sink_collection->GetPartitions();
|
917
1379
|
|
@@ -929,21 +1391,21 @@ void JoinHashTable::Unpartition() {
|
|
929
1391
|
data_collection = sink_collection->GetUnpartitioned();
|
930
1392
|
}
|
931
1393
|
|
932
|
-
void JoinHashTable::SetRepartitionRadixBits(
|
933
|
-
const idx_t
|
1394
|
+
void JoinHashTable::SetRepartitionRadixBits(const idx_t max_ht_size, const idx_t max_partition_size,
|
1395
|
+
const idx_t max_partition_count) {
|
934
1396
|
D_ASSERT(max_partition_size + PointerTableSize(max_partition_count) > max_ht_size);
|
935
1397
|
|
936
1398
|
const auto max_added_bits = RadixPartitioning::MAX_RADIX_BITS - radix_bits;
|
937
1399
|
idx_t added_bits = 1;
|
938
1400
|
for (; added_bits < max_added_bits; added_bits++) {
|
939
|
-
double partition_multiplier = RadixPartitioning::NumberOfPartitions(added_bits);
|
1401
|
+
double partition_multiplier = static_cast<double>(RadixPartitioning::NumberOfPartitions(added_bits));
|
940
1402
|
|
941
|
-
auto new_estimated_size = double(max_partition_size) / partition_multiplier;
|
942
|
-
auto new_estimated_count = double(max_partition_count) / partition_multiplier;
|
1403
|
+
auto new_estimated_size = static_cast<double>(max_partition_size) / partition_multiplier;
|
1404
|
+
auto new_estimated_count = static_cast<double>(max_partition_count) / partition_multiplier;
|
943
1405
|
auto new_estimated_ht_size =
|
944
|
-
new_estimated_size + static_cast<double>(PointerTableSize(
|
1406
|
+
new_estimated_size + static_cast<double>(PointerTableSize(LossyNumericCast<idx_t>(new_estimated_count)));
|
945
1407
|
|
946
|
-
if (new_estimated_ht_size <= double(max_ht_size) / 4) {
|
1408
|
+
if (new_estimated_ht_size <= static_cast<double>(max_ht_size) / 4) {
|
947
1409
|
// Aim for an estimated partition size of max_ht_size / 4
|
948
1410
|
break;
|
949
1411
|
}
|
@@ -963,6 +1425,7 @@ void JoinHashTable::Repartition(JoinHashTable &global_ht) {
|
|
963
1425
|
|
964
1426
|
void JoinHashTable::Reset() {
|
965
1427
|
data_collection->Reset();
|
1428
|
+
hash_map.Reset();
|
966
1429
|
finalized = false;
|
967
1430
|
}
|
968
1431
|
|
@@ -1019,10 +1482,9 @@ static void CreateSpillChunk(DataChunk &spill_chunk, DataChunk &keys, DataChunk
|
|
1019
1482
|
spill_chunk.data[spill_col_idx].Reference(hashes);
|
1020
1483
|
}
|
1021
1484
|
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1025
|
-
DataChunk &spill_chunk) {
|
1485
|
+
void JoinHashTable::ProbeAndSpill(ScanStructure &scan_structure, DataChunk &keys, TupleDataChunkState &key_state,
|
1486
|
+
ProbeState &probe_state, DataChunk &payload, ProbeSpill &probe_spill,
|
1487
|
+
ProbeSpillLocalAppendState &spill_state, DataChunk &spill_chunk) {
|
1026
1488
|
// hash all the keys
|
1027
1489
|
Vector hashes(LogicalType::HASH);
|
1028
1490
|
Hash(keys, *FlatVector::IncrementalSelectionVector(), keys.size(), hashes);
|
@@ -1049,18 +1511,14 @@ unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys, TupleDat
|
|
1049
1511
|
payload.Slice(true_sel, true_count);
|
1050
1512
|
|
1051
1513
|
const SelectionVector *current_sel;
|
1052
|
-
|
1053
|
-
if (
|
1054
|
-
return
|
1514
|
+
InitializeScanStructure(scan_structure, keys, key_state, current_sel);
|
1515
|
+
if (scan_structure.count == 0) {
|
1516
|
+
return;
|
1055
1517
|
}
|
1056
1518
|
|
1057
1519
|
// now initialize the pointers of the scan structure based on the hashes
|
1058
|
-
|
1059
|
-
|
1060
|
-
// create the selection vector linking to only non-empty entries
|
1061
|
-
ss->InitializeSelectionVector(current_sel);
|
1062
|
-
|
1063
|
-
return ss;
|
1520
|
+
GetRowPointers(keys, key_state, probe_state, hashes, *current_sel, scan_structure.count, scan_structure.pointers,
|
1521
|
+
scan_structure.sel_vector);
|
1064
1522
|
}
|
1065
1523
|
|
1066
1524
|
ProbeSpill::ProbeSpill(JoinHashTable &ht, ClientContext &context, const vector<LogicalType> &probe_types)
|