duckdb 0.9.3-dev2.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- package/.github/workflows/NodeJS.yml +1 -1
- package/README.md +6 -6
- package/binding.gyp +32 -7
- package/package.json +1 -1
- package/src/connection.cpp +6 -6
- package/src/database.cpp +12 -10
- package/src/duckdb/extension/icu/icu-datefunc.cpp +22 -10
- package/src/duckdb/extension/icu/icu-datepart.cpp +42 -22
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +40 -7
- package/src/duckdb/extension/icu/icu-strptime.cpp +14 -8
- package/src/duckdb/extension/icu/icu-table-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-timezone.cpp +43 -16
- package/src/duckdb/extension/icu/icu_extension.cpp +1 -1
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -0
- package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +78 -62
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +11 -7
- package/src/duckdb/extension/json/include/json_common.hpp +0 -14
- package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -0
- package/src/duckdb/extension/json/include/json_functions.hpp +1 -0
- package/src/duckdb/extension/json/include/json_scan.hpp +19 -5
- package/src/duckdb/extension/json/include/json_serializer.hpp +2 -1
- package/src/duckdb/extension/json/include/json_structure.hpp +12 -10
- package/src/duckdb/extension/json/json_common.cpp +1 -0
- package/src/duckdb/extension/json/json_deserializer.cpp +13 -0
- package/src/duckdb/extension/json/json_extension.cpp +3 -3
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +8 -4
- package/src/duckdb/extension/json/json_functions/json_array_length.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_contains.cpp +3 -3
- package/src/duckdb/extension/json/json_functions/json_create.cpp +53 -8
- package/src/duckdb/extension/json/json_functions/json_extract.cpp +10 -6
- package/src/duckdb/extension/json/json_functions/json_keys.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +2 -3
- package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +210 -0
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +22 -19
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +71 -43
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +105 -8
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_valid.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/read_json.cpp +43 -18
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +9 -5
- package/src/duckdb/extension/json/json_scan.cpp +147 -125
- package/src/duckdb/extension/json/json_serializer.cpp +9 -0
- package/src/duckdb/extension/json/serialize_json.cpp +6 -0
- package/src/duckdb/extension/parquet/column_reader.cpp +53 -18
- package/src/duckdb/extension/parquet/column_writer.cpp +29 -6
- package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -1
- package/src/duckdb/extension/parquet/include/decode_utils.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +87 -0
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +4 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +16 -3
- package/src/duckdb/extension/parquet/include/parquet_metadata.hpp +10 -0
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +34 -6
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +3 -2
- package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +2 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +21 -1
- package/src/duckdb/extension/parquet/parquet_crypto.cpp +370 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +254 -24
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +204 -16
- package/src/duckdb/extension/parquet/parquet_reader.cpp +108 -34
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +75 -30
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +15 -8
- package/src/duckdb/extension/parquet/parquet_writer.cpp +62 -10
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +60 -0
- package/src/duckdb/src/catalog/catalog.cpp +23 -25
- package/src/duckdb/src/catalog/catalog_entry/column_dependency_manager.cpp +1 -0
- package/src/duckdb/src/catalog/catalog_entry/dependency/dependency_dependent_entry.cpp +31 -0
- package/src/duckdb/src/catalog/catalog_entry/dependency/dependency_entry.cpp +44 -0
- package/src/duckdb/src/catalog/catalog_entry/dependency/dependency_subject_entry.cpp +31 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +35 -10
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +22 -6
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +110 -33
- package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +33 -17
- package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +16 -0
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +7 -6
- package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +88 -14
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +6 -15
- package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +20 -20
- package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +4 -0
- package/src/duckdb/src/catalog/catalog_entry.cpp +29 -0
- package/src/duckdb/src/catalog/catalog_set.cpp +358 -343
- package/src/duckdb/src/catalog/catalog_transaction.cpp +4 -0
- package/src/duckdb/src/catalog/default/default_functions.cpp +13 -4
- package/src/duckdb/src/catalog/default/default_schemas.cpp +5 -1
- package/src/duckdb/src/catalog/default/default_views.cpp +6 -2
- package/src/duckdb/src/catalog/dependency_catalog_set.cpp +51 -0
- package/src/duckdb/src/catalog/dependency_manager.cpp +510 -114
- package/src/duckdb/src/catalog/duck_catalog.cpp +4 -4
- package/src/duckdb/src/common/adbc/adbc.cpp +73 -53
- package/src/duckdb/src/common/adbc/driver_manager.cpp +1101 -268
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -9
- package/src/duckdb/src/common/bind_helpers.cpp +1 -0
- package/src/duckdb/src/common/box_renderer.cpp +52 -1
- package/src/duckdb/src/common/compressed_file_system.cpp +1 -0
- package/src/duckdb/src/common/constants.cpp +0 -1
- package/src/duckdb/src/common/enum_util.cpp +522 -107
- package/src/duckdb/src/common/enums/catalog_type.cpp +64 -1
- package/src/duckdb/src/common/enums/compression_type.cpp +14 -0
- package/src/duckdb/src/common/enums/date_part_specifier.cpp +1 -0
- package/src/duckdb/src/common/enums/expression_type.cpp +4 -0
- package/src/duckdb/src/common/enums/file_compression_type.cpp +1 -0
- package/src/duckdb/src/common/enums/join_type.cpp +33 -0
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +5 -3
- package/src/duckdb/src/common/enums/optimizer_type.cpp +9 -1
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +8 -4
- package/src/duckdb/src/common/enums/statement_type.cpp +2 -2
- package/src/duckdb/src/common/error_data.cpp +113 -0
- package/src/duckdb/src/common/exception/binder_exception.cpp +47 -0
- package/src/duckdb/src/common/exception/catalog_exception.cpp +55 -0
- package/src/duckdb/src/common/exception/parser_exception.cpp +19 -0
- package/src/duckdb/src/common/exception.cpp +110 -121
- package/src/duckdb/src/common/exception_format_value.cpp +9 -1
- package/src/duckdb/src/common/extra_type_info.cpp +48 -0
- package/src/duckdb/src/common/file_system.cpp +12 -7
- package/src/duckdb/src/common/gzip_file_system.cpp +18 -18
- package/src/duckdb/src/common/hive_partitioning.cpp +5 -1
- package/src/duckdb/src/common/http_state.cpp +20 -3
- package/src/duckdb/src/common/local_file_system.cpp +214 -15
- package/src/duckdb/src/common/multi_file_reader.cpp +20 -7
- package/src/duckdb/src/common/operator/cast_operators.cpp +397 -414
- package/src/duckdb/src/common/operator/convert_to_string.cpp +4 -0
- package/src/duckdb/src/common/operator/string_cast.cpp +5 -0
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +61 -12
- package/src/duckdb/src/common/progress_bar/terminal_progress_bar_display.cpp +13 -4
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +2 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +7 -1
- package/src/duckdb/src/common/row_operations/row_heap_gather.cpp +78 -12
- package/src/duckdb/src/common/row_operations/row_heap_scatter.cpp +222 -61
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +6 -1
- package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +51 -0
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +8 -1
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +6 -0
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +5 -0
- package/src/duckdb/src/common/serializer/serializer.cpp +19 -0
- package/src/duckdb/src/common/sort/comparators.cpp +126 -0
- package/src/duckdb/src/common/sort/partition_state.cpp +17 -17
- package/src/duckdb/src/common/sort/radix_sort.cpp +2 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +10 -5
- package/src/duckdb/src/common/sort/sorted_block.cpp +7 -6
- package/src/duckdb/src/common/string_util.cpp +302 -24
- package/src/duckdb/src/common/tree_renderer.cpp +8 -6
- package/src/duckdb/src/common/types/cast_helpers.cpp +6 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +1 -1
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +58 -0
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +8 -1
- package/src/duckdb/src/common/types/data_chunk.cpp +9 -0
- package/src/duckdb/src/common/types/date.cpp +2 -2
- package/src/duckdb/src/common/types/hash.cpp +9 -1
- package/src/duckdb/src/common/types/hugeint.cpp +229 -51
- package/src/duckdb/src/common/types/hyperloglog.cpp +10 -3
- package/src/duckdb/src/common/types/interval.cpp +67 -12
- package/src/duckdb/src/common/types/list_segment.cpp +98 -4
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +11 -1
- package/src/duckdb/src/common/types/row/row_data_collection.cpp +1 -1
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +2 -2
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +3 -2
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +63 -3
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +331 -127
- package/src/duckdb/src/common/types/time.cpp +47 -75
- package/src/duckdb/src/common/types/timestamp.cpp +16 -3
- package/src/duckdb/src/common/types/uhugeint.cpp +746 -0
- package/src/duckdb/src/common/types/validity_mask.cpp +6 -2
- package/src/duckdb/src/common/types/value.cpp +183 -27
- package/src/duckdb/src/common/types/vector.cpp +331 -30
- package/src/duckdb/src/common/types/vector_buffer.cpp +29 -1
- package/src/duckdb/src/common/types/vector_cache.cpp +22 -1
- package/src/duckdb/src/common/types.cpp +606 -90
- package/src/duckdb/src/common/value_operations/comparison_operations.cpp +21 -1
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +5 -0
- package/src/duckdb/src/common/vector_operations/generators.cpp +2 -2
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +131 -2
- package/src/duckdb/src/common/vector_operations/vector_copy.cpp +26 -4
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +41 -0
- package/src/duckdb/src/common/vector_operations/vector_storage.cpp +7 -0
- package/src/duckdb/src/common/virtual_file_system.cpp +0 -1
- package/src/duckdb/src/core_functions/aggregate/distributive/approx_count.cpp +2 -1
- package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +144 -56
- package/src/duckdb/src/core_functions/aggregate/distributive/bitagg.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/distributive/bitstring_agg.cpp +27 -0
- package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +4 -3
- package/src/duckdb/src/core_functions/aggregate/distributive/kurtosis.cpp +25 -5
- package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +100 -3
- package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +2 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +9 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +83 -52
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +485 -289
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +3 -3
- package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +24 -26
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +34 -37
- package/src/duckdb/src/core_functions/function_list.cpp +30 -1
- package/src/duckdb/src/core_functions/lambda_functions.cpp +416 -0
- package/src/duckdb/src/core_functions/scalar/array/array_functions.cpp +294 -0
- package/src/duckdb/src/core_functions/scalar/array/array_value.cpp +87 -0
- package/src/duckdb/src/core_functions/scalar/blob/create_sort_key.cpp +686 -0
- package/src/duckdb/src/core_functions/scalar/blob/encode.cpp +1 -0
- package/src/duckdb/src/core_functions/scalar/date/current.cpp +3 -3
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +295 -20
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +1 -0
- package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +8 -7
- package/src/duckdb/src/core_functions/scalar/date/to_interval.cpp +84 -23
- package/src/duckdb/src/core_functions/scalar/generic/error.cpp +4 -4
- package/src/duckdb/src/core_functions/scalar/generic/least.cpp +7 -8
- package/src/duckdb/src/core_functions/scalar/generic/stats.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/generic/system_functions.cpp +17 -6
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +8 -0
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +28 -14
- package/src/duckdb/src/core_functions/scalar/list/list_filter.cpp +49 -0
- package/src/duckdb/src/core_functions/scalar/list/list_reduce.cpp +230 -0
- package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +85 -16
- package/src/duckdb/src/core_functions/scalar/list/list_transform.cpp +41 -0
- package/src/duckdb/src/core_functions/scalar/list/list_value.cpp +21 -2
- package/src/duckdb/src/core_functions/scalar/map/map.cpp +6 -5
- package/src/duckdb/src/core_functions/scalar/map/map_entries.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/map/map_from_entries.cpp +1 -2
- package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +24 -4
- package/src/duckdb/src/core_functions/scalar/operators/bitwise.cpp +6 -0
- package/src/duckdb/src/core_functions/scalar/random/random.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/random/setseed.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/secret/which_secret.cpp +28 -0
- package/src/duckdb/src/core_functions/scalar/string/bar.cpp +9 -4
- package/src/duckdb/src/core_functions/scalar/string/format_bytes.cpp +7 -2
- package/src/duckdb/src/core_functions/scalar/string/hex.cpp +63 -4
- package/src/duckdb/src/core_functions/scalar/string/pad.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/string/parse_path.cpp +348 -0
- package/src/duckdb/src/core_functions/scalar/string/regexp_escape.cpp +22 -0
- package/src/duckdb/src/core_functions/scalar/string/string_split.cpp +6 -5
- package/src/duckdb/src/core_functions/scalar/struct/struct_insert.cpp +3 -3
- package/src/duckdb/src/core_functions/scalar/struct/struct_pack.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +9 -2
- package/src/duckdb/src/execution/column_binding_resolver.cpp +44 -10
- package/src/duckdb/src/execution/expression_executor/execute_between.cpp +4 -0
- package/src/duckdb/src/execution/expression_executor/execute_case.cpp +4 -0
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +4 -0
- package/src/duckdb/src/execution/expression_executor.cpp +2 -1
- package/src/duckdb/src/execution/index/art/art.cpp +202 -53
- package/src/duckdb/src/execution/index/art/art_key.cpp +20 -27
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +52 -17
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +14 -8
- package/src/duckdb/src/execution/index/index_type_set.cpp +32 -0
- package/src/duckdb/src/execution/index/unknown_index.cpp +65 -0
- package/src/duckdb/src/execution/join_hashtable.cpp +151 -174
- package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_inner.cpp +4 -0
- package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_mark.cpp +4 -0
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +2 -1
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +82 -36
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +58 -32
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +35 -19
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +124 -0
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +97 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +71 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +98 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +105 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +63 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +1091 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +124 -26
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +117 -129
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +46 -22
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +83 -199
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +21 -122
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +18 -17
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp +22 -0
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +201 -0
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +221 -0
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +204 -0
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +186 -0
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +532 -0
- package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +85 -0
- package/src/duckdb/src/execution/operator/helper/physical_create_secret.cpp +21 -0
- package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_pragma.cpp +2 -2
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +34 -9
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +10 -0
- package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +1 -0
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +25 -10
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +7 -8
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +5 -2
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +5 -127
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +221 -61
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +18 -21
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +10 -5
- package/src/duckdb/src/execution/operator/join/physical_left_delim_join.cpp +137 -0
- package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +11 -4
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +19 -4
- package/src/duckdb/src/execution/operator/join/physical_right_delim_join.cpp +121 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +59 -0
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +132 -92
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +54 -54
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +4 -0
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +21 -3
- package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +1 -0
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +61 -43
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +15 -13
- package/src/duckdb/src/execution/operator/schema/physical_create_schema.cpp +1 -0
- package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +10 -0
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +44 -90
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -0
- package/src/duckdb/src/execution/operator/set/physical_union.cpp +8 -4
- package/src/duckdb/src/execution/physical_operator.cpp +3 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +30 -143
- package/src/duckdb/src/execution/physical_plan/plan_copy_database.cpp +12 -0
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +11 -4
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +8 -8
- package/src/duckdb/src/execution/physical_plan/plan_create_secret.cpp +11 -0
- package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +9 -8
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +5 -3
- package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +15 -6
- package/src/duckdb/src/execution/physical_plan/plan_positional_join.cpp +1 -0
- package/src/duckdb/src/execution/physical_plan/plan_pragma.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -1
- package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +90 -12
- package/src/duckdb/src/execution/physical_plan/plan_window.cpp +67 -22
- package/src/duckdb/src/execution/physical_plan_generator.cpp +6 -3
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +163 -32
- package/src/duckdb/src/execution/reservoir_sample.cpp +112 -32
- package/src/duckdb/src/execution/window_executor.cpp +291 -26
- package/src/duckdb/src/execution/window_segment_tree.cpp +958 -114
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +18 -16
- package/src/duckdb/src/function/aggregate/distributive/first.cpp +11 -4
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +303 -131
- package/src/duckdb/src/function/cast/array_casts.cpp +226 -0
- package/src/duckdb/src/function/cast/bit_cast.cpp +2 -0
- package/src/duckdb/src/function/cast/cast_function_set.cpp +13 -2
- package/src/duckdb/src/function/cast/decimal_cast.cpp +2 -0
- package/src/duckdb/src/function/cast/default_casts.cpp +4 -1
- package/src/duckdb/src/function/cast/list_casts.cpp +151 -6
- package/src/duckdb/src/function/cast/numeric_casts.cpp +4 -0
- package/src/duckdb/src/function/cast/string_cast.cpp +95 -5
- package/src/duckdb/src/function/cast/struct_cast.cpp +53 -19
- package/src/duckdb/src/function/cast/time_casts.cpp +23 -1
- package/src/duckdb/src/function/cast/union/from_struct.cpp +1 -0
- package/src/duckdb/src/function/cast/union_casts.cpp +4 -3
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +8 -4
- package/src/duckdb/src/function/cast_rules.cpp +197 -31
- package/src/duckdb/src/function/compression_config.cpp +4 -0
- package/src/duckdb/src/function/function.cpp +15 -9
- package/src/duckdb/src/function/function_binder.cpp +80 -29
- package/src/duckdb/src/function/function_set.cpp +6 -6
- package/src/duckdb/src/function/pragma/pragma_functions.cpp +10 -8
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +34 -38
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +12 -0
- package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +10 -1
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +11 -1
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +14 -3
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +4 -0
- package/src/duckdb/src/function/scalar/list/list_select.cpp +176 -0
- package/src/duckdb/src/function/scalar/list/list_zip.cpp +165 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +33 -0
- package/src/duckdb/src/function/scalar/operators/add.cpp +53 -6
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +48 -14
- package/src/duckdb/src/function/scalar/operators/multiply.cpp +9 -1
- package/src/duckdb/src/function/scalar/operators/subtract.cpp +19 -4
- package/src/duckdb/src/function/scalar/sequence/nextval.cpp +28 -55
- package/src/duckdb/src/function/scalar/strftime_format.cpp +242 -19
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/concat.cpp +16 -6
- package/src/duckdb/src/function/scalar/string/length.cpp +124 -24
- package/src/duckdb/src/function/scalar/string/regexp.cpp +27 -27
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +64 -15
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +4 -3
- package/src/duckdb/src/function/scalar_function.cpp +8 -7
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +12 -0
- package/src/duckdb/src/function/table/arrow.cpp +20 -27
- package/src/duckdb/src/function/table/arrow_conversion.cpp +433 -150
- package/src/duckdb/src/function/table/copy_csv.cpp +62 -62
- package/src/duckdb/src/function/table/range.cpp +6 -3
- package/src/duckdb/src/function/table/read_csv.cpp +107 -759
- package/src/duckdb/src/function/table/read_file.cpp +242 -0
- package/src/duckdb/src/function/table/sniff_csv.cpp +275 -0
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +15 -3
- package/src/duckdb/src/function/table/system/duckdb_databases.cpp +5 -0
- package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +9 -13
- package/src/duckdb/src/function/table/system/duckdb_functions.cpp +67 -14
- package/src/duckdb/src/function/table/system/duckdb_indexes.cpp +12 -15
- package/src/duckdb/src/function/table/system/duckdb_memory.cpp +63 -0
- package/src/duckdb/src/function/table/system/duckdb_optimizers.cpp +57 -0
- package/src/duckdb/src/function/table/system/duckdb_schemas.cpp +5 -0
- package/src/duckdb/src/function/table/system/duckdb_secrets.cpp +128 -0
- package/src/duckdb/src/function/table/system/duckdb_sequences.cpp +12 -6
- package/src/duckdb/src/function/table/system/duckdb_settings.cpp +0 -1
- package/src/duckdb/src/function/table/system/duckdb_tables.cpp +5 -0
- package/src/duckdb/src/function/table/system/duckdb_types.cpp +6 -0
- package/src/duckdb/src/function/table/system/duckdb_views.cpp +5 -0
- package/src/duckdb/src/function/table/system/pragma_table_info.cpp +166 -64
- package/src/duckdb/src/function/table/system/test_all_types.cpp +2 -1
- package/src/duckdb/src/function/table/system_functions.cpp +3 -2
- package/src/duckdb/src/function/table/table_scan.cpp +50 -110
- package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -44
- package/src/duckdb/src/function/table_function.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +4 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/dependency/dependency_dependent_entry.hpp +27 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/dependency/dependency_entry.hpp +66 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/dependency/dependency_subject_entry.hpp +27 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +25 -5
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +3 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +2 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +19 -5
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp +26 -11
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +1 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +1 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/view_catalog_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +15 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +41 -49
- package/src/duckdb/src/include/duckdb/catalog/catalog_transaction.hpp +1 -0
- package/src/duckdb/src/include/duckdb/catalog/default/builtin_types/types.hpp +3 -1
- package/src/duckdb/src/include/duckdb/catalog/dependency.hpp +120 -8
- package/src/duckdb/src/include/duckdb/catalog/dependency_catalog_set.hpp +32 -0
- package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +92 -12
- package/src/duckdb/src/include/duckdb/catalog/standard_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +4 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.h +1153 -12
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/adbc/driver_manager.h +0 -2
- package/src/duckdb/src/include/duckdb/common/adbc/options.h +64 -0
- package/src/duckdb/src/include/duckdb/common/adbc/single_batch_array_stream.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow.hpp +25 -6
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/bit_utils.hpp +30 -0
- package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/constants.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +75 -19
- package/src/duckdb/src/include/duckdb/common/enums/catalog_type.hpp +11 -1
- package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/enums/expression_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/index_constraint_type.hpp +35 -0
- package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +16 -3
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +6 -5
- package/src/duckdb/src/include/duckdb/common/enums/memory_tag.hpp +32 -0
- package/src/duckdb/src/include/duckdb/common/enums/on_create_conflict.hpp +26 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +9 -3
- package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +6 -5
- package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/error_data.hpp +72 -0
- package/src/duckdb/src/include/duckdb/common/exception/binder_exception.hpp +47 -0
- package/src/duckdb/src/include/duckdb/common/exception/catalog_exception.hpp +39 -0
- package/src/duckdb/src/include/duckdb/common/exception/conversion_exception.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/exception/http_exception.hpp +62 -0
- package/src/duckdb/src/include/duckdb/common/exception/list.hpp +6 -0
- package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +29 -0
- package/src/duckdb/src/include/duckdb/common/exception/transaction_exception.hpp +25 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +52 -166
- package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +7 -4
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +53 -1
- package/src/duckdb/src/include/duckdb/common/helper.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +18 -4
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/limits.hpp +19 -1
- package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +16 -3
- package/src/duckdb/src/include/duckdb/common/operator/add.hpp +13 -2
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +114 -5
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +0 -4
- package/src/duckdb/src/include/duckdb/common/operator/convert_to_string.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +29 -0
- package/src/duckdb/src/include/duckdb/common/operator/double_cast_operator.hpp +52 -0
- package/src/duckdb/src/include/duckdb/common/operator/integer_cast_operator.hpp +459 -0
- package/src/duckdb/src/include/duckdb/common/operator/multiply.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +136 -0
- package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/platform.h +53 -0
- package/src/duckdb/src/include/duckdb/common/progress_bar/display/terminal_progress_bar_display.hpp +5 -5
- package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +22 -6
- package/src/duckdb/src/include/duckdb/common/radix.hpp +6 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +20 -6
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +43 -4
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +12 -2
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +0 -78
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +23 -1
- package/src/duckdb/src/include/duckdb/common/type_util.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +26 -3
- package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +5 -3
- package/src/duckdb/src/include/duckdb/common/types/hash.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +81 -15
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +57 -29
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +4 -2
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +26 -22
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/time.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/uhugeint.hpp +216 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +18 -10
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +31 -0
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +18 -2
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +22 -1
- package/src/duckdb/src/include/duckdb/common/types.hpp +151 -49
- package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +70 -6
- package/src/duckdb/src/include/duckdb/common/vector_size.hpp +6 -6
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +29 -2
- package/src/duckdb/src/include/duckdb/core_functions/lambda_functions.hpp +131 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/array_functions.hpp +69 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +9 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +40 -4
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +42 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/secret_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +56 -2
- package/src/duckdb/src/include/duckdb/core_functions/to_interval.hpp +29 -0
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +11 -13
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +34 -19
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +18 -14
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/index/index_type.hpp +57 -0
- package/src/duckdb/src/include/duckdb/execution/index/index_type_set.hpp +29 -0
- package/src/duckdb/src/include/duckdb/execution/index/unknown_index.hpp +65 -0
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +35 -24
- package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +630 -0
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.hpp +74 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_file_handle.hpp +60 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_option.hpp +155 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_reader_options.hpp +163 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/state_machine_options.hpp +35 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/base_scanner.hpp +228 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/column_count_scanner.hpp +70 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/scanner_boundary.hpp +93 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/skip_scanner.hpp +60 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/string_value_scanner.hpp +197 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +189 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state.hpp +30 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine.hpp +99 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.hpp +87 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/csv_file_scanner.hpp +70 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/global_csv_state.hpp +80 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_casting.hpp +137 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_error.hpp +104 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +0 -4
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_collector.hpp +37 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_create_secret.hpp +38 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_pragma.hpp +4 -7
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +6 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +5 -18
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +14 -5
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_join.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_left_delim_join.hpp +37 -0
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_right_delim_join.hpp +37 -0
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_database.hpp +35 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +8 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +14 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -4
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_expression_scan.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +17 -12
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_union.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +6 -2
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +32 -18
- package/src/duckdb/src/include/duckdb/execution/task_error_manager.hpp +57 -0
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +101 -19
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +46 -14
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +6 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +26 -1
- package/src/duckdb/src/include/duckdb/function/cast/cast_function_set.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/cast/vector_cast_helpers.hpp +7 -1
- package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +10 -0
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +16 -4
- package/src/duckdb/src/include/duckdb/function/function.hpp +12 -7
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +15 -12
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +4 -3
- package/src/duckdb/src/include/duckdb/function/macro_function.hpp +3 -3
- package/src/duckdb/src/include/duckdb/function/pragma/pragma_functions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +21 -1
- package/src/duckdb/src/include/duckdb/function/scalar/sequence_functions.hpp +22 -0
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +16 -2
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +22 -8
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +6 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +35 -3
- package/src/duckdb/src/include/duckdb/function/table/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/table/range.hpp +12 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +14 -16
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +12 -8
- package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +0 -2
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -3
- package/src/duckdb/src/include/duckdb/main/appender.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +3 -2
- package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +89 -0
- package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +53 -0
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/capi/cast/generic.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp +4 -4
- package/src/duckdb/src/include/duckdb/main/chunk_scan_state.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +8 -10
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +22 -23
- package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +37 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +1 -7
- package/src/duckdb/src/include/duckdb/main/config.hpp +24 -7
- package/src/duckdb/src/include/duckdb/main/connection.hpp +8 -1
- package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +17 -26
- package/src/duckdb/src/include/duckdb/main/database.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/database_manager.hpp +39 -7
- package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +5 -4
- package/src/duckdb/src/include/duckdb/main/error_manager.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +203 -197
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +42 -16
- package/src/duckdb/src/include/duckdb/main/extension_util.hpp +8 -1
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +4 -4
- package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +0 -32
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +13 -12
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/main/relation/setop_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +206 -0
- package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +211 -0
- package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +164 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +52 -13
- package/src/duckdb/src/include/duckdb/main/stream_query_result.hpp +9 -1
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +4 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +14 -22
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +20 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +3 -5
- package/src/duckdb/src/include/duckdb/parallel/task.hpp +7 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +10 -4
- package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/column_definition.hpp +8 -1
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +11 -4
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +16 -9
- package/src/duckdb/src/include/duckdb/parser/expression/lambdaref_expression.hpp +47 -0
- package/src/duckdb/src/include/duckdb/parser/expression/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +29 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +47 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/bound_pragma_info.hpp +29 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_info.hpp +45 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +21 -22
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +5 -11
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_schema_info.hpp +23 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_secret_info.hpp +47 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_info.hpp +3 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +6 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/extra_drop_info.hpp +66 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +3 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/pragma_info.hpp +8 -4
- package/src/duckdb/src/include/duckdb/parser/parsed_expression.hpp +1 -3
- package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/qualified_name.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +5 -22
- package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/statement/copy_database_statement.hpp +40 -0
- package/src/duckdb/src/include/duckdb/parser/statement/list.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/statement/set_statement.hpp +4 -3
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/showref.hpp +47 -0
- package/src/duckdb/src/include/duckdb/parser/tableref.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +20 -3
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +5 -1
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -27
- package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +4 -2
- package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +5 -6
- package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +11 -0
- package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/column_alias_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +3 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/table_function_binder.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +65 -22
- package/src/duckdb/src/include/duckdb/planner/filter/struct_filter.hpp +41 -0
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_database.hpp +45 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create_secret.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +3 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_pragma.hpp +5 -10
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +11 -4
- package/src/duckdb/src/include/duckdb/planner/parsed_data/bound_create_table_info.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/pragma_handler.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_set_operation_node.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp +29 -0
- package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +9 -7
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_dummytableref.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +9 -5
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +23 -6
- package/src/duckdb/src/include/duckdb/storage/buffer/temporary_file_information.hpp +7 -0
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +11 -4
- package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +0 -3
- package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +7 -7
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/alp/algorithm/alp.hpp +408 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +173 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +283 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +134 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_fetch.hpp +42 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_scan.hpp +244 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_utils.hpp +103 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +235 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +134 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +301 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_constants.hpp +35 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_fetch.hpp +41 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp +252 -0
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_analyze.hpp +7 -103
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +5 -234
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_analyze.hpp +7 -107
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +5 -184
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +12 -17
- package/src/duckdb/src/include/duckdb/storage/database_size.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/index.hpp +40 -42
- package/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +77 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +6 -2
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +0 -1
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +12 -6
- package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +8 -3
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +17 -17
- package/src/duckdb/src/include/duckdb/storage/statistics/array_stats.hpp +40 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +12 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +49 -24
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +8 -4
- package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +67 -0
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -2
- package/src/duckdb/src/include/duckdb/storage/table/data_table_info.hpp +8 -3
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +7 -0
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +12 -1
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +16 -12
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +6 -9
- package/src/duckdb/src/include/duckdb/storage/temporary_memory_manager.hpp +119 -0
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +13 -55
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +4 -5
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -2
- package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +6 -3
- package/src/duckdb/src/include/duckdb/transaction/transaction_context.hpp +4 -4
- package/src/duckdb/src/include/duckdb/transaction/transaction_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/verification/fetch_row_verifier.hpp +25 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +5 -0
- package/src/duckdb/src/include/duckdb.h +571 -143
- package/src/duckdb/src/main/appender.cpp +17 -2
- package/src/duckdb/src/main/attached_database.cpp +24 -12
- package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +96 -0
- package/src/duckdb/src/main/capi/appender-c.cpp +42 -3
- package/src/duckdb/src/main/capi/arrow-c.cpp +32 -9
- package/src/duckdb/src/main/capi/datetime-c.cpp +22 -0
- package/src/duckdb/src/main/capi/duckdb-c.cpp +14 -4
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +66 -2
- package/src/duckdb/src/main/capi/helper-c.cpp +76 -2
- package/src/duckdb/src/main/capi/hugeint-c.cpp +23 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +27 -3
- package/src/duckdb/src/main/capi/pending-c.cpp +43 -9
- package/src/duckdb/src/main/capi/prepared-c.cpp +38 -2
- package/src/duckdb/src/main/capi/result-c.cpp +54 -3
- package/src/duckdb/src/main/capi/table_function-c.cpp +4 -4
- package/src/duckdb/src/main/capi/value-c.cpp +10 -0
- package/src/duckdb/src/main/chunk_scan_state/query_result.cpp +3 -3
- package/src/duckdb/src/main/client_context.cpp +259 -250
- package/src/duckdb/src/main/client_data.cpp +0 -1
- package/src/duckdb/src/main/client_verify.cpp +26 -8
- package/src/duckdb/src/main/config.cpp +34 -13
- package/src/duckdb/src/main/connection.cpp +27 -6
- package/src/duckdb/src/main/connection_manager.cpp +54 -0
- package/src/duckdb/src/main/database.cpp +44 -39
- package/src/duckdb/src/main/database_manager.cpp +106 -8
- package/src/duckdb/src/main/database_path_and_type.cpp +27 -8
- package/src/duckdb/src/main/db_instance_cache.cpp +4 -4
- package/src/duckdb/src/main/error_manager.cpp +12 -3
- package/src/duckdb/src/main/extension/extension_alias.cpp +2 -2
- package/src/duckdb/src/main/extension/extension_helper.cpp +15 -16
- package/src/duckdb/src/main/extension/extension_install.cpp +33 -24
- package/src/duckdb/src/main/extension/extension_load.cpp +22 -21
- package/src/duckdb/src/main/extension/extension_util.cpp +12 -0
- package/src/duckdb/src/main/materialized_query_result.cpp +1 -1
- package/src/duckdb/src/main/pending_query_result.cpp +25 -8
- package/src/duckdb/src/main/prepared_statement.cpp +5 -5
- package/src/duckdb/src/main/prepared_statement_data.cpp +8 -1
- package/src/duckdb/src/main/query_profiler.cpp +11 -11
- package/src/duckdb/src/main/query_result.cpp +32 -6
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/join_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +38 -32
- package/src/duckdb/src/main/relation/setop_relation.cpp +5 -3
- package/src/duckdb/src/main/relation.cpp +5 -5
- package/src/duckdb/src/main/secret/secret.cpp +135 -0
- package/src/duckdb/src/main/secret/secret_manager.cpp +634 -0
- package/src/duckdb/src/main/secret/secret_storage.cpp +233 -0
- package/src/duckdb/src/main/settings/settings.cpp +133 -38
- package/src/duckdb/src/main/stream_query_result.cpp +53 -14
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +6 -0
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +1 -1
- package/src/duckdb/src/optimizer/deliminator.cpp +136 -14
- package/src/duckdb/src/optimizer/filter_combiner.cpp +72 -26
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +3 -0
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +2 -1
- package/src/duckdb/src/optimizer/join_order/cost_model.cpp +0 -1
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +4 -0
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +3 -6
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +20 -0
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +71 -40
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +12 -3
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +77 -3
- package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +7 -7
- package/src/duckdb/src/optimizer/pushdown/pushdown_semi_anti_join.cpp +56 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +21 -0
- package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/move_constants.cpp +15 -10
- package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +70 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +17 -5
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +1 -0
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +1 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +1 -2
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +22 -9
- package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +28 -4
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +12 -7
- package/src/duckdb/src/parallel/event.cpp +2 -2
- package/src/duckdb/src/parallel/executor.cpp +114 -81
- package/src/duckdb/src/parallel/executor_task.cpp +2 -4
- package/src/duckdb/src/parallel/meta_pipeline.cpp +28 -29
- package/src/duckdb/src/parallel/pipeline.cpp +41 -41
- package/src/duckdb/src/parallel/pipeline_event.cpp +2 -4
- package/src/duckdb/src/parallel/pipeline_executor.cpp +13 -75
- package/src/duckdb/src/parallel/task_scheduler.cpp +22 -13
- package/src/duckdb/src/parser/column_definition.cpp +22 -4
- package/src/duckdb/src/parser/column_list.cpp +2 -1
- package/src/duckdb/src/parser/expression/function_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +51 -0
- package/src/duckdb/src/parser/expression/lambdaref_expression.cpp +59 -0
- package/src/duckdb/src/parser/expression/window_expression.cpp +9 -1
- package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +40 -0
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +1 -0
- package/src/duckdb/src/parser/parsed_data/comment_on_info.cpp +19 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +11 -9
- package/src/duckdb/src/parser/parsed_data/create_info.cpp +1 -0
- package/src/duckdb/src/parser/parsed_data/create_secret_info.cpp +22 -0
- package/src/duckdb/src/parser/parsed_data/create_table_info.cpp +17 -0
- package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +19 -0
- package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +13 -9
- package/src/duckdb/src/parser/parsed_data/drop_info.cpp +8 -9
- package/src/duckdb/src/parser/parsed_data/extra_drop_info.cpp +16 -0
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +3 -1
- package/src/duckdb/src/parser/parser.cpp +14 -8
- package/src/duckdb/src/parser/query_error_context.cpp +12 -13
- package/src/duckdb/src/parser/query_node/select_node.cpp +5 -1
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +8 -13
- package/src/duckdb/src/parser/statement/copy_database_statement.cpp +41 -0
- package/src/duckdb/src/parser/statement/set_statement.cpp +5 -1
- package/src/duckdb/src/parser/tableref/basetableref.cpp +1 -0
- package/src/duckdb/src/parser/tableref/showref.cpp +47 -0
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +6 -2
- package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +1 -0
- package/src/duckdb/src/parser/transform/expression/transform_boolean_test.cpp +30 -15
- package/src/duckdb/src/parser/transform/expression/transform_case.cpp +1 -0
- package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +3 -2
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +34 -4
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +26 -12
- package/src/duckdb/src/parser/transform/expression/transform_grouping_function.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_interval.cpp +34 -5
- package/src/duckdb/src/parser/transform/expression/transform_is_null.cpp +3 -1
- package/src/duckdb/src/parser/transform/expression/transform_lambda.cpp +3 -1
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +3 -3
- package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +3 -3
- package/src/duckdb/src/parser/transform/expression/transform_positional_reference.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +13 -13
- package/src/duckdb/src/parser/transform/helpers/nodetype_to_string.cpp +2 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +32 -1
- package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_attach.cpp +1 -0
- package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +108 -0
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +45 -37
- package/src/duckdb/src/parser/transform/statement/transform_copy_database.cpp +29 -0
- package/src/duckdb/src/parser/transform/statement/transform_create_index.cpp +5 -14
- package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +0 -1
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_drop.cpp +25 -6
- package/src/duckdb/src/parser/transform/statement/transform_import.cpp +2 -1
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +6 -14
- package/src/duckdb/src/parser/transform/statement/transform_secret.cpp +103 -0
- package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -1
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +4 -8
- package/src/duckdb/src/parser/transform/statement/transform_set.cpp +18 -5
- package/src/duckdb/src/parser/transform/statement/transform_show.cpp +14 -41
- package/src/duckdb/src/parser/transform/statement/transform_show_select.cpp +13 -6
- package/src/duckdb/src/parser/transform/statement/transform_use.cpp +3 -1
- package/src/duckdb/src/parser/transform/tableref/transform_base_tableref.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +1 -2
- package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +6 -5
- package/src/duckdb/src/parser/transform/tableref/transform_table_function.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +22 -0
- package/src/duckdb/src/planner/bind_context.cpp +23 -14
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +15 -14
- package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +20 -9
- package/src/duckdb/src/planner/binder/expression/bind_case_expression.cpp +12 -7
- package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +4 -4
- package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +279 -195
- package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +103 -17
- package/src/duckdb/src/planner/binder/expression/bind_conjunction_expression.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +91 -68
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +116 -84
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +85 -15
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +68 -31
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +7 -7
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +11 -7
- package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +42 -19
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +38 -16
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +16 -7
- package/src/duckdb/src/planner/binder/query_node/bind_recursive_cte_node.cpp +3 -0
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +83 -12
- package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +28 -37
- package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +3 -4
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +2 -2
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +3 -3
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +7 -5
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +10 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +48 -50
- package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +187 -0
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +38 -22
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +8 -15
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +6 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +12 -10
- package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +37 -13
- package/src/duckdb/src/planner/binder/statement/bind_set.cpp +8 -2
- package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +2 -0
- package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +29 -14
- package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +16 -6
- package/src/duckdb/src/planner/binder/tableref/bind_expressionlistref.cpp +11 -4
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/bind_named_parameters.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +18 -17
- package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +85 -0
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +11 -17
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -9
- package/src/duckdb/src/planner/binder.cpp +31 -26
- package/src/duckdb/src/planner/bound_result_modifier.cpp +24 -0
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +10 -1
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +20 -4
- package/src/duckdb/src/planner/expression/bound_lambdaref_expression.cpp +9 -10
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +65 -3
- package/src/duckdb/src/planner/expression.cpp +15 -5
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +5 -6
- package/src/duckdb/src/planner/expression_binder/check_binder.cpp +9 -8
- package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +10 -7
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +9 -4
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +0 -25
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +8 -11
- package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +3 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +15 -8
- package/src/duckdb/src/planner/expression_binder/where_binder.cpp +3 -4
- package/src/duckdb/src/planner/expression_binder.cpp +51 -25
- package/src/duckdb/src/planner/expression_iterator.cpp +2 -1
- package/src/duckdb/src/planner/filter/constant_filter.cpp +1 -0
- package/src/duckdb/src/planner/filter/struct_filter.cpp +33 -0
- package/src/duckdb/src/planner/joinside.cpp +1 -1
- package/src/duckdb/src/planner/logical_operator.cpp +2 -1
- package/src/duckdb/src/planner/operator/logical_copy_database.cpp +32 -0
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +13 -4
- package/src/duckdb/src/planner/operator/logical_create_table.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +4 -1
- package/src/duckdb/src/planner/operator/logical_join.cpp +8 -0
- package/src/duckdb/src/planner/planner.cpp +24 -23
- package/src/duckdb/src/planner/pragma_handler.cpp +10 -19
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +99 -6
- package/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp +36 -0
- package/src/duckdb/src/planner/table_binding.cpp +14 -12
- package/src/duckdb/src/storage/buffer/block_handle.cpp +12 -10
- package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -1
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +25 -9
- package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +4 -3
- package/src/duckdb/src/storage/buffer_manager.cpp +14 -3
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +0 -8
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +15 -7
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +94 -41
- package/src/duckdb/src/storage/compression/alp/alp.cpp +57 -0
- package/src/duckdb/src/storage/compression/alp/alp_constants.cpp +13 -0
- package/src/duckdb/src/storage/compression/alprd.cpp +57 -0
- package/src/duckdb/src/storage/compression/bitpacking.cpp +86 -55
- package/src/duckdb/src/storage/compression/bitpacking_hugeint.cpp +41 -41
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -3
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +2 -0
- package/src/duckdb/src/storage/compression/fsst.cpp +4 -4
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +6 -4
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +7 -7
- package/src/duckdb/src/storage/compression/uncompressed.cpp +1 -0
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +6 -6
- package/src/duckdb/src/storage/data_table.cpp +32 -96
- package/src/duckdb/src/storage/index.cpp +23 -11
- package/src/duckdb/src/storage/local_storage.cpp +36 -19
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +2 -2
- package/src/duckdb/src/storage/partial_block_manager.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +9 -4
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_extra_drop_info.cpp +42 -0
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +7 -17
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +84 -77
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +63 -4
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +23 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +2 -0
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +35 -0
- package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +19 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +22 -1
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +50 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +46 -7
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +57 -28
- package/src/duckdb/src/storage/statistics/array_stats.cpp +131 -0
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +62 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +1 -0
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +3 -1
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +5 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +2 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/storage_manager.cpp +47 -22
- package/src/duckdb/src/storage/table/array_column_data.cpp +241 -0
- package/src/duckdb/src/storage/table/chunk_info.cpp +2 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -1
- package/src/duckdb/src/storage/table/column_data.cpp +41 -18
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +12 -3
- package/src/duckdb/src/storage/table/column_segment.cpp +40 -6
- package/src/duckdb/src/storage/table/list_column_data.cpp +18 -15
- package/src/duckdb/src/storage/table/row_group.cpp +73 -21
- package/src/duckdb/src/storage/table/row_group_collection.cpp +395 -20
- package/src/duckdb/src/storage/table/row_version_manager.cpp +2 -1
- package/src/duckdb/src/storage/table/scan_state.cpp +4 -0
- package/src/duckdb/src/storage/table/standard_column_data.cpp +11 -5
- package/src/duckdb/src/storage/table/struct_column_data.cpp +30 -10
- package/src/duckdb/src/storage/table/table_statistics.cpp +7 -1
- package/src/duckdb/src/storage/table/update_segment.cpp +18 -2
- package/src/duckdb/src/storage/table_index_list.cpp +73 -7
- package/src/duckdb/src/storage/temporary_memory_manager.cpp +148 -0
- package/src/duckdb/src/storage/wal_replay.cpp +329 -152
- package/src/duckdb/src/storage/write_ahead_log.cpp +157 -137
- package/src/duckdb/src/transaction/cleanup_state.cpp +3 -2
- package/src/duckdb/src/transaction/commit_state.cpp +89 -63
- package/src/duckdb/src/transaction/duck_transaction.cpp +5 -3
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +26 -54
- package/src/duckdb/src/transaction/meta_transaction.cpp +37 -23
- package/src/duckdb/src/transaction/transaction_context.cpp +23 -4
- package/src/duckdb/src/transaction/undo_buffer.cpp +16 -2
- package/src/duckdb/src/verification/fetch_row_verifier.cpp +13 -0
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +5 -7
- package/src/duckdb/src/verification/statement_verifier.cpp +6 -5
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +100 -29
- package/src/duckdb/third_party/fmt/include/fmt/format-inl.h +1 -1
- package/src/duckdb/third_party/fmt/include/fmt/format.h +4 -2
- package/src/duckdb/third_party/fmt/include/fmt/printf.h +5 -5
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +4 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +82 -21
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +551 -1004
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +17 -3
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +24861 -23465
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +420 -389
- package/src/duckdb/third_party/mbedtls/include/mbedtls/aes.h +640 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/aes_alt.h +1 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/aria.h +358 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/aria_alt.h +1 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/camellia.h +316 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/camellia_alt.h +1 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/ccm.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/ccm_alt.h +1 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/chacha20.h +1 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/chachapoly.h +1 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/cipher.h +124 -124
- package/src/duckdb/third_party/mbedtls/include/mbedtls/cmac.h +1 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/entropy.h +293 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/gcm.h +383 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/gcm_alt.h +1 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/mbedtls_config.h +9 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/nist_kw.h +1 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls/timing.h +1 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +35 -6
- package/src/duckdb/third_party/mbedtls/library/aes.cpp +2171 -0
- package/src/duckdb/third_party/mbedtls/library/aesni.h +1 -0
- package/src/duckdb/third_party/mbedtls/library/aria.cpp +1058 -0
- package/src/duckdb/third_party/mbedtls/library/camellia.cpp +1087 -0
- package/src/duckdb/third_party/mbedtls/library/cipher.cpp +1633 -0
- package/src/duckdb/third_party/mbedtls/library/cipher_wrap.cpp +2270 -0
- package/src/duckdb/third_party/mbedtls/library/cipher_wrap.h +146 -0
- package/src/duckdb/third_party/mbedtls/library/entropy.cpp +701 -0
- package/src/duckdb/third_party/mbedtls/library/entropy_poll.cpp +237 -0
- package/src/duckdb/third_party/mbedtls/library/entropy_poll.h +76 -0
- package/src/duckdb/third_party/mbedtls/library/gcm.cpp +1161 -0
- package/src/duckdb/third_party/mbedtls/library/padlock.h +1 -0
- package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +132 -24
- package/src/duckdb/third_party/pcg/pcg_uint128.hpp +1 -1
- package/src/duckdb/third_party/skiplist/HeadNode.h +934 -0
- package/src/duckdb/third_party/skiplist/IntegrityEnums.h +62 -0
- package/src/duckdb/third_party/skiplist/Node.h +641 -0
- package/src/duckdb/third_party/skiplist/NodeRefs.h +251 -0
- package/src/duckdb/third_party/skiplist/RollingMedian.h +202 -0
- package/src/duckdb/third_party/skiplist/SkipList.cpp +40 -0
- package/src/duckdb/third_party/skiplist/SkipList.h +549 -0
- package/src/duckdb/third_party/thrift/thrift/thrift-config.h +1 -1
- package/src/duckdb/ub_extension_json_json_functions.cpp +2 -0
- package/src/duckdb/ub_src_catalog.cpp +3 -1
- package/src/duckdb/ub_src_catalog_catalog_entry_dependency.cpp +6 -0
- package/src/duckdb/ub_src_common.cpp +1 -1
- package/src/duckdb/ub_src_common_exception.cpp +6 -0
- package/src/duckdb/ub_src_common_types.cpp +2 -2
- package/src/duckdb/ub_src_core_functions.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_array.cpp +4 -0
- package/src/duckdb/ub_src_core_functions_scalar_blob.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +7 -3
- package/src/duckdb/ub_src_core_functions_scalar_secret.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +4 -0
- package/src/duckdb/ub_src_execution_index.cpp +4 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_buffer_manager.cpp +6 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_scanner.cpp +10 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_state_machine.cpp +4 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_table_function.cpp +4 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp +4 -0
- package/src/duckdb/ub_src_execution_operator_helper.cpp +4 -0
- package/src/duckdb/ub_src_execution_operator_join.cpp +4 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +4 -2
- package/src/duckdb/ub_src_function_cast.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +4 -0
- package/src/duckdb/ub_src_function_table.cpp +4 -4
- package/src/duckdb/ub_src_function_table_system.cpp +6 -0
- package/src/duckdb/ub_src_main.cpp +2 -0
- package/src/duckdb/ub_src_main_buffered_data.cpp +2 -0
- package/src/duckdb/ub_src_main_secret.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
- package/src/duckdb/ub_src_parser_expression.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +6 -0
- package/src/duckdb/ub_src_parser_statement.cpp +2 -2
- package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_statement.cpp +6 -0
- package/src/duckdb/ub_src_planner_binder_statement.cpp +2 -2
- package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
- package/src/duckdb/ub_src_planner_filter.cpp +2 -0
- package/src/duckdb/ub_src_planner_operator.cpp +2 -0
- package/src/duckdb/ub_src_planner_subquery.cpp +2 -0
- package/src/duckdb/ub_src_storage.cpp +2 -0
- package/src/duckdb/ub_src_storage_compression.cpp +2 -0
- package/src/duckdb/ub_src_storage_compression_alp.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
- package/src/duckdb/ub_src_storage_statistics.cpp +2 -0
- package/src/duckdb/ub_src_storage_table.cpp +2 -0
- package/src/duckdb_node.hpp +1 -1
- package/src/statement.cpp +18 -8
- package/src/utils.cpp +1 -15
- package/test/affected.test.ts +2 -2
- package/test/columns.test.ts +4 -3
- package/test/config.test.ts +30 -0
- package/test/each.test.ts +2 -2
- package/test/exec.test.ts +2 -2
- package/test/named_columns.test.ts +2 -2
- package/test/null_error.test.ts +2 -2
- package/test/prepare.test.ts +28 -26
- package/test/serialization.test.ts +7 -6
- package/test/test_all_types.test.ts +9 -4
- package/test/typescript_decls.test.ts +6 -6
- package/test/udf.test.ts +23 -23
- package/test/unicode.test.ts +2 -2
@@ -1,23 +1,29 @@
|
|
1
1
|
#include "duckdb/function/table/read_csv.hpp"
|
2
|
+
|
2
3
|
#include "duckdb/common/enum_util.hpp"
|
3
4
|
#include "duckdb/common/multi_file_reader.hpp"
|
5
|
+
#include "duckdb/common/serializer/deserializer.hpp"
|
6
|
+
#include "duckdb/common/serializer/serializer.hpp"
|
4
7
|
#include "duckdb/common/string_util.hpp"
|
5
8
|
#include "duckdb/common/union_by_name.hpp"
|
9
|
+
#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
|
10
|
+
#include "duckdb/execution/operator/csv_scanner/table_function/global_csv_state.hpp"
|
11
|
+
#include "duckdb/execution/operator/csv_scanner/util/csv_error.hpp"
|
6
12
|
#include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
|
7
|
-
#include "duckdb/execution/operator/scan/csv/csv_line_info.hpp"
|
8
|
-
#include "duckdb/execution/operator/scan/csv/csv_sniffer.hpp"
|
9
13
|
#include "duckdb/function/function_set.hpp"
|
10
14
|
#include "duckdb/main/client_context.hpp"
|
11
15
|
#include "duckdb/main/client_data.hpp"
|
12
16
|
#include "duckdb/main/config.hpp"
|
13
17
|
#include "duckdb/main/database.hpp"
|
18
|
+
#include "duckdb/main/extension_helper.hpp"
|
14
19
|
#include "duckdb/parser/expression/constant_expression.hpp"
|
15
20
|
#include "duckdb/parser/expression/function_expression.hpp"
|
16
21
|
#include "duckdb/parser/tableref/table_function_ref.hpp"
|
17
22
|
#include "duckdb/planner/operator/logical_get.hpp"
|
18
|
-
#include "duckdb/
|
19
|
-
#include "duckdb/
|
20
|
-
|
23
|
+
#include "duckdb/execution/operator/csv_scanner/table_function/csv_file_scanner.hpp"
|
24
|
+
#include "duckdb/execution/operator/csv_scanner/scanner/base_scanner.hpp"
|
25
|
+
|
26
|
+
#include "duckdb/execution/operator/csv_scanner/scanner/string_value_scanner.hpp"
|
21
27
|
|
22
28
|
#include <limits>
|
23
29
|
|
@@ -30,38 +36,12 @@ unique_ptr<CSVFileHandle> ReadCSV::OpenCSV(const string &file_path, FileCompress
|
|
30
36
|
return CSVFileHandle::OpenFile(fs, allocator, file_path, compression);
|
31
37
|
}
|
32
38
|
|
39
|
+
ReadCSVData::ReadCSVData() {
|
40
|
+
}
|
41
|
+
|
33
42
|
void ReadCSVData::FinalizeRead(ClientContext &context) {
|
34
43
|
BaseCSVData::Finalize();
|
35
|
-
// Here we identify if we can run this CSV file on parallel or not.
|
36
|
-
bool not_supported_options = options.null_padding;
|
37
|
-
|
38
|
-
auto number_of_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
39
|
-
//! If we have many csv files, we run single-threaded on each file and parallelize on the number of files
|
40
|
-
bool many_csv_files = files.size() > 1 && int64_t(files.size() * 2) >= number_of_threads;
|
41
|
-
if (options.parallel_mode != ParallelMode::PARALLEL && (many_csv_files || number_of_threads == 1)) {
|
42
|
-
single_threaded = true;
|
43
|
-
}
|
44
|
-
if (options.parallel_mode == ParallelMode::SINGLE_THREADED || not_supported_options ||
|
45
|
-
options.dialect_options.new_line == NewLineIdentifier::MIX) {
|
46
|
-
// not supported for parallel CSV reading
|
47
|
-
single_threaded = true;
|
48
|
-
}
|
49
|
-
|
50
|
-
// Validate rejects_table options
|
51
|
-
if (!options.rejects_table_name.empty()) {
|
52
|
-
if (!options.ignore_errors) {
|
53
|
-
throw BinderException("REJECTS_TABLE option is only supported when IGNORE_ERRORS is set to true");
|
54
|
-
}
|
55
|
-
if (options.file_options.union_by_name) {
|
56
|
-
throw BinderException("REJECTS_TABLE option is not supported when UNION_BY_NAME is set to true");
|
57
|
-
}
|
58
|
-
}
|
59
|
-
|
60
44
|
if (!options.rejects_recovery_columns.empty()) {
|
61
|
-
if (options.rejects_table_name.empty()) {
|
62
|
-
throw BinderException(
|
63
|
-
"REJECTS_RECOVERY_COLUMNS option is only supported when REJECTS_TABLE is set to a table name");
|
64
|
-
}
|
65
45
|
for (auto &recovery_col : options.rejects_recovery_columns) {
|
66
46
|
bool found = false;
|
67
47
|
for (idx_t col_idx = 0; col_idx < return_names.size(); col_idx++) {
|
@@ -77,12 +57,6 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
|
|
77
57
|
}
|
78
58
|
}
|
79
59
|
}
|
80
|
-
|
81
|
-
if (options.rejects_limit != 0) {
|
82
|
-
if (options.rejects_table_name.empty()) {
|
83
|
-
throw BinderException("REJECTS_LIMIT option is only supported when REJECTS_TABLE is set to a table name");
|
84
|
-
}
|
85
|
-
}
|
86
60
|
}
|
87
61
|
|
88
62
|
static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctionBindInput &input,
|
@@ -93,7 +67,27 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
|
|
93
67
|
result->files = MultiFileReader::GetFileList(context, input.inputs[0], "CSV");
|
94
68
|
|
95
69
|
options.FromNamedParameters(input.named_parameters, context, return_types, names);
|
96
|
-
|
70
|
+
|
71
|
+
// Validate rejects_table options
|
72
|
+
if (!options.rejects_table_name.empty()) {
|
73
|
+
if (!options.ignore_errors) {
|
74
|
+
throw BinderException("REJECTS_TABLE option is only supported when IGNORE_ERRORS is set to true");
|
75
|
+
}
|
76
|
+
if (options.file_options.union_by_name) {
|
77
|
+
throw BinderException("REJECTS_TABLE option is not supported when UNION_BY_NAME is set to true");
|
78
|
+
}
|
79
|
+
}
|
80
|
+
|
81
|
+
if (options.rejects_limit != 0) {
|
82
|
+
if (options.rejects_table_name.empty()) {
|
83
|
+
throw BinderException("REJECTS_LIMIT option is only supported when REJECTS_TABLE is set to a table name");
|
84
|
+
}
|
85
|
+
}
|
86
|
+
|
87
|
+
if (!options.rejects_recovery_columns.empty() && options.rejects_table_name.empty()) {
|
88
|
+
throw BinderException(
|
89
|
+
"REJECTS_RECOVERY_COLUMNS option is only supported when REJECTS_TABLE is set to a table name");
|
90
|
+
}
|
97
91
|
|
98
92
|
options.file_options.AutoDetectHivePartitioning(result->files, context);
|
99
93
|
|
@@ -102,783 +96,141 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
|
|
102
96
|
"read_csv_auto or set read_csv(..., "
|
103
97
|
"AUTO_DETECT=TRUE) to automatically guess columns.");
|
104
98
|
}
|
105
|
-
if (options.auto_detect) {
|
99
|
+
if (options.auto_detect && !options.file_options.union_by_name) {
|
106
100
|
options.file_path = result->files[0];
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
CSVSniffer sniffer(options, result->buffer_manager, result->state_machine_cache, explicitly_set_columns);
|
101
|
+
result->buffer_manager = make_shared<CSVBufferManager>(context, options, result->files[0], 0);
|
102
|
+
CSVSniffer sniffer(options, result->buffer_manager, CSVStateMachineCache::Get(context),
|
103
|
+
{&return_types, &names});
|
111
104
|
auto sniffer_result = sniffer.SniffCSV();
|
112
105
|
if (names.empty()) {
|
113
106
|
names = sniffer_result.names;
|
114
107
|
return_types = sniffer_result.return_types;
|
115
|
-
} else {
|
116
|
-
if (explicitly_set_columns) {
|
117
|
-
// The user has influenced the names, can't assume they are valid anymore
|
118
|
-
if (return_types.size() != names.size()) {
|
119
|
-
throw BinderException("The amount of names specified (%d) and the observed amount of types (%d) in "
|
120
|
-
"the file don't match",
|
121
|
-
names.size(), return_types.size());
|
122
|
-
}
|
123
|
-
} else {
|
124
|
-
D_ASSERT(return_types.size() == names.size());
|
125
|
-
}
|
126
108
|
}
|
127
|
-
|
128
|
-
|
129
|
-
D_ASSERT(return_types.size() == names.size());
|
109
|
+
result->csv_types = return_types;
|
110
|
+
result->csv_names = names;
|
130
111
|
}
|
131
|
-
result->csv_types = return_types;
|
132
|
-
result->csv_names = names;
|
133
112
|
|
113
|
+
D_ASSERT(return_types.size() == names.size());
|
114
|
+
result->options.dialect_options.num_cols = names.size();
|
134
115
|
if (options.file_options.union_by_name) {
|
135
116
|
result->reader_bind =
|
136
|
-
MultiFileReader::BindUnionReader<
|
117
|
+
MultiFileReader::BindUnionReader<CSVFileScan>(context, return_types, names, *result, options);
|
137
118
|
if (result->union_readers.size() > 1) {
|
138
|
-
result->column_info.emplace_back(result->
|
119
|
+
result->column_info.emplace_back(result->initial_reader->names, result->initial_reader->types);
|
139
120
|
for (idx_t i = 1; i < result->union_readers.size(); i++) {
|
140
|
-
result->column_info.emplace_back(result->union_readers[i]->names,
|
141
|
-
result->union_readers[i]->return_types);
|
121
|
+
result->column_info.emplace_back(result->union_readers[i]->names, result->union_readers[i]->types);
|
142
122
|
}
|
143
123
|
}
|
144
124
|
if (!options.sql_types_per_column.empty()) {
|
145
|
-
auto exception =
|
146
|
-
if (!exception.empty()) {
|
147
|
-
throw BinderException(exception);
|
125
|
+
auto exception = CSVError::ColumnTypesError(options.sql_types_per_column, names);
|
126
|
+
if (!exception.error_message.empty()) {
|
127
|
+
throw BinderException(exception.error_message);
|
128
|
+
}
|
129
|
+
for (idx_t i = 0; i < names.size(); i++) {
|
130
|
+
auto it = options.sql_types_per_column.find(names[i]);
|
131
|
+
if (it != options.sql_types_per_column.end()) {
|
132
|
+
return_types[i] = options.sql_type_list[it->second];
|
133
|
+
}
|
148
134
|
}
|
149
135
|
}
|
136
|
+
result->csv_types = return_types;
|
137
|
+
result->csv_names = names;
|
150
138
|
} else {
|
139
|
+
result->csv_types = return_types;
|
140
|
+
result->csv_names = names;
|
151
141
|
result->reader_bind = MultiFileReader::BindOptions(options.file_options, result->files, return_types, names);
|
152
142
|
}
|
153
143
|
result->return_types = return_types;
|
154
144
|
result->return_names = names;
|
155
|
-
result->FinalizeRead(context);
|
156
145
|
|
146
|
+
result->FinalizeRead(context);
|
157
147
|
return std::move(result);
|
158
148
|
}
|
159
149
|
|
160
|
-
static unique_ptr<FunctionData> ReadCSVAutoBind(ClientContext &context, TableFunctionBindInput &input,
|
161
|
-
vector<LogicalType> &return_types, vector<string> &names) {
|
162
|
-
input.named_parameters["auto_detect"] = Value::BOOLEAN(true);
|
163
|
-
return ReadCSVBind(context, input, return_types, names);
|
164
|
-
}
|
165
|
-
|
166
150
|
//===--------------------------------------------------------------------===//
|
167
|
-
//
|
151
|
+
// Read CSV Local State
|
168
152
|
//===--------------------------------------------------------------------===//
|
169
|
-
|
170
|
-
struct ParallelCSVGlobalState : public GlobalTableFunctionState {
|
153
|
+
struct CSVLocalState : public LocalTableFunctionState {
|
171
154
|
public:
|
172
|
-
|
173
|
-
const CSVReaderOptions &options, idx_t system_threads_p, const vector<string> &files_path_p,
|
174
|
-
bool force_parallelism_p, vector<column_t> column_ids_p)
|
175
|
-
: buffer_manager(std::move(buffer_manager_p)), system_threads(system_threads_p),
|
176
|
-
force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)),
|
177
|
-
line_info(main_mutex, batch_to_tuple_end, tuple_start, tuple_end) {
|
178
|
-
current_file_path = files_path_p[0];
|
179
|
-
CSVFileHandle *file_handle_ptr;
|
180
|
-
|
181
|
-
if (!buffer_manager || (options.skip_rows_set && options.dialect_options.skip_rows > 0) ||
|
182
|
-
buffer_manager->file_handle->GetFilePath() != current_file_path) {
|
183
|
-
// If our buffers are too small, and we skip too many rows there is a chance things will go over-buffer
|
184
|
-
// for now don't reuse the buffer manager
|
185
|
-
buffer_manager.reset();
|
186
|
-
file_handle = ReadCSV::OpenCSV(current_file_path, options.compression, context);
|
187
|
-
file_handle_ptr = file_handle.get();
|
188
|
-
} else {
|
189
|
-
file_handle_ptr = buffer_manager->file_handle.get();
|
190
|
-
}
|
191
|
-
|
192
|
-
file_size = file_handle_ptr->FileSize();
|
193
|
-
first_file_size = file_size;
|
194
|
-
on_disk_file = file_handle_ptr->OnDiskFile();
|
195
|
-
bytes_read = 0;
|
196
|
-
running_threads = MaxThreads();
|
197
|
-
|
198
|
-
// Initialize all the book-keeping variables
|
199
|
-
auto file_count = files_path_p.size();
|
200
|
-
line_info.current_batches.resize(file_count);
|
201
|
-
line_info.lines_read.resize(file_count);
|
202
|
-
line_info.lines_errored.resize(file_count);
|
203
|
-
tuple_start.resize(file_count);
|
204
|
-
tuple_end.resize(file_count);
|
205
|
-
tuple_end_to_batch.resize(file_count);
|
206
|
-
batch_to_tuple_end.resize(file_count);
|
207
|
-
|
208
|
-
// Initialize the lines read
|
209
|
-
line_info.lines_read[0][0] = options.dialect_options.skip_rows;
|
210
|
-
if (options.has_header && options.dialect_options.header) {
|
211
|
-
line_info.lines_read[0][0]++;
|
212
|
-
}
|
213
|
-
first_position = options.dialect_options.true_start;
|
214
|
-
next_byte = options.dialect_options.true_start;
|
215
|
-
}
|
216
|
-
explicit ParallelCSVGlobalState(idx_t system_threads_p)
|
217
|
-
: system_threads(system_threads_p), line_info(main_mutex, batch_to_tuple_end, tuple_start, tuple_end) {
|
218
|
-
running_threads = MaxThreads();
|
155
|
+
explicit CSVLocalState(unique_ptr<StringValueScanner> csv_reader_p) : csv_reader(std::move(csv_reader_p)) {
|
219
156
|
}
|
220
157
|
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
//! How many bytes were read up to this point
|
225
|
-
atomic<idx_t> bytes_read;
|
226
|
-
//! Size of current file
|
227
|
-
idx_t file_size;
|
228
|
-
|
229
|
-
public:
|
230
|
-
idx_t MaxThreads() const override;
|
231
|
-
//! Updates the CSV reader with the next buffer to read. Returns false if no more buffers are available.
|
232
|
-
bool Next(ClientContext &context, const ReadCSVData &bind_data, unique_ptr<ParallelCSVReader> &reader);
|
233
|
-
//! Verify if the CSV File was read correctly
|
234
|
-
void Verify();
|
235
|
-
|
236
|
-
void UpdateVerification(VerificationPositions positions, idx_t file_number, idx_t batch_idx);
|
237
|
-
|
238
|
-
void UpdateLinesRead(CSVBufferRead &buffer_read, idx_t file_idx);
|
239
|
-
|
240
|
-
void DecrementThread();
|
241
|
-
|
242
|
-
bool Finished();
|
243
|
-
|
244
|
-
double GetProgress(const ReadCSVData &bind_data) const {
|
245
|
-
idx_t total_files = bind_data.files.size();
|
246
|
-
|
247
|
-
// get the progress WITHIN the current file
|
248
|
-
double progress;
|
249
|
-
if (file_size == 0) {
|
250
|
-
progress = 1.0;
|
251
|
-
} else {
|
252
|
-
progress = double(bytes_read) / double(file_size);
|
253
|
-
}
|
254
|
-
// now get the total percentage of files read
|
255
|
-
double percentage = double(file_index - 1) / total_files;
|
256
|
-
percentage += (double(1) / double(total_files)) * progress;
|
257
|
-
return percentage * 100;
|
258
|
-
}
|
259
|
-
|
260
|
-
private:
|
261
|
-
//! File Handle for current file
|
262
|
-
shared_ptr<CSVBufferManager> buffer_manager;
|
263
|
-
|
264
|
-
//! The index of the next file to read (i.e. current file + 1)
|
265
|
-
idx_t file_index = 1;
|
266
|
-
string current_file_path;
|
267
|
-
|
268
|
-
//! Mutex to lock when getting next batch of bytes (Parallel Only)
|
269
|
-
mutex main_mutex;
|
270
|
-
//! Byte set from for last thread
|
271
|
-
idx_t next_byte = 0;
|
272
|
-
//! Size of first file
|
273
|
-
idx_t first_file_size = 0;
|
274
|
-
//! Whether or not this is an on-disk file
|
275
|
-
bool on_disk_file = true;
|
276
|
-
//! Basically max number of threads in DuckDB
|
277
|
-
idx_t system_threads;
|
278
|
-
//! Current batch index
|
279
|
-
idx_t batch_index = 0;
|
280
|
-
idx_t local_batch_index = 0;
|
281
|
-
|
282
|
-
//! Forces parallelism for small CSV Files, should only be used for testing.
|
283
|
-
bool force_parallelism = false;
|
284
|
-
//! First Position of First Buffer
|
285
|
-
idx_t first_position = 0;
|
286
|
-
//! Current File Number
|
287
|
-
idx_t max_tuple_end = 0;
|
288
|
-
//! The vector stores positions where threads ended the last line they read in the CSV File, and the set stores
|
289
|
-
//! Positions where they started reading the first line.
|
290
|
-
vector<vector<idx_t>> tuple_end;
|
291
|
-
vector<set<idx_t>> tuple_start;
|
292
|
-
//! Tuple end to batch
|
293
|
-
vector<unordered_map<idx_t, idx_t>> tuple_end_to_batch;
|
294
|
-
//! Batch to Tuple End
|
295
|
-
vector<unordered_map<idx_t, idx_t>> batch_to_tuple_end;
|
296
|
-
idx_t running_threads = 0;
|
297
|
-
//! The column ids to read
|
298
|
-
vector<column_t> column_ids;
|
299
|
-
//! Line Info used in error messages
|
300
|
-
LineInfo line_info;
|
301
|
-
//! Current Buffer index
|
302
|
-
idx_t cur_buffer_idx = 0;
|
303
|
-
//! Only used if we don't run auto_detection first
|
304
|
-
unique_ptr<CSVFileHandle> file_handle;
|
158
|
+
//! The CSV reader
|
159
|
+
unique_ptr<StringValueScanner> csv_reader;
|
160
|
+
bool done = false;
|
305
161
|
};
|
306
162
|
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
idx_t threads_per_mb = first_file_size / one_mb + 1;
|
313
|
-
if (threads_per_mb < system_threads || threads_per_mb == 1) {
|
314
|
-
return threads_per_mb;
|
315
|
-
}
|
316
|
-
|
317
|
-
return system_threads;
|
318
|
-
}
|
319
|
-
|
320
|
-
void ParallelCSVGlobalState::DecrementThread() {
|
321
|
-
lock_guard<mutex> parallel_lock(main_mutex);
|
322
|
-
D_ASSERT(running_threads > 0);
|
323
|
-
running_threads--;
|
324
|
-
}
|
325
|
-
|
326
|
-
bool ParallelCSVGlobalState::Finished() {
|
327
|
-
lock_guard<mutex> parallel_lock(main_mutex);
|
328
|
-
return running_threads == 0;
|
329
|
-
}
|
330
|
-
|
331
|
-
void ParallelCSVGlobalState::Verify() {
|
332
|
-
// All threads are done, we run some magic sweet verification code
|
333
|
-
lock_guard<mutex> parallel_lock(main_mutex);
|
334
|
-
if (running_threads == 0) {
|
335
|
-
D_ASSERT(tuple_end.size() == tuple_start.size());
|
336
|
-
for (idx_t i = 0; i < tuple_start.size(); i++) {
|
337
|
-
auto &current_tuple_end = tuple_end[i];
|
338
|
-
auto &current_tuple_start = tuple_start[i];
|
339
|
-
// figure out max value of last_pos
|
340
|
-
if (current_tuple_end.empty()) {
|
341
|
-
return;
|
342
|
-
}
|
343
|
-
auto max_value = *max_element(std::begin(current_tuple_end), std::end(current_tuple_end));
|
344
|
-
for (idx_t tpl_idx = 0; tpl_idx < current_tuple_end.size(); tpl_idx++) {
|
345
|
-
auto last_pos = current_tuple_end[tpl_idx];
|
346
|
-
auto first_pos = current_tuple_start.find(last_pos);
|
347
|
-
if (first_pos == current_tuple_start.end()) {
|
348
|
-
// this might be necessary due to carriage returns outside buffer scopes.
|
349
|
-
first_pos = current_tuple_start.find(last_pos + 1);
|
350
|
-
}
|
351
|
-
if (first_pos == current_tuple_start.end() && last_pos != max_value) {
|
352
|
-
auto batch_idx = tuple_end_to_batch[i][last_pos];
|
353
|
-
auto problematic_line = line_info.GetLine(batch_idx);
|
354
|
-
throw InvalidInputException(
|
355
|
-
"CSV File not supported for multithreading. This can be a problematic line in your CSV File or "
|
356
|
-
"that this CSV can't be read in Parallel. Please, inspect if the line %llu is correct. If so, "
|
357
|
-
"please run single-threaded CSV Reading by setting parallel=false in the read_csv call.",
|
358
|
-
problematic_line);
|
359
|
-
}
|
360
|
-
}
|
361
|
-
}
|
362
|
-
}
|
363
|
-
}
|
364
|
-
|
365
|
-
void LineInfo::Verify(idx_t file_idx, idx_t batch_idx, idx_t cur_first_pos) {
|
366
|
-
auto &tuple_start_set = tuple_start[file_idx];
|
367
|
-
auto &processed_batches = batch_to_tuple_end[file_idx];
|
368
|
-
auto &tuple_end_vec = tuple_end[file_idx];
|
369
|
-
bool has_error = false;
|
370
|
-
idx_t problematic_line;
|
371
|
-
if (batch_idx == 0 || tuple_start_set.empty()) {
|
372
|
-
return;
|
373
|
-
}
|
374
|
-
for (idx_t cur_batch = 0; cur_batch < batch_idx - 1; cur_batch++) {
|
375
|
-
auto cur_end = tuple_end_vec[processed_batches[cur_batch]];
|
376
|
-
auto first_pos = tuple_start_set.find(cur_end);
|
377
|
-
if (first_pos == tuple_start_set.end()) {
|
378
|
-
has_error = true;
|
379
|
-
problematic_line = GetLine(cur_batch);
|
380
|
-
break;
|
381
|
-
}
|
382
|
-
}
|
383
|
-
if (!has_error) {
|
384
|
-
auto cur_end = tuple_end_vec[processed_batches[batch_idx - 1]];
|
385
|
-
if (cur_end != cur_first_pos) {
|
386
|
-
has_error = true;
|
387
|
-
problematic_line = GetLine(batch_idx);
|
388
|
-
}
|
389
|
-
}
|
390
|
-
if (has_error) {
|
391
|
-
throw InvalidInputException(
|
392
|
-
"CSV File not supported for multithreading. This can be a problematic line in your CSV File or "
|
393
|
-
"that this CSV can't be read in Parallel. Please, inspect if the line %llu is correct. If so, "
|
394
|
-
"please run single-threaded CSV Reading by setting parallel=false in the read_csv call.",
|
395
|
-
problematic_line);
|
396
|
-
}
|
397
|
-
}
|
398
|
-
bool ParallelCSVGlobalState::Next(ClientContext &context, const ReadCSVData &bind_data,
|
399
|
-
unique_ptr<ParallelCSVReader> &reader) {
|
400
|
-
lock_guard<mutex> parallel_lock(main_mutex);
|
401
|
-
if (!buffer_manager && file_handle) {
|
402
|
-
buffer_manager = make_shared<CSVBufferManager>(context, std::move(file_handle), bind_data.options);
|
403
|
-
}
|
404
|
-
if (!buffer_manager) {
|
405
|
-
return false;
|
406
|
-
}
|
407
|
-
auto current_buffer = buffer_manager->GetBuffer(cur_buffer_idx);
|
408
|
-
auto next_buffer = buffer_manager->GetBuffer(cur_buffer_idx + 1);
|
409
|
-
|
410
|
-
if (!current_buffer) {
|
411
|
-
// This means we are done with the current file, we need to go to the next one (if exists).
|
412
|
-
if (file_index < bind_data.files.size()) {
|
413
|
-
current_file_path = bind_data.files[file_index];
|
414
|
-
file_handle = ReadCSV::OpenCSV(current_file_path, bind_data.options.compression, context);
|
415
|
-
buffer_manager =
|
416
|
-
make_shared<CSVBufferManager>(context, std::move(file_handle), bind_data.options, file_index);
|
417
|
-
cur_buffer_idx = 0;
|
418
|
-
first_position = 0;
|
419
|
-
local_batch_index = 0;
|
420
|
-
|
421
|
-
line_info.lines_read[file_index++][local_batch_index] = (bind_data.options.has_header ? 1 : 0);
|
422
|
-
|
423
|
-
current_buffer = buffer_manager->GetBuffer(cur_buffer_idx);
|
424
|
-
next_buffer = buffer_manager->GetBuffer(cur_buffer_idx + 1);
|
425
|
-
} else {
|
426
|
-
// We are done scanning.
|
427
|
-
reader.reset();
|
428
|
-
return false;
|
429
|
-
}
|
430
|
-
}
|
431
|
-
// set up the current buffer
|
432
|
-
line_info.current_batches[file_index - 1].insert(local_batch_index);
|
433
|
-
idx_t bytes_per_local_state = current_buffer->actual_size / MaxThreads() + 1;
|
434
|
-
auto result = make_uniq<CSVBufferRead>(
|
435
|
-
buffer_manager->GetBuffer(cur_buffer_idx), buffer_manager->GetBuffer(cur_buffer_idx + 1), next_byte,
|
436
|
-
next_byte + bytes_per_local_state, batch_index++, local_batch_index++, &line_info);
|
437
|
-
// move the byte index of the CSV reader to the next buffer
|
438
|
-
next_byte += bytes_per_local_state;
|
439
|
-
if (next_byte >= current_buffer->actual_size) {
|
440
|
-
// We replace the current buffer with the next buffer
|
441
|
-
next_byte = 0;
|
442
|
-
bytes_read += current_buffer->actual_size;
|
443
|
-
current_buffer = std::move(next_buffer);
|
444
|
-
cur_buffer_idx++;
|
445
|
-
if (current_buffer) {
|
446
|
-
// Next buffer gets the next-next buffer
|
447
|
-
next_buffer = buffer_manager->GetBuffer(cur_buffer_idx + 1);
|
448
|
-
}
|
449
|
-
}
|
450
|
-
if (!reader || reader->options.file_path != current_file_path) {
|
451
|
-
// we either don't have a reader, or the reader was created for a different file
|
452
|
-
// we need to create a new reader and instantiate it
|
453
|
-
if (file_index > 0 && file_index <= bind_data.union_readers.size() && bind_data.union_readers[file_index - 1]) {
|
454
|
-
// we are doing UNION BY NAME - fetch the options from the union reader for this file
|
455
|
-
auto &union_reader = *bind_data.union_readers[file_index - 1];
|
456
|
-
reader = make_uniq<ParallelCSVReader>(context, union_reader.options, std::move(result), first_position,
|
457
|
-
union_reader.GetTypes(), file_index - 1);
|
458
|
-
reader->names = union_reader.GetNames();
|
459
|
-
} else if (file_index <= bind_data.column_info.size()) {
|
460
|
-
// Serialized Union By name
|
461
|
-
reader = make_uniq<ParallelCSVReader>(context, bind_data.options, std::move(result), first_position,
|
462
|
-
bind_data.column_info[file_index - 1].types, file_index - 1);
|
463
|
-
reader->names = bind_data.column_info[file_index - 1].names;
|
464
|
-
} else {
|
465
|
-
// regular file - use the standard options
|
466
|
-
if (!result) {
|
467
|
-
return false;
|
468
|
-
}
|
469
|
-
reader = make_uniq<ParallelCSVReader>(context, bind_data.options, std::move(result), first_position,
|
470
|
-
bind_data.csv_types, file_index - 1);
|
471
|
-
reader->names = bind_data.csv_names;
|
472
|
-
}
|
473
|
-
reader->options.file_path = current_file_path;
|
474
|
-
MultiFileReader::InitializeReader(*reader, bind_data.options.file_options, bind_data.reader_bind,
|
475
|
-
bind_data.return_types, bind_data.return_names, column_ids, nullptr,
|
476
|
-
bind_data.files.front(), context);
|
477
|
-
} else {
|
478
|
-
// update the current reader
|
479
|
-
reader->SetBufferRead(std::move(result));
|
480
|
-
}
|
481
|
-
|
482
|
-
return true;
|
483
|
-
}
|
484
|
-
void ParallelCSVGlobalState::UpdateVerification(VerificationPositions positions, idx_t file_number_p, idx_t batch_idx) {
|
485
|
-
lock_guard<mutex> parallel_lock(main_mutex);
|
486
|
-
if (positions.end_of_last_line > max_tuple_end) {
|
487
|
-
max_tuple_end = positions.end_of_last_line;
|
488
|
-
}
|
489
|
-
tuple_end_to_batch[file_number_p][positions.end_of_last_line] = batch_idx;
|
490
|
-
batch_to_tuple_end[file_number_p][batch_idx] = tuple_end[file_number_p].size();
|
491
|
-
tuple_start[file_number_p].insert(positions.beginning_of_first_line);
|
492
|
-
tuple_end[file_number_p].push_back(positions.end_of_last_line);
|
493
|
-
}
|
494
|
-
|
495
|
-
void ParallelCSVGlobalState::UpdateLinesRead(CSVBufferRead &buffer_read, idx_t file_idx) {
|
496
|
-
auto batch_idx = buffer_read.local_batch_index;
|
497
|
-
auto lines_read = buffer_read.lines_read;
|
498
|
-
lock_guard<mutex> parallel_lock(main_mutex);
|
499
|
-
line_info.current_batches[file_idx].erase(batch_idx);
|
500
|
-
line_info.lines_read[file_idx][batch_idx] += lines_read;
|
501
|
-
}
|
502
|
-
|
503
|
-
bool LineInfo::CanItGetLine(idx_t file_idx, idx_t batch_idx) {
|
504
|
-
lock_guard<mutex> parallel_lock(main_mutex);
|
505
|
-
if (current_batches.empty() || done) {
|
506
|
-
return true;
|
507
|
-
}
|
508
|
-
if (file_idx >= current_batches.size() || current_batches[file_idx].empty()) {
|
509
|
-
return true;
|
510
|
-
}
|
511
|
-
auto min_value = *current_batches[file_idx].begin();
|
512
|
-
if (min_value >= batch_idx) {
|
513
|
-
return true;
|
514
|
-
}
|
515
|
-
return false;
|
516
|
-
}
|
517
|
-
|
518
|
-
void LineInfo::Increment(idx_t file_idx, idx_t batch_idx) {
|
519
|
-
auto parallel_lock = duckdb::make_uniq<lock_guard<mutex>>(main_mutex);
|
520
|
-
lines_errored[file_idx][batch_idx]++;
|
521
|
-
}
|
522
|
-
|
523
|
-
// Returns the 1-indexed line number
|
524
|
-
idx_t LineInfo::GetLine(idx_t batch_idx, idx_t line_error, idx_t file_idx, idx_t cur_start, bool verify,
|
525
|
-
bool stop_at_first) {
|
526
|
-
unique_ptr<lock_guard<mutex>> parallel_lock;
|
527
|
-
if (!verify) {
|
528
|
-
parallel_lock = duckdb::make_uniq<lock_guard<mutex>>(main_mutex);
|
529
|
-
}
|
530
|
-
idx_t line_count = 0;
|
531
|
-
|
532
|
-
if (!stop_at_first) {
|
533
|
-
// Figure out the amount of lines read in the current file
|
534
|
-
for (idx_t cur_batch_idx = 0; cur_batch_idx <= batch_idx; cur_batch_idx++) {
|
535
|
-
if (cur_batch_idx < batch_idx) {
|
536
|
-
line_count += lines_errored[file_idx][cur_batch_idx];
|
537
|
-
}
|
538
|
-
line_count += lines_read[file_idx][cur_batch_idx];
|
539
|
-
}
|
540
|
-
return line_count + line_error + 1;
|
541
|
-
}
|
542
|
-
|
543
|
-
// Otherwise, check if we already have an error on another thread
|
544
|
-
if (done) {
|
545
|
-
// line count is 0-indexed, but we want to return 1-indexed
|
546
|
-
return first_line + 1;
|
547
|
-
}
|
548
|
-
for (idx_t i = 0; i <= batch_idx; i++) {
|
549
|
-
if (lines_read[file_idx].find(i) == lines_read[file_idx].end() && i != batch_idx) {
|
550
|
-
throw InternalException("Missing batch index on Parallel CSV Reader GetLine");
|
551
|
-
}
|
552
|
-
line_count += lines_read[file_idx][i];
|
553
|
-
}
|
163
|
+
//===--------------------------------------------------------------------===//
|
164
|
+
// Read CSV Functions
|
165
|
+
//===--------------------------------------------------------------------===//
|
166
|
+
static unique_ptr<GlobalTableFunctionState> ReadCSVInitGlobal(ClientContext &context, TableFunctionInitInput &input) {
|
167
|
+
auto &bind_data = input.bind_data->Cast<ReadCSVData>();
|
554
168
|
|
555
|
-
//
|
556
|
-
|
557
|
-
|
169
|
+
// Create the temporary rejects table
|
170
|
+
auto rejects_table = bind_data.options.rejects_table_name;
|
171
|
+
if (!rejects_table.empty()) {
|
172
|
+
CSVRejectsTable::GetOrCreate(context, rejects_table)->InitializeTable(context, bind_data);
|
558
173
|
}
|
559
|
-
done = true;
|
560
|
-
first_line = line_count + line_error;
|
561
|
-
// line count is 0-indexed, but we want to return 1-indexed
|
562
|
-
return first_line + 1;
|
563
|
-
}
|
564
|
-
|
565
|
-
static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext &context,
|
566
|
-
TableFunctionInitInput &input) {
|
567
|
-
auto &bind_data = input.bind_data->CastNoConst<ReadCSVData>();
|
568
174
|
if (bind_data.files.empty()) {
|
569
175
|
// This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
|
570
|
-
return
|
176
|
+
return nullptr;
|
571
177
|
}
|
572
|
-
bind_data.
|
573
|
-
|
574
|
-
return make_uniq<ParallelCSVGlobalState>(context, buffer_manager, bind_data.options, context.db->NumberOfThreads(),
|
575
|
-
bind_data.files, ClientConfig::GetConfig(context).verify_parallelism,
|
576
|
-
input.column_ids);
|
178
|
+
return make_uniq<CSVGlobalState>(context, bind_data.buffer_manager, bind_data.options,
|
179
|
+
context.db->NumberOfThreads(), bind_data.files, input.column_ids, bind_data);
|
577
180
|
}
|
578
181
|
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
public:
|
584
|
-
explicit ParallelCSVLocalState(unique_ptr<ParallelCSVReader> csv_reader_p) : csv_reader(std::move(csv_reader_p)) {
|
182
|
+
unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
|
183
|
+
GlobalTableFunctionState *global_state_p) {
|
184
|
+
if (!global_state_p) {
|
185
|
+
return nullptr;
|
585
186
|
}
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
CSVBufferRead previous_buffer;
|
590
|
-
bool done = false;
|
591
|
-
};
|
592
|
-
|
593
|
-
unique_ptr<LocalTableFunctionState> ParallelReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
|
594
|
-
GlobalTableFunctionState *global_state_p) {
|
595
|
-
auto &csv_data = input.bind_data->Cast<ReadCSVData>();
|
596
|
-
auto &global_state = global_state_p->Cast<ParallelCSVGlobalState>();
|
597
|
-
unique_ptr<ParallelCSVReader> csv_reader;
|
598
|
-
auto has_next = global_state.Next(context.client, csv_data, csv_reader);
|
599
|
-
if (!has_next) {
|
187
|
+
auto &global_state = global_state_p->Cast<CSVGlobalState>();
|
188
|
+
auto csv_scanner = global_state.Next();
|
189
|
+
if (!csv_scanner) {
|
600
190
|
global_state.DecrementThread();
|
601
|
-
csv_reader.reset();
|
602
191
|
}
|
603
|
-
return make_uniq<
|
192
|
+
return make_uniq<CSVLocalState>(std::move(csv_scanner));
|
604
193
|
}
|
605
194
|
|
606
|
-
static void
|
195
|
+
static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
|
607
196
|
auto &bind_data = data_p.bind_data->Cast<ReadCSVData>();
|
608
|
-
|
609
|
-
|
197
|
+
if (!data_p.global_state) {
|
198
|
+
return;
|
199
|
+
}
|
200
|
+
auto &csv_global_state = data_p.global_state->Cast<CSVGlobalState>();
|
201
|
+
auto &csv_local_state = data_p.local_state->Cast<CSVLocalState>();
|
610
202
|
|
611
203
|
if (!csv_local_state.csv_reader) {
|
612
204
|
// no csv_reader was set, this can happen when a filename-based filter has filtered out all possible files
|
613
205
|
return;
|
614
206
|
}
|
615
|
-
|
616
207
|
do {
|
617
208
|
if (output.size() != 0) {
|
618
|
-
MultiFileReader::FinalizeChunk(bind_data.reader_bind,
|
209
|
+
MultiFileReader::FinalizeChunk(bind_data.reader_bind,
|
210
|
+
csv_local_state.csv_reader->csv_file_scan->reader_data, output);
|
619
211
|
break;
|
620
212
|
}
|
621
|
-
if (csv_local_state.csv_reader->
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
csv_global_state.UpdateLinesRead(*csv_local_state.csv_reader->buffer, csv_local_state.csv_reader->file_idx);
|
627
|
-
auto has_next = csv_global_state.Next(context, bind_data, csv_local_state.csv_reader);
|
628
|
-
if (csv_local_state.csv_reader) {
|
629
|
-
csv_local_state.csv_reader->linenr = 0;
|
630
|
-
}
|
631
|
-
if (!has_next) {
|
213
|
+
if (csv_local_state.csv_reader->FinishedIterator()) {
|
214
|
+
csv_local_state.csv_reader->csv_file_scan->error_handler->Insert(
|
215
|
+
csv_local_state.csv_reader->GetBoundaryIndex(), csv_local_state.csv_reader->GetLinesRead());
|
216
|
+
csv_local_state.csv_reader = csv_global_state.Next();
|
217
|
+
if (!csv_local_state.csv_reader) {
|
632
218
|
csv_global_state.DecrementThread();
|
633
219
|
break;
|
634
220
|
}
|
635
221
|
}
|
636
|
-
csv_local_state.csv_reader->
|
222
|
+
csv_local_state.csv_reader->Flush(output);
|
637
223
|
|
638
224
|
} while (true);
|
639
|
-
if (csv_global_state.Finished()) {
|
640
|
-
csv_global_state.Verify();
|
641
|
-
}
|
642
|
-
}
|
643
|
-
|
644
|
-
//===--------------------------------------------------------------------===//
|
645
|
-
// Single-Threaded CSV Reader
|
646
|
-
//===--------------------------------------------------------------------===//
|
647
|
-
struct SingleThreadedCSVState : public GlobalTableFunctionState {
|
648
|
-
explicit SingleThreadedCSVState(idx_t total_files) : total_files(total_files), next_file(0), progress_in_files(0) {
|
649
|
-
}
|
650
|
-
|
651
|
-
mutex csv_lock;
|
652
|
-
unique_ptr<BufferedCSVReader> initial_reader;
|
653
|
-
//! The total number of files to read from
|
654
|
-
idx_t total_files;
|
655
|
-
//! The index of the next file to read (i.e. current file + 1)
|
656
|
-
atomic<idx_t> next_file;
|
657
|
-
//! How far along we are in reading the current set of open files
|
658
|
-
//! This goes from [0...next_file] * 100
|
659
|
-
atomic<idx_t> progress_in_files;
|
660
|
-
//! The set of SQL types
|
661
|
-
vector<LogicalType> csv_types;
|
662
|
-
//! The set of SQL names to be read from the file
|
663
|
-
vector<string> csv_names;
|
664
|
-
//! The column ids to read
|
665
|
-
vector<column_t> column_ids;
|
666
|
-
|
667
|
-
idx_t MaxThreads() const override {
|
668
|
-
return total_files;
|
669
|
-
}
|
670
|
-
|
671
|
-
double GetProgress(const ReadCSVData &bind_data) const {
|
672
|
-
D_ASSERT(total_files == bind_data.files.size());
|
673
|
-
D_ASSERT(progress_in_files <= total_files * 100);
|
674
|
-
return (double(progress_in_files) / double(total_files));
|
675
|
-
}
|
676
|
-
|
677
|
-
unique_ptr<BufferedCSVReader> GetCSVReader(ClientContext &context, ReadCSVData &bind_data, idx_t &file_index,
|
678
|
-
idx_t &total_size) {
|
679
|
-
return GetCSVReaderInternal(context, bind_data, file_index, total_size);
|
680
|
-
}
|
681
|
-
|
682
|
-
private:
|
683
|
-
unique_ptr<BufferedCSVReader> GetCSVReaderInternal(ClientContext &context, ReadCSVData &bind_data,
|
684
|
-
idx_t &file_index, idx_t &total_size) {
|
685
|
-
CSVReaderOptions options;
|
686
|
-
{
|
687
|
-
lock_guard<mutex> l(csv_lock);
|
688
|
-
if (initial_reader) {
|
689
|
-
total_size = initial_reader->file_handle ? initial_reader->file_handle->FileSize() : 0;
|
690
|
-
return std::move(initial_reader);
|
691
|
-
}
|
692
|
-
if (next_file >= total_files) {
|
693
|
-
return nullptr;
|
694
|
-
}
|
695
|
-
options = bind_data.options;
|
696
|
-
file_index = next_file;
|
697
|
-
next_file++;
|
698
|
-
}
|
699
|
-
// reuse csv_readers was created during binding
|
700
|
-
unique_ptr<BufferedCSVReader> result;
|
701
|
-
if (file_index < bind_data.union_readers.size() && bind_data.union_readers[file_index]) {
|
702
|
-
result = std::move(bind_data.union_readers[file_index]);
|
703
|
-
} else {
|
704
|
-
auto union_by_name = options.file_options.union_by_name;
|
705
|
-
options.file_path = bind_data.files[file_index];
|
706
|
-
result = make_uniq<BufferedCSVReader>(context, std::move(options), csv_types);
|
707
|
-
if (!union_by_name) {
|
708
|
-
result->names = csv_names;
|
709
|
-
}
|
710
|
-
MultiFileReader::InitializeReader(*result, bind_data.options.file_options, bind_data.reader_bind,
|
711
|
-
bind_data.return_types, bind_data.return_names, column_ids, nullptr,
|
712
|
-
bind_data.files.front(), context);
|
713
|
-
}
|
714
|
-
total_size = result->file_handle->FileSize();
|
715
|
-
return result;
|
716
|
-
}
|
717
|
-
};
|
718
|
-
|
719
|
-
struct SingleThreadedCSVLocalState : public LocalTableFunctionState {
|
720
|
-
public:
|
721
|
-
explicit SingleThreadedCSVLocalState() : bytes_read(0), total_size(0), current_progress(0), file_index(0) {
|
722
|
-
}
|
723
|
-
|
724
|
-
//! The CSV reader
|
725
|
-
unique_ptr<BufferedCSVReader> csv_reader;
|
726
|
-
//! The current amount of bytes read by this reader
|
727
|
-
idx_t bytes_read;
|
728
|
-
//! The total amount of bytes in the file
|
729
|
-
idx_t total_size;
|
730
|
-
//! The current progress from 0..100
|
731
|
-
idx_t current_progress;
|
732
|
-
//! The file index of this reader
|
733
|
-
idx_t file_index;
|
734
|
-
};
|
735
|
-
|
736
|
-
static unique_ptr<GlobalTableFunctionState> SingleThreadedCSVInit(ClientContext &context,
|
737
|
-
TableFunctionInitInput &input) {
|
738
|
-
auto &bind_data = input.bind_data->CastNoConst<ReadCSVData>();
|
739
|
-
auto result = make_uniq<SingleThreadedCSVState>(bind_data.files.size());
|
740
|
-
if (bind_data.files.empty()) {
|
741
|
-
// This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
|
742
|
-
return std::move(result);
|
743
|
-
} else {
|
744
|
-
bind_data.options.file_path = bind_data.files[0];
|
745
|
-
result->initial_reader = make_uniq<BufferedCSVReader>(context, bind_data.options, bind_data.csv_types);
|
746
|
-
if (!bind_data.options.file_options.union_by_name) {
|
747
|
-
result->initial_reader->names = bind_data.csv_names;
|
748
|
-
}
|
749
|
-
if (bind_data.options.auto_detect) {
|
750
|
-
bind_data.options = result->initial_reader->options;
|
751
|
-
}
|
752
|
-
}
|
753
|
-
MultiFileReader::InitializeReader(*result->initial_reader, bind_data.options.file_options, bind_data.reader_bind,
|
754
|
-
bind_data.return_types, bind_data.return_names, input.column_ids, input.filters,
|
755
|
-
bind_data.files.front(), context);
|
756
|
-
for (auto &reader : bind_data.union_readers) {
|
757
|
-
if (!reader) {
|
758
|
-
continue;
|
759
|
-
}
|
760
|
-
MultiFileReader::InitializeReader(*reader, bind_data.options.file_options, bind_data.reader_bind,
|
761
|
-
bind_data.return_types, bind_data.return_names, input.column_ids,
|
762
|
-
input.filters, bind_data.files.front(), context);
|
763
|
-
}
|
764
|
-
result->column_ids = input.column_ids;
|
765
|
-
|
766
|
-
if (!bind_data.options.file_options.union_by_name) {
|
767
|
-
// if we are reading multiple files - run auto-detect only on the first file
|
768
|
-
// UNLESS union by name is turned on - in that case we assume that different files have different schemas
|
769
|
-
// as such, we need to re-run the auto detection on each file
|
770
|
-
bind_data.options.auto_detect = false;
|
771
|
-
}
|
772
|
-
result->csv_types = bind_data.csv_types;
|
773
|
-
result->csv_names = bind_data.csv_names;
|
774
|
-
result->next_file = 1;
|
775
|
-
return std::move(result);
|
776
|
-
}
|
777
|
-
|
778
|
-
unique_ptr<LocalTableFunctionState> SingleThreadedReadCSVInitLocal(ExecutionContext &context,
|
779
|
-
TableFunctionInitInput &input,
|
780
|
-
GlobalTableFunctionState *global_state_p) {
|
781
|
-
auto &bind_data = input.bind_data->CastNoConst<ReadCSVData>();
|
782
|
-
auto &data = global_state_p->Cast<SingleThreadedCSVState>();
|
783
|
-
auto result = make_uniq<SingleThreadedCSVLocalState>();
|
784
|
-
result->csv_reader = data.GetCSVReader(context.client, bind_data, result->file_index, result->total_size);
|
785
|
-
return std::move(result);
|
786
|
-
}
|
787
|
-
|
788
|
-
static void SingleThreadedCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
|
789
|
-
auto &bind_data = data_p.bind_data->CastNoConst<ReadCSVData>();
|
790
|
-
auto &data = data_p.global_state->Cast<SingleThreadedCSVState>();
|
791
|
-
auto &lstate = data_p.local_state->Cast<SingleThreadedCSVLocalState>();
|
792
|
-
if (!lstate.csv_reader) {
|
793
|
-
// no csv_reader was set, this can happen when a filename-based filter has filtered out all possible files
|
794
|
-
return;
|
795
|
-
}
|
796
|
-
|
797
|
-
do {
|
798
|
-
lstate.csv_reader->ParseCSV(output);
|
799
|
-
// update the number of bytes read
|
800
|
-
D_ASSERT(lstate.bytes_read <= lstate.csv_reader->bytes_in_chunk);
|
801
|
-
auto bytes_read = MinValue<idx_t>(lstate.total_size, lstate.csv_reader->bytes_in_chunk);
|
802
|
-
auto current_progress = lstate.total_size == 0 ? 100 : 100 * bytes_read / lstate.total_size;
|
803
|
-
if (current_progress > lstate.current_progress) {
|
804
|
-
if (current_progress > 100) {
|
805
|
-
throw InternalException("Progress should never exceed 100");
|
806
|
-
}
|
807
|
-
data.progress_in_files += current_progress - lstate.current_progress;
|
808
|
-
lstate.current_progress = current_progress;
|
809
|
-
}
|
810
|
-
if (output.size() == 0) {
|
811
|
-
// exhausted this file, but we might have more files we can read
|
812
|
-
auto csv_reader = data.GetCSVReader(context, bind_data, lstate.file_index, lstate.total_size);
|
813
|
-
// add any left-over progress for this file to the progress bar
|
814
|
-
if (lstate.current_progress < 100) {
|
815
|
-
data.progress_in_files += 100 - lstate.current_progress;
|
816
|
-
}
|
817
|
-
// reset the current progress
|
818
|
-
lstate.current_progress = 0;
|
819
|
-
lstate.bytes_read = 0;
|
820
|
-
lstate.csv_reader = std::move(csv_reader);
|
821
|
-
if (!lstate.csv_reader) {
|
822
|
-
// no more files - we are done
|
823
|
-
return;
|
824
|
-
}
|
825
|
-
lstate.bytes_read = 0;
|
826
|
-
} else {
|
827
|
-
MultiFileReader::FinalizeChunk(bind_data.reader_bind, lstate.csv_reader->reader_data, output);
|
828
|
-
break;
|
829
|
-
}
|
830
|
-
} while (true);
|
831
|
-
}
|
832
|
-
|
833
|
-
//===--------------------------------------------------------------------===//
|
834
|
-
// Read CSV Functions
|
835
|
-
//===--------------------------------------------------------------------===//
|
836
|
-
static unique_ptr<GlobalTableFunctionState> ReadCSVInitGlobal(ClientContext &context, TableFunctionInitInput &input) {
|
837
|
-
auto &bind_data = input.bind_data->Cast<ReadCSVData>();
|
838
|
-
|
839
|
-
// Create the temporary rejects table
|
840
|
-
auto rejects_table = bind_data.options.rejects_table_name;
|
841
|
-
if (!rejects_table.empty()) {
|
842
|
-
CSVRejectsTable::GetOrCreate(context, rejects_table)->InitializeTable(context, bind_data);
|
843
|
-
}
|
844
|
-
if (bind_data.single_threaded) {
|
845
|
-
return SingleThreadedCSVInit(context, input);
|
846
|
-
} else {
|
847
|
-
return ParallelCSVInitGlobal(context, input);
|
848
|
-
}
|
849
|
-
}
|
850
|
-
|
851
|
-
unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
|
852
|
-
GlobalTableFunctionState *global_state_p) {
|
853
|
-
auto &csv_data = input.bind_data->Cast<ReadCSVData>();
|
854
|
-
if (csv_data.single_threaded) {
|
855
|
-
return SingleThreadedReadCSVInitLocal(context, input, global_state_p);
|
856
|
-
} else {
|
857
|
-
return ParallelReadCSVInitLocal(context, input, global_state_p);
|
858
|
-
}
|
859
|
-
}
|
860
|
-
|
861
|
-
static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
|
862
|
-
auto &bind_data = data_p.bind_data->Cast<ReadCSVData>();
|
863
|
-
if (bind_data.single_threaded) {
|
864
|
-
SingleThreadedCSVFunction(context, data_p, output);
|
865
|
-
} else {
|
866
|
-
ParallelReadCSVFunction(context, data_p, output);
|
867
|
-
}
|
868
225
|
}
|
869
226
|
|
870
227
|
static idx_t CSVReaderGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
|
871
228
|
LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state) {
|
872
|
-
auto &bind_data = bind_data_p->Cast<ReadCSVData>();
|
873
|
-
if (bind_data.single_threaded) {
|
874
|
-
auto &data = local_state->Cast<SingleThreadedCSVLocalState>();
|
875
|
-
return data.file_index;
|
876
|
-
}
|
877
|
-
auto &data = local_state->Cast<ParallelCSVLocalState>();
|
878
|
-
return data.csv_reader->buffer->batch_index;
|
229
|
+
auto &data = local_state->Cast<CSVLocalState>();
|
230
|
+
return data.csv_reader->scanner_idx;
|
879
231
|
}
|
880
232
|
|
881
|
-
static void ReadCSVAddNamedParameters(TableFunction &table_function) {
|
233
|
+
void ReadCSVTableFunction::ReadCSVAddNamedParameters(TableFunction &table_function) {
|
882
234
|
table_function.named_parameters["sep"] = LogicalType::VARCHAR;
|
883
235
|
table_function.named_parameters["delim"] = LogicalType::VARCHAR;
|
884
236
|
table_function.named_parameters["quote"] = LogicalType::VARCHAR;
|
@@ -912,19 +264,15 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
|
|
912
264
|
table_function.named_parameters["types"] = LogicalType::ANY;
|
913
265
|
table_function.named_parameters["names"] = LogicalType::LIST(LogicalType::VARCHAR);
|
914
266
|
table_function.named_parameters["column_names"] = LogicalType::LIST(LogicalType::VARCHAR);
|
267
|
+
table_function.named_parameters["parallel"] = LogicalType::BOOLEAN;
|
915
268
|
MultiFileReader::AddParameters(table_function);
|
916
269
|
}
|
917
270
|
|
918
271
|
double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
|
919
272
|
const GlobalTableFunctionState *global_state) {
|
920
273
|
auto &bind_data = bind_data_p->Cast<ReadCSVData>();
|
921
|
-
if (bind_data.single_threaded) {
|
922
|
-
auto &data = global_state->Cast<SingleThreadedCSVState>();
|
923
|
-
return data.GetProgress(bind_data);
|
924
|
-
} else {
|
925
|
-
auto &data = global_state->Cast<ParallelCSVGlobalState>();
|
926
|
-
return data.GetProgress(bind_data);
|
927
|
-
}
|
274
|
+
auto &data = global_state->Cast<CSVGlobalState>();
|
275
|
+
return data.GetProgress(bind_data);
|
928
276
|
}
|
929
277
|
|
930
278
|
void CSVComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p,
|
@@ -981,7 +329,7 @@ TableFunction ReadCSVTableFunction::GetFunction() {
|
|
981
329
|
TableFunction ReadCSVTableFunction::GetAutoFunction() {
|
982
330
|
auto read_csv_auto = ReadCSVTableFunction::GetFunction();
|
983
331
|
read_csv_auto.name = "read_csv_auto";
|
984
|
-
read_csv_auto.bind = ReadCSVAutoBind;
|
332
|
+
read_csv_auto.bind = ReadCSVBind;
|
985
333
|
return read_csv_auto;
|
986
334
|
}
|
987
335
|
|