duckdb 0.10.3-dev0.0 → 0.10.3-dev6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/NodeJS.yml +95 -4
- package/binding.gyp +4 -5
- package/examples/example.js +10 -0
- package/package.json +1 -1
- package/src/database.cpp +3 -3
- package/src/duckdb/extension/icu/icu_extension.cpp +44 -15
- package/src/duckdb/extension/icu/include/icu_extension.hpp +1 -0
- package/src/duckdb/extension/icu/third_party/icu/i18n/basictz.cpp +5 -5
- package/src/duckdb/extension/json/include/json_common.hpp +6 -1
- package/src/duckdb/extension/json/include/json_executors.hpp +5 -5
- package/src/duckdb/extension/json/include/json_extension.hpp +1 -0
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -2
- package/src/duckdb/extension/json/include/json_serializer.hpp +2 -2
- package/src/duckdb/extension/json/json_common.cpp +69 -43
- package/src/duckdb/extension/json/json_extension.cpp +8 -0
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +17 -16
- package/src/duckdb/extension/json/json_functions/json_create.cpp +3 -1
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +18 -13
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +4 -0
- package/src/duckdb/extension/json/json_functions/json_type.cpp +2 -2
- package/src/duckdb/extension/json/json_functions/read_json.cpp +14 -11
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +11 -8
- package/src/duckdb/extension/json/json_functions.cpp +4 -3
- package/src/duckdb/extension/json/json_scan.cpp +21 -11
- package/src/duckdb/extension/parquet/column_reader.cpp +9 -5
- package/src/duckdb/extension/parquet/column_writer.cpp +31 -18
- package/src/duckdb/extension/parquet/include/column_writer.hpp +1 -0
- package/src/duckdb/extension/parquet/include/null_column_reader.hpp +54 -0
- package/src/duckdb/extension/parquet/include/parquet_extension.hpp +1 -0
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +7 -2
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +6 -1
- package/src/duckdb/extension/parquet/parquet_crypto.cpp +8 -6
- package/src/duckdb/extension/parquet/parquet_extension.cpp +271 -126
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +39 -37
- package/src/duckdb/extension/parquet/parquet_reader.cpp +7 -4
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +5 -4
- package/src/duckdb/extension/parquet/parquet_writer.cpp +55 -2
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +2 -2
- package/src/duckdb/src/catalog/catalog.cpp +19 -39
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +6 -6
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +47 -31
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +84 -52
- package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +4 -0
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +7 -0
- package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +4 -1
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +36 -15
- package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +7 -1
- package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +5 -1
- package/src/duckdb/src/catalog/catalog_entry.cpp +7 -0
- package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +64 -0
- package/src/duckdb/src/catalog/catalog_set.cpp +32 -17
- package/src/duckdb/src/catalog/default/default_functions.cpp +2 -1
- package/src/duckdb/src/catalog/default/default_views.cpp +1 -1
- package/src/duckdb/src/catalog/dependency_manager.cpp +129 -9
- package/src/duckdb/src/catalog/duck_catalog.cpp +5 -0
- package/src/duckdb/src/common/adbc/nanoarrow/allocator.cpp +2 -2
- package/src/duckdb/src/common/adbc/nanoarrow/metadata.cpp +3 -3
- package/src/duckdb/src/common/adbc/nanoarrow/schema.cpp +7 -6
- package/src/duckdb/src/common/allocator.cpp +6 -2
- package/src/duckdb/src/common/arrow/appender/bool_data.cpp +1 -0
- package/src/duckdb/src/common/arrow/appender/struct_data.cpp +1 -1
- package/src/duckdb/src/common/arrow/appender/union_data.cpp +2 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +7 -5
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +3 -5
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +1 -1
- package/src/duckdb/src/common/box_renderer.cpp +6 -3
- package/src/duckdb/src/common/compressed_file_system.cpp +11 -7
- package/src/duckdb/src/common/enum_util.cpp +230 -17
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/enums/relation_type.cpp +2 -0
- package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
- package/src/duckdb/src/common/error_data.cpp +12 -0
- package/src/duckdb/src/common/exception_format_value.cpp +1 -0
- package/src/duckdb/src/common/extra_type_info.cpp +86 -8
- package/src/duckdb/src/common/file_system.cpp +39 -7
- package/src/duckdb/src/common/gzip_file_system.cpp +38 -14
- package/src/duckdb/src/common/hive_partitioning.cpp +28 -76
- package/src/duckdb/src/common/http_state.cpp +4 -4
- package/src/duckdb/src/common/local_file_system.cpp +29 -12
- package/src/duckdb/src/common/multi_file_list.cpp +285 -0
- package/src/duckdb/src/common/multi_file_reader.cpp +112 -80
- package/src/duckdb/src/common/operator/cast_operators.cpp +27 -225
- package/src/duckdb/src/common/operator/string_cast.cpp +13 -14
- package/src/duckdb/src/common/pipe_file_system.cpp +3 -2
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +1 -1
- package/src/duckdb/src/common/random_engine.cpp +2 -1
- package/src/duckdb/src/common/re2_regex.cpp +6 -4
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +10 -10
- package/src/duckdb/src/common/row_operations/row_external.cpp +4 -3
- package/src/duckdb/src/common/row_operations/row_heap_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_heap_scatter.cpp +17 -4
- package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +1 -1
- package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +4 -4
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +9 -8
- package/src/duckdb/src/common/serializer/memory_stream.cpp +6 -3
- package/src/duckdb/src/common/serializer/serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/comparators.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +6 -6
- package/src/duckdb/src/common/sort/radix_sort.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +3 -3
- package/src/duckdb/src/common/sort/sorted_block.cpp +5 -5
- package/src/duckdb/src/common/string_util.cpp +69 -162
- package/src/duckdb/src/common/types/bit.cpp +1 -1
- package/src/duckdb/src/common/types/blob.cpp +3 -3
- package/src/duckdb/src/common/types/cast_helpers.cpp +197 -0
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -9
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +1 -1
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +13 -5
- package/src/duckdb/src/common/types/conflict_info.cpp +1 -1
- package/src/duckdb/src/common/types/conflict_manager.cpp +1 -1
- package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
- package/src/duckdb/src/common/types/date.cpp +2 -2
- package/src/duckdb/src/common/types/decimal.cpp +12 -12
- package/src/duckdb/src/common/types/hash.cpp +1 -1
- package/src/duckdb/src/common/types/hugeint.cpp +10 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +4 -4
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +6 -5
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +21 -18
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -2
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +7 -0
- package/src/duckdb/src/common/types/string_heap.cpp +4 -0
- package/src/duckdb/src/common/types/timestamp.cpp +23 -1
- package/src/duckdb/src/common/types/uhugeint.cpp +1 -1
- package/src/duckdb/src/common/types/uuid.cpp +7 -6
- package/src/duckdb/src/common/types/value.cpp +54 -30
- package/src/duckdb/src/common/types/vector.cpp +71 -96
- package/src/duckdb/src/common/types/vector_buffer.cpp +4 -0
- package/src/duckdb/src/common/types/vector_cache.cpp +3 -3
- package/src/duckdb/src/common/types.cpp +124 -18
- package/src/duckdb/src/common/vector_operations/generators.cpp +4 -16
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +20 -0
- package/src/duckdb/src/common/vector_operations/null_operations.cpp +1 -1
- package/src/duckdb/src/common/vector_operations/numeric_inplace_operators.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/distributive/approx_count.cpp +1 -1
- package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +13 -6
- package/src/duckdb/src/core_functions/aggregate/distributive/bitagg.cpp +8 -5
- package/src/duckdb/src/core_functions/aggregate/distributive/bitstring_agg.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/distributive/sum.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +8 -5
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +14 -8
- package/src/duckdb/src/core_functions/function_list.cpp +2 -1
- package/src/duckdb/src/core_functions/lambda_functions.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/array/array_functions.cpp +5 -0
- package/src/duckdb/src/core_functions/scalar/bit/bitstring.cpp +4 -4
- package/src/duckdb/src/core_functions/scalar/blob/create_sort_key.cpp +3 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +17 -0
- package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/date/to_interval.cpp +19 -0
- package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +6 -5
- package/src/duckdb/src/core_functions/scalar/generic/current_setting.cpp +2 -3
- package/src/duckdb/src/core_functions/scalar/generic/system_functions.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +30 -21
- package/src/duckdb/src/core_functions/scalar/list/list_reduce.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +3 -3
- package/src/duckdb/src/core_functions/scalar/list/list_value.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/list/range.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/map/map.cpp +44 -14
- package/src/duckdb/src/core_functions/scalar/map/map_concat.cpp +17 -4
- package/src/duckdb/src/core_functions/scalar/map/map_entries.cpp +30 -13
- package/src/duckdb/src/core_functions/scalar/map/map_extract.cpp +25 -12
- package/src/duckdb/src/core_functions/scalar/map/map_keys_values.cpp +16 -4
- package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/operators/bitwise.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/random/setseed.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/string/bar.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/string/chr.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/string/hex.cpp +13 -13
- package/src/duckdb/src/core_functions/scalar/string/instr.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/string/pad.cpp +8 -8
- package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +15 -7
- package/src/duckdb/src/core_functions/scalar/string/string_split.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/string/to_base.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/string/translate.cpp +4 -4
- package/src/duckdb/src/core_functions/scalar/string/trim.cpp +13 -9
- package/src/duckdb/src/core_functions/scalar/string/unicode.cpp +1 -1
- package/src/duckdb/src/execution/adaptive_filter.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +17 -8
- package/src/duckdb/src/execution/index/art/art.cpp +6 -6
- package/src/duckdb/src/execution/index/bound_index.cpp +115 -0
- package/src/duckdb/src/execution/index/unbound_index.cpp +30 -0
- package/src/duckdb/src/execution/join_hashtable.cpp +2 -1
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +40 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +2 -2
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +16 -3
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +4 -4
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +2 -12
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +2 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +21 -5
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +312 -260
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +45 -16
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +19 -18
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +200 -55
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +26 -23
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +12 -12
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +7 -7
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +31 -22
- package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_load.cpp +24 -2
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_update_extensions.cpp +57 -0
- package/src/duckdb/src/execution/operator/helper/physical_verify_vector.cpp +13 -8
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +9 -9
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +4 -4
- package/src/duckdb/src/execution/operator/join/physical_left_delim_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/order/physical_order.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +4 -4
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +20 -13
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +70 -60
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +18 -7
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +88 -12
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +47 -27
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +34 -9
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +3 -0
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +2 -3
- package/src/duckdb/src/execution/operator/scan/physical_expression_scan.cpp +22 -7
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +9 -9
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -1
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +5 -4
- package/src/duckdb/src/execution/physical_operator.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_column_data_get.cpp +2 -4
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_delim_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +1 -0
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +4 -5
- package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +6 -5
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +4 -0
- package/src/duckdb/src/execution/physical_plan/plan_top_n.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_update.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan/plan_window.cpp +1 -24
- package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +38 -33
- package/src/duckdb/src/execution/reservoir_sample.cpp +42 -31
- package/src/duckdb/src/execution/window_executor.cpp +39 -39
- package/src/duckdb/src/execution/window_segment_tree.cpp +5 -2
- package/src/duckdb/src/function/aggregate/distributive/first.cpp +1 -1
- package/src/duckdb/src/function/cast/string_cast.cpp +3 -3
- package/src/duckdb/src/function/cast_rules.cpp +1 -0
- package/src/duckdb/src/function/function.cpp +2 -2
- package/src/duckdb/src/function/function_binder.cpp +9 -4
- package/src/duckdb/src/function/pragma/pragma_functions.cpp +2 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +4 -3
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -1
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +3 -2
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +1 -1
- package/src/duckdb/src/function/scalar/list/list_select.cpp +11 -4
- package/src/duckdb/src/function/scalar/list/list_zip.cpp +3 -1
- package/src/duckdb/src/function/scalar/operators/add.cpp +19 -9
- package/src/duckdb/src/function/scalar/sequence/nextval.cpp +77 -48
- package/src/duckdb/src/function/scalar/strftime_format.cpp +61 -39
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +12 -12
- package/src/duckdb/src/function/scalar/string/contains.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/length.cpp +9 -9
- package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/strip_accents.cpp +2 -1
- package/src/duckdb/src/function/scalar/string/substring.cpp +11 -9
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +2 -2
- package/src/duckdb/src/function/scalar_function.cpp +2 -1
- package/src/duckdb/src/function/table/arrow.cpp +18 -4
- package/src/duckdb/src/function/table/arrow_conversion.cpp +88 -66
- package/src/duckdb/src/function/table/copy_csv.cpp +94 -28
- package/src/duckdb/src/function/table/glob.cpp +17 -9
- package/src/duckdb/src/function/table/read_csv.cpp +37 -14
- package/src/duckdb/src/function/table/read_file.cpp +6 -2
- package/src/duckdb/src/function/table/repeat.cpp +5 -1
- package/src/duckdb/src/function/table/repeat_row.cpp +1 -1
- package/src/duckdb/src/function/table/sniff_csv.cpp +9 -3
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -3
- package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +31 -16
- package/src/duckdb/src/function/table/system/duckdb_databases.cpp +6 -1
- package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +2 -2
- package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +48 -6
- package/src/duckdb/src/function/table/system/duckdb_functions.cpp +8 -2
- package/src/duckdb/src/function/table/system/duckdb_indexes.cpp +9 -4
- package/src/duckdb/src/function/table/system/duckdb_memory.cpp +2 -2
- package/src/duckdb/src/function/table/system/duckdb_schemas.cpp +7 -2
- package/src/duckdb/src/function/table/system/duckdb_sequences.cpp +8 -3
- package/src/duckdb/src/function/table/system/duckdb_tables.cpp +18 -10
- package/src/duckdb/src/function/table/system/duckdb_temporary_files.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_types.cpp +12 -5
- package/src/duckdb/src/function/table/system/duckdb_views.cpp +9 -4
- package/src/duckdb/src/function/table/system/duckdb_which_secret.cpp +75 -0
- package/src/duckdb/src/function/table/system/pragma_database_size.cpp +4 -4
- package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp +3 -3
- package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +6 -6
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +11 -20
- package/src/duckdb/src/function/table/unnest.cpp +1 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/table_function.cpp +5 -4
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +1 -10
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +6 -8
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp +1 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +3 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp +2 -5
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +3 -4
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +4 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +72 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_transaction.hpp +3 -0
- package/src/duckdb/src/include/duckdb/catalog/dependency.hpp +4 -0
- package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +7 -1
- package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/standard_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/bit_utils.hpp +63 -98
- package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +4 -4
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enable_shared_from_this.ipp +42 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/enums/checkpoint_type.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/scan_options.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enums/scan_vector_type.hpp +17 -0
- package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/undo_flags.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/exception.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +15 -1
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/gzip_file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -1
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +3 -17
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +129 -0
- package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +151 -0
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +132 -56
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/operator/add.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +233 -0
- package/src/duckdb/src/include/duckdb/common/operator/integer_cast_operator.hpp +5 -4
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/optionally_owned_ptr.hpp +91 -0
- package/src/duckdb/src/include/duckdb/common/platform.h +6 -1
- package/src/duckdb/src/include/duckdb/common/radix.hpp +12 -4
- package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +8 -6
- package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +36 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +65 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +52 -6
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +36 -3
- package/src/duckdb/src/include/duckdb/common/shared_ptr.ipp +268 -0
- package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/string.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -7
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +43 -215
- package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +36 -11
- package/src/duckdb/src/include/duckdb/common/types/hash.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +20 -15
- package/src/duckdb/src/include/duckdb/common/types.hpp +14 -2
- package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/vector.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/vector_size.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/weak_ptr.ipp +117 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/sum_helpers.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +18 -0
- package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -4
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +145 -0
- package/src/duckdb/src/include/duckdb/execution/index/index_type.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +63 -0
- package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +24 -18
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_streaming_window.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +6 -6
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +7 -6
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +24 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +90 -20
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_update_extensions.hpp +52 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_delete.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +8 -5
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_expression_scan.hpp +6 -2
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +63 -8
- package/src/duckdb/src/include/duckdb/function/function.hpp +0 -5
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +6 -3
- package/src/duckdb/src/include/duckdb/function/replacement_scan.hpp +25 -2
- package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/sequence_functions.hpp +3 -4
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +13 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +7 -3
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +15 -2
- package/src/duckdb/src/include/duckdb/logging/http_logger.hpp +81 -0
- package/src/duckdb/src/include/duckdb/main/appender.hpp +1 -3
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/capi/cast/generic.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +8 -0
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +1 -19
- package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +11 -1
- package/src/duckdb/src/include/duckdb/main/client_context_wrapper.hpp +27 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +36 -3
- package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +7 -17
- package/src/duckdb/src/include/duckdb/main/database.hpp +25 -20
- package/src/duckdb/src/include/duckdb/main/extension.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +155 -10
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +79 -12
- package/src/duckdb/src/include/duckdb/main/extension_install_info.hpp +89 -0
- package/src/duckdb/src/include/duckdb/main/extension_util.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/external_dependencies.hpp +46 -4
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation/materialized_relation.hpp +35 -0
- package/src/duckdb/src/include/duckdb/main/relation/query_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/relation/value_relation.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/relation.hpp +5 -6
- package/src/duckdb/src/include/duckdb/main/settings.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +4 -4
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +0 -51
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +10 -17
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +5 -7
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +2 -1
- package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +10 -2
- package/src/duckdb/src/include/duckdb/optimizer/rule/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/optimizer/rule/timestamp_comparison.hpp +30 -0
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parallel/task.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/column_definition.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_scalar_function_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +12 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +3 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +10 -12
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +6 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_schema_info.hpp +3 -30
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +12 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/load_info.hpp +4 -7
- package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +7 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/pragma_info.hpp +2 -11
- package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/update_extensions_info.hpp +36 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -4
- package/src/duckdb/src/include/duckdb/parser/statement/alter_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/attach_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/call_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/copy_database_statement.hpp +1 -2
- package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -3
- package/src/duckdb/src/include/duckdb/parser/statement/detach_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/drop_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/execute_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/explain_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/export_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/extension_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/load_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/logical_plan_statement.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/statement/multi_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/pragma_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/prepare_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/relation_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/set_statement.hpp +6 -4
- package/src/duckdb/src/include/duckdb/parser/statement/transaction_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/update_extensions_statement.hpp +36 -0
- package/src/duckdb/src/include/duckdb/parser/statement/vacuum_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/column_data_ref.hpp +46 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +0 -4
- package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +5 -0
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +4 -4
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +42 -16
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/alter_binder.hpp +7 -6
- package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +5 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +5 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +6 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_export.hpp +7 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/parsed_data/bound_create_table_info.hpp +0 -4
- package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +4 -4
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_column_data_ref.hpp +30 -0
- package/src/duckdb/src/include/duckdb/planner/tableref/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +6 -3
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +16 -7
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +15 -3
- package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +10 -7
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +24 -15
- package/src/duckdb/src/include/duckdb/storage/compression/alp/algorithm/alp.hpp +8 -7
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +5 -4
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_utils.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +5 -3
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/bit_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +55 -16
- package/src/duckdb/src/include/duckdb/storage/index.hpp +33 -97
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +22 -9
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +6 -3
- package/src/duckdb/src/include/duckdb/storage/storage_lock.hpp +17 -13
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +27 -8
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +12 -0
- package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +5 -4
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +29 -10
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/data_table_info.hpp +32 -6
- package/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp +23 -0
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +5 -4
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +14 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +5 -5
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +6 -6
- package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +5 -4
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +25 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/table/update_state.hpp +20 -0
- package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +20 -4
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +10 -5
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +19 -4
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +30 -7
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +8 -5
- package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +8 -0
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +9 -1
- package/src/duckdb/src/include/duckdb.h +15 -11
- package/src/duckdb/src/main/appender.cpp +3 -1
- package/src/duckdb/src/main/attached_database.cpp +5 -3
- package/src/duckdb/src/main/capi/appender-c.cpp +4 -3
- package/src/duckdb/src/main/capi/arrow-c.cpp +4 -4
- package/src/duckdb/src/main/capi/helper-c.cpp +3 -3
- package/src/duckdb/src/main/capi/replacement_scan-c.cpp +6 -5
- package/src/duckdb/src/main/capi/result-c.cpp +19 -5
- package/src/duckdb/src/main/capi/table_function-c.cpp +1 -1
- package/src/duckdb/src/main/client_context.cpp +32 -23
- package/src/duckdb/src/main/client_context_file_opener.cpp +31 -0
- package/src/duckdb/src/main/client_context_wrapper.cpp +22 -0
- package/src/duckdb/src/main/client_data.cpp +5 -3
- package/src/duckdb/src/main/config.cpp +71 -2
- package/src/duckdb/src/main/connection.cpp +11 -10
- package/src/duckdb/src/main/connection_manager.cpp +9 -23
- package/src/duckdb/src/main/database.cpp +26 -30
- package/src/duckdb/src/main/db_instance_cache.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +396 -17
- package/src/duckdb/src/main/extension/extension_install.cpp +297 -89
- package/src/duckdb/src/main/extension/extension_load.cpp +137 -135
- package/src/duckdb/src/main/extension/extension_util.cpp +8 -2
- package/src/duckdb/src/main/extension.cpp +56 -0
- package/src/duckdb/src/main/extension_install_info.cpp +116 -0
- package/src/duckdb/src/main/materialized_query_result.cpp +11 -0
- package/src/duckdb/src/main/query_profiler.cpp +1 -1
- package/src/duckdb/src/main/relation/create_view_relation.cpp +6 -0
- package/src/duckdb/src/main/relation/materialized_relation.cpp +58 -0
- package/src/duckdb/src/main/relation/query_relation.cpp +20 -1
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +5 -3
- package/src/duckdb/src/main/relation/table_relation.cpp +4 -4
- package/src/duckdb/src/main/relation/value_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/view_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/write_csv_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/write_parquet_relation.cpp +1 -1
- package/src/duckdb/src/main/relation.cpp +36 -32
- package/src/duckdb/src/main/secret/secret.cpp +1 -1
- package/src/duckdb/src/main/settings/settings.cpp +137 -11
- package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +1 -1
- package/src/duckdb/src/optimizer/filter_combiner.cpp +3 -3
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +3 -2
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +3 -3
- package/src/duckdb/src/optimizer/join_order/cost_model.cpp +1 -1
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +4 -27
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +32 -107
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +68 -61
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +4 -2
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +3 -3
- package/src/duckdb/src/optimizer/optimizer.cpp +3 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +2 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +2 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +6 -6
- package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_semi_anti_join.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_single_join.cpp +2 -2
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +1 -1
- package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +1 -1
- package/src/duckdb/src/optimizer/rule/case_simplification.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +2 -1
- package/src/duckdb/src/optimizer/rule/constant_folding.cpp +1 -0
- package/src/duckdb/src/optimizer/rule/distributivity.cpp +1 -1
- package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +1 -0
- package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +1 -0
- package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +1 -0
- package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +1 -2
- package/src/duckdb/src/optimizer/rule/timestamp_comparison.cpp +107 -0
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +4 -4
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +6 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +2 -1
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +2 -2
- package/src/duckdb/src/parallel/executor.cpp +12 -9
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parallel/pipeline.cpp +2 -2
- package/src/duckdb/src/parallel/task_scheduler.cpp +9 -3
- package/src/duckdb/src/parser/column_definition.cpp +1 -0
- package/src/duckdb/src/parser/constraints/foreign_key_constraint.cpp +9 -7
- package/src/duckdb/src/parser/expression/star_expression.cpp +2 -2
- package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +4 -0
- package/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +4 -0
- package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +183 -0
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +23 -0
- package/src/duckdb/src/parser/parsed_data/comment_on_column_info.cpp +15 -2
- package/src/duckdb/src/parser/parsed_data/copy_info.cpp +100 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +16 -2
- package/src/duckdb/src/parser/parsed_data/create_info.cpp +2 -0
- package/src/duckdb/src/parser/parsed_data/create_schema_info.cpp +40 -0
- package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +22 -0
- package/src/duckdb/src/parser/parsed_data/create_table_info.cpp +12 -4
- package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +37 -14
- package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +4 -4
- package/src/duckdb/src/parser/parsed_data/detach_info.cpp +12 -0
- package/src/duckdb/src/parser/parsed_data/drop_info.cpp +21 -0
- package/src/duckdb/src/parser/parsed_data/load_info.cpp +46 -0
- package/src/duckdb/src/parser/parsed_data/parse_info.cpp +50 -0
- package/src/duckdb/src/parser/parsed_data/pragma_info.cpp +33 -0
- package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +22 -0
- package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +20 -0
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +1 -0
- package/src/duckdb/src/parser/parser.cpp +5 -4
- package/src/duckdb/src/parser/query_node.cpp +6 -2
- package/src/duckdb/src/parser/statement/alter_statement.cpp +4 -0
- package/src/duckdb/src/parser/statement/attach_statement.cpp +4 -0
- package/src/duckdb/src/parser/statement/call_statement.cpp +8 -0
- package/src/duckdb/src/parser/statement/copy_statement.cpp +1 -91
- package/src/duckdb/src/parser/statement/detach_statement.cpp +4 -0
- package/src/duckdb/src/parser/statement/drop_statement.cpp +4 -0
- package/src/duckdb/src/parser/statement/execute_statement.cpp +15 -0
- package/src/duckdb/src/parser/statement/explain_statement.cpp +19 -0
- package/src/duckdb/src/parser/statement/export_statement.cpp +18 -0
- package/src/duckdb/src/parser/statement/extension_statement.cpp +4 -0
- package/src/duckdb/src/parser/statement/load_statement.cpp +4 -0
- package/src/duckdb/src/parser/statement/multi_statement.cpp +8 -0
- package/src/duckdb/src/parser/statement/pragma_statement.cpp +4 -0
- package/src/duckdb/src/parser/statement/prepare_statement.cpp +13 -0
- package/src/duckdb/src/parser/statement/relation_statement.cpp +4 -0
- package/src/duckdb/src/parser/statement/set_statement.cpp +33 -4
- package/src/duckdb/src/parser/statement/transaction_statement.cpp +4 -0
- package/src/duckdb/src/parser/statement/update_extensions_statement.cpp +34 -0
- package/src/duckdb/src/parser/statement/vacuum_statement.cpp +4 -0
- package/src/duckdb/src/parser/tableref/column_data_ref.cpp +81 -0
- package/src/duckdb/src/parser/tableref.cpp +1 -0
- package/src/duckdb/src/parser/transform/expression/transform_boolean_test.cpp +2 -2
- package/src/duckdb/src/parser/transform/expression/transform_cast.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_interval.cpp +6 -1
- package/src/duckdb/src/parser/transform/expression/transform_param_ref.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_positional_reference.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +16 -0
- package/src/duckdb/src/parser/transform/helpers/nodetype_to_string.cpp +2 -0
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +97 -63
- package/src/duckdb/src/parser/transform/statement/transform_checkpoint.cpp +2 -0
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_load.cpp +4 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +19 -0
- package/src/duckdb/src/parser/transformer.cpp +5 -2
- package/src/duckdb/src/planner/bind_context.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +35 -8
- package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +13 -7
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +35 -2
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +0 -2
- package/src/duckdb/src/planner/binder/statement/bind_attach.cpp +2 -0
- package/src/duckdb/src/planner/binder/statement/bind_call.cpp +2 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +7 -3
- package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +21 -68
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +141 -28
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +115 -57
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +4 -0
- package/src/duckdb/src/planner/binder/statement/bind_detach.cpp +2 -0
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +2 -0
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +4 -2
- package/src/duckdb/src/planner/binder/statement/bind_explain.cpp +2 -0
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +15 -4
- package/src/duckdb/src/planner/binder/statement/bind_extension.cpp +1 -0
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +5 -1
- package/src/duckdb/src/planner/binder/statement/bind_load.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_logical_plan.cpp +2 -0
- package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +2 -0
- package/src/duckdb/src/planner/binder/statement/bind_prepare.cpp +1 -0
- package/src/duckdb/src/planner/binder/statement/bind_select.cpp +1 -0
- package/src/duckdb/src/planner/binder/statement/bind_set.cpp +4 -0
- package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +6 -4
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -1
- package/src/duckdb/src/planner/binder/statement/bind_update_extensions.cpp +28 -0
- package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +2 -0
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +32 -29
- package/src/duckdb/src/planner/binder/tableref/bind_column_data_ref.cpp +16 -0
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +7 -4
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +32 -22
- package/src/duckdb/src/planner/binder/tableref/plan_column_data_ref.cpp +15 -0
- package/src/duckdb/src/planner/binder.cpp +50 -30
- package/src/duckdb/src/planner/bound_parameter_map.cpp +1 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_expression.cpp +3 -2
- package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +24 -7
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +27 -2
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +34 -19
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +33 -0
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +10 -1
- package/src/duckdb/src/planner/expression_binder.cpp +4 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +3 -1
- package/src/duckdb/src/planner/filter/constant_filter.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_column_data_get.cpp +16 -2
- package/src/duckdb/src/planner/operator/logical_delete.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +4 -1
- package/src/duckdb/src/planner/operator/logical_insert.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_top_n.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_update.cpp +2 -0
- package/src/duckdb/src/planner/planner.cpp +35 -9
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +34 -9
- package/src/duckdb/src/planner/table_binding.cpp +1 -1
- package/src/duckdb/src/storage/arena_allocator.cpp +5 -3
- package/src/duckdb/src/storage/buffer/block_handle.cpp +3 -3
- package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -1
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +83 -22
- package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +2 -2
- package/src/duckdb/src/storage/buffer_manager.cpp +6 -2
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +9 -0
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +7 -2
- package/src/duckdb/src/storage/checkpoint_manager.cpp +68 -104
- package/src/duckdb/src/storage/compression/bitpacking.cpp +19 -13
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +9 -7
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +1 -1
- package/src/duckdb/src/storage/compression/fsst.cpp +11 -7
- package/src/duckdb/src/storage/compression/rle.cpp +1 -1
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +5 -4
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +1 -1
- package/src/duckdb/src/storage/data_table.cpp +254 -101
- package/src/duckdb/src/storage/index.cpp +2 -106
- package/src/duckdb/src/storage/local_storage.cpp +38 -50
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +2 -2
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +1 -1
- package/src/duckdb/src/storage/optimistic_data_writer.cpp +9 -11
- package/src/duckdb/src/storage/partial_block_manager.cpp +6 -6
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +8 -0
- package/src/duckdb/src/storage/serialization/serialize_dependency.cpp +49 -0
- package/src/duckdb/src/storage/serialization/serialize_extension_install_info.cpp +28 -0
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +5 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +78 -2
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +21 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +16 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +6 -1
- package/src/duckdb/src/storage/single_file_block_manager.cpp +22 -19
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +68 -40
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +3 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +1 -1
- package/src/duckdb/src/storage/storage_info.cpp +67 -23
- package/src/duckdb/src/storage/storage_lock.cpp +77 -17
- package/src/duckdb/src/storage/storage_manager.cpp +56 -43
- package/src/duckdb/src/storage/table/array_column_data.cpp +13 -12
- package/src/duckdb/src/storage/table/column_data.cpp +80 -37
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +1 -1
- package/src/duckdb/src/storage/table/column_segment.cpp +6 -5
- package/src/duckdb/src/storage/table/list_column_data.cpp +15 -14
- package/src/duckdb/src/storage/table/row_group.cpp +38 -23
- package/src/duckdb/src/storage/table/row_group_collection.cpp +52 -38
- package/src/duckdb/src/storage/table/row_version_manager.cpp +2 -2
- package/src/duckdb/src/storage/table/standard_column_data.cpp +28 -16
- package/src/duckdb/src/storage/table/struct_column_data.cpp +23 -16
- package/src/duckdb/src/storage/table/table_statistics.cpp +27 -8
- package/src/duckdb/src/storage/table/update_segment.cpp +6 -6
- package/src/duckdb/src/storage/table/validity_column_data.cpp +5 -0
- package/src/duckdb/src/storage/table_index_list.cpp +69 -42
- package/src/duckdb/src/storage/temporary_file_manager.cpp +111 -17
- package/src/duckdb/src/storage/temporary_memory_manager.cpp +4 -4
- package/src/duckdb/src/storage/wal_replay.cpp +27 -22
- package/src/duckdb/src/storage/write_ahead_log.cpp +42 -22
- package/src/duckdb/src/transaction/cleanup_state.cpp +4 -7
- package/src/duckdb/src/transaction/commit_state.cpp +17 -8
- package/src/duckdb/src/transaction/duck_transaction.cpp +60 -15
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +154 -121
- package/src/duckdb/src/transaction/meta_transaction.cpp +19 -1
- package/src/duckdb/src/transaction/rollback_state.cpp +2 -0
- package/src/duckdb/src/transaction/transaction.cpp +7 -7
- package/src/duckdb/src/transaction/undo_buffer.cpp +37 -17
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +5 -5
- package/src/duckdb/third_party/fsst/fsst.h +1 -1
- package/src/duckdb/third_party/jaro_winkler/details/common.hpp +9 -9
- package/src/duckdb/third_party/jaro_winkler/details/intrinsics.hpp +1 -1
- package/src/duckdb/third_party/jaro_winkler/details/jaro_impl.hpp +18 -18
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +12 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +555 -1032
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +3 -0
- package/src/duckdb/third_party/libpg_query/include/utils/datetime.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/pg_functions.cpp +13 -6
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +23925 -23444
- package/src/duckdb/third_party/mbedtls/library/constant_time.cpp +1 -1
- package/src/duckdb/third_party/parquet/parquet_types.cpp +3 -0
- package/src/duckdb/third_party/parquet/parquet_types.h +2 -1
- package/src/duckdb/third_party/re2/re2/compile.cc +2 -2
- package/src/duckdb/third_party/re2/re2/dfa.cc +3 -8
- package/src/duckdb/third_party/re2/re2/onepass.cc +4 -3
- package/src/duckdb/third_party/re2/re2/prog.cc +10 -10
- package/src/duckdb/third_party/re2/re2/prog.h +8 -8
- package/src/duckdb/third_party/tdigest/t_digest.hpp +6 -6
- package/src/duckdb/third_party/utf8proc/include/utf8proc.hpp +1 -1
- package/src/duckdb/third_party/yyjson/include/yyjson.hpp +7930 -0
- package/src/duckdb/third_party/yyjson/yyjson.cpp +9490 -0
- package/src/duckdb/ub_src_catalog.cpp +2 -0
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_execution_index.cpp +3 -1
- package/src/duckdb/ub_src_execution_operator_helper.cpp +2 -0
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/src/duckdb/ub_src_main.cpp +4 -0
- package/src/duckdb/ub_src_main_relation.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +8 -8
- package/src/duckdb/ub_src_optimizer_join_order.cpp +0 -2
- package/src/duckdb/ub_src_optimizer_rule.cpp +4 -2
- package/src/duckdb/ub_src_parser_parsed_data.cpp +10 -0
- package/src/duckdb/ub_src_parser_statement.cpp +2 -0
- package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_statement.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_tableref.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +4 -0
- package/src/duckdb/src/catalog/catalog_entry/ub_duckdb_catalog_entries.cpp +0 -16
- package/src/duckdb/src/catalog/default/ub_duckdb_catalog_default_entries.cpp +0 -5
- package/src/duckdb/src/catalog/ub_duckdb_catalog.cpp +0 -10
- package/src/duckdb/src/common/adbc/nanoarrow/ub_duckdb_adbc_nanoarrow.cpp +0 -5
- package/src/duckdb/src/common/adbc/ub_duckdb_adbc.cpp +0 -3
- package/src/duckdb/src/common/arrow/appender/ub_duckdb_common_arrow_appender.cpp +0 -6
- package/src/duckdb/src/common/arrow/ub_duckdb_common_arrow.cpp +0 -4
- package/src/duckdb/src/common/crypto/ub_duckdb_common_crypto.cpp +0 -2
- package/src/duckdb/src/common/enums/ub_duckdb_common_enums.cpp +0 -12
- package/src/duckdb/src/common/operator/ub_duckdb_common_operators.cpp +0 -4
- package/src/duckdb/src/common/progress_bar/ub_duckdb_progress_bar.cpp +0 -3
- package/src/duckdb/src/common/row_operations/ub_duckdb_row_operations.cpp +0 -9
- package/src/duckdb/src/common/serializer/ub_duckdb_common_serializer.cpp +0 -7
- package/src/duckdb/src/common/sort/ub_duckdb_sort.cpp +0 -7
- package/src/duckdb/src/common/types/column/ub_duckdb_common_types_column.cpp +0 -6
- package/src/duckdb/src/common/types/row/ub_duckdb_common_types_row.cpp +0 -11
- package/src/duckdb/src/common/types/ub_duckdb_common_types.cpp +0 -28
- package/src/duckdb/src/common/ub_duckdb_common.cpp +0 -34
- package/src/duckdb/src/common/value_operations/ub_duckdb_value_operations.cpp +0 -2
- package/src/duckdb/src/core_functions/aggregate/algebraic/ub_duckdb_aggr_algebraic.cpp +0 -5
- package/src/duckdb/src/core_functions/aggregate/distributive/ub_duckdb_aggr_distributive.cpp +0 -13
- package/src/duckdb/src/core_functions/aggregate/holistic/ub_duckdb_aggr_holistic.cpp +0 -5
- package/src/duckdb/src/core_functions/aggregate/nested/ub_duckdb_aggr_nested.cpp +0 -3
- package/src/duckdb/src/core_functions/aggregate/regression/ub_duckdb_aggr_regr.cpp +0 -8
- package/src/duckdb/src/core_functions/scalar/bit/ub_duckdb_func_bit.cpp +0 -2
- package/src/duckdb/src/core_functions/scalar/blob/ub_duckdb_func_blob.cpp +0 -3
- package/src/duckdb/src/core_functions/scalar/date/ub_duckdb_func_date.cpp +0 -12
- package/src/duckdb/src/core_functions/scalar/debug/ub_duckdb_func_debug.cpp +0 -2
- package/src/duckdb/src/core_functions/scalar/enum/ub_duckdb_func_enum.cpp +0 -2
- package/src/duckdb/src/core_functions/scalar/generic/ub_duckdb_func_generic.cpp +0 -9
- package/src/duckdb/src/core_functions/scalar/list/ub_duckdb_func_list.cpp +0 -11
- package/src/duckdb/src/core_functions/scalar/map/ub_duckdb_func_map_nested.cpp +0 -8
- package/src/duckdb/src/core_functions/scalar/math/ub_duckdb_func_math.cpp +0 -1
- package/src/duckdb/src/core_functions/scalar/operators/ub_duckdb_func_ops.cpp +0 -1
- package/src/duckdb/src/core_functions/scalar/random/ub_duckdb_func_random.cpp +0 -3
- package/src/duckdb/src/core_functions/scalar/string/ub_duckdb_func_string.cpp +0 -26
- package/src/duckdb/src/core_functions/scalar/struct/ub_duckdb_func_struct.cpp +0 -3
- package/src/duckdb/src/core_functions/scalar/union/ub_duckdb_func_union.cpp +0 -4
- package/src/duckdb/src/core_functions/ub_duckdb_core_functions.cpp +0 -3
- package/src/duckdb/src/execution/expression_executor/ub_duckdb_expression_executor.cpp +0 -11
- package/src/duckdb/src/execution/index/art/ub_duckdb_art_index_execution.cpp +0 -12
- package/src/duckdb/src/execution/index/art/ub_duckdb_execution_index_art.cpp +0 -11
- package/src/duckdb/src/execution/index/ub_duckdb_execution_index.cpp +0 -3
- package/src/duckdb/src/execution/nested_loop_join/ub_duckdb_nested_loop_join.cpp +0 -3
- package/src/duckdb/src/execution/operator/aggregate/ub_duckdb_operator_aggregate.cpp +0 -9
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/ub_duckdb_operator_csv_sniffer.cpp +0 -7
- package/src/duckdb/src/execution/operator/csv_scanner/ub_duckdb_operator_csv_scanner.cpp +0 -10
- package/src/duckdb/src/execution/operator/filter/ub_duckdb_operator_filter.cpp +0 -2
- package/src/duckdb/src/execution/operator/helper/ub_duckdb_operator_helper.cpp +0 -18
- package/src/duckdb/src/execution/operator/join/ub_duckdb_operator_join.cpp +0 -16
- package/src/duckdb/src/execution/operator/order/ub_duckdb_operator_order.cpp +0 -3
- package/src/duckdb/src/execution/operator/persistent/ub_duckdb_operator_persistent.cpp +0 -10
- package/src/duckdb/src/execution/operator/projection/ub_duckdb_operator_projection.cpp +0 -5
- package/src/duckdb/src/execution/operator/scan/ub_duckdb_operator_scan.cpp +0 -7
- package/src/duckdb/src/execution/operator/schema/ub_duckdb_operator_schema.cpp +0 -12
- package/src/duckdb/src/execution/operator/set/ub_duckdb_operator_set.cpp +0 -4
- package/src/duckdb/src/execution/physical_plan/ub_duckdb_physical_plan.cpp +0 -44
- package/src/duckdb/src/execution/ub_duckdb_execution.cpp +0 -15
- package/src/duckdb/src/function/aggregate/algebraic/ub_duckdb_aggr_algebraic.cpp +0 -5
- package/src/duckdb/src/function/aggregate/distributive/ub_duckdb_aggr_distr.cpp +0 -3
- package/src/duckdb/src/function/aggregate/holistic/ub_duckdb_aggr_holistic.cpp +0 -5
- package/src/duckdb/src/function/aggregate/nested/ub_duckdb_aggr_nested.cpp +0 -3
- package/src/duckdb/src/function/aggregate/regression/ub_duckdb_aggr_regr.cpp +0 -8
- package/src/duckdb/src/function/aggregate/ub_duckdb_func_aggr.cpp +0 -3
- package/src/duckdb/src/function/cast/ub_duckdb_func_cast.cpp +0 -17
- package/src/duckdb/src/function/cast/union/ub_duckdb_union_cast.cpp +0 -2
- package/src/duckdb/src/function/pragma/ub_duckdb_func_pragma.cpp +0 -3
- package/src/duckdb/src/function/scalar/bit/ub_duckdb_func_bit.cpp +0 -2
- package/src/duckdb/src/function/scalar/blob/ub_duckdb_func_blob.cpp +0 -3
- package/src/duckdb/src/function/scalar/compressed_materialization/ub_duckdb_func_compressed_materialization.cpp +0 -3
- package/src/duckdb/src/function/scalar/date/ub_duckdb_func_date.cpp +0 -12
- package/src/duckdb/src/function/scalar/enum/ub_duckdb_func_enum.cpp +0 -2
- package/src/duckdb/src/function/scalar/generic/ub_duckdb_func_generic.cpp +0 -8
- package/src/duckdb/src/function/scalar/generic/ub_duckdb_func_generic_main.cpp +0 -2
- package/src/duckdb/src/function/scalar/list/ub_duckdb_func_list.cpp +0 -11
- package/src/duckdb/src/function/scalar/list/ub_duckdb_func_list_nested.cpp +0 -5
- package/src/duckdb/src/function/scalar/map/ub_duckdb_func_map_nested.cpp +0 -7
- package/src/duckdb/src/function/scalar/math/ub_duckdb_func_math.cpp +0 -4
- package/src/duckdb/src/function/scalar/operators/ub_duckdb_func_ops.cpp +0 -6
- package/src/duckdb/src/function/scalar/operators/ub_duckdb_func_ops_main.cpp +0 -5
- package/src/duckdb/src/function/scalar/sequence/ub_duckdb_func_seq.cpp +0 -2
- package/src/duckdb/src/function/scalar/string/regexp/ub_duckdb_func_string_regexp.cpp +0 -3
- package/src/duckdb/src/function/scalar/string/ub_duckdb_func_string.cpp +0 -31
- package/src/duckdb/src/function/scalar/string/ub_duckdb_func_string_main.cpp +0 -12
- package/src/duckdb/src/function/scalar/struct/ub_duckdb_func_struct.cpp +0 -4
- package/src/duckdb/src/function/scalar/struct/ub_duckdb_func_struct_main.cpp +0 -2
- package/src/duckdb/src/function/scalar/system/ub_duckdb_func_system.cpp +0 -2
- package/src/duckdb/src/function/scalar/ub_duckdb_func_scalar.cpp +0 -9
- package/src/duckdb/src/function/scalar/union/ub_duckdb_func_union.cpp +0 -4
- package/src/duckdb/src/function/table/arrow/ub_duckdb_arrow_conversion.cpp +0 -2
- package/src/duckdb/src/function/table/system/ub_duckdb_table_func_system.cpp +0 -23
- package/src/duckdb/src/function/table/ub_duckdb_func_table.cpp +0 -16
- package/src/duckdb/src/function/table/version/ub_duckdb_func_table_version.cpp +0 -2
- package/src/duckdb/src/function/ub_duckdb_function.cpp +0 -14
- package/src/duckdb/src/main/capi/cast/ub_duckdb_main_capi_cast.cpp +0 -3
- package/src/duckdb/src/main/capi/ub_duckdb_main_capi.cpp +0 -19
- package/src/duckdb/src/main/chunk_scan_state/ub_duckdb_main_chunk_scan_state.cpp +0 -2
- package/src/duckdb/src/main/extension/ub_duckdb_main_extension.cpp +0 -6
- package/src/duckdb/src/main/relation/ub_duckdb_main_relation.cpp +0 -26
- package/src/duckdb/src/main/settings/ub_duckdb_main_settings.cpp +0 -2
- package/src/duckdb/src/main/ub_duckdb_main.cpp +0 -25
- package/src/duckdb/src/optimizer/compressed_materialization/ub_duckdb_optimizer_compressed_materialization.cpp +0 -4
- package/src/duckdb/src/optimizer/join_order/ub_duckdb_optimizer_join_order.cpp +0 -12
- package/src/duckdb/src/optimizer/matcher/ub_duckdb_optimizer_matcher.cpp +0 -2
- package/src/duckdb/src/optimizer/pullup/ub_duckdb_optimizer_pullup.cpp +0 -6
- package/src/duckdb/src/optimizer/pushdown/ub_duckdb_optimizer_pushdown.cpp +0 -12
- package/src/duckdb/src/optimizer/rule/ub_duckdb_optimizer_rules.cpp +0 -16
- package/src/duckdb/src/optimizer/statistics/expression/ub_duckdb_optimizer_statistics_expr.cpp +0 -11
- package/src/duckdb/src/optimizer/statistics/operator/ub_duckdb_optimizer_statistics_op.cpp +0 -11
- package/src/duckdb/src/optimizer/ub_duckdb_optimizer.cpp +0 -20
- package/src/duckdb/src/parallel/ub_duckdb_parallel.cpp +0 -15
- package/src/duckdb/src/parser/constraints/ub_duckdb_constraints.cpp +0 -5
- package/src/duckdb/src/parser/expression/ub_duckdb_expression.cpp +0 -18
- package/src/duckdb/src/parser/parsed_data/ub_duckdb_parsed_data.cpp +0 -24
- package/src/duckdb/src/parser/query_node/ub_duckdb_query_node.cpp +0 -5
- package/src/duckdb/src/parser/statement/ub_duckdb_statement.cpp +0 -25
- package/src/duckdb/src/parser/tableref/ub_duckdb_parser_tableref.cpp +0 -8
- package/src/duckdb/src/parser/transform/constraint/ub_duckdb_transformer_constraint.cpp +0 -2
- package/src/duckdb/src/parser/transform/expression/ub_duckdb_transformer_expression.cpp +0 -20
- package/src/duckdb/src/parser/transform/helpers/ub_duckdb_transformer_helpers.cpp +0 -8
- package/src/duckdb/src/parser/transform/statement/ub_duckdb_transformer_statement.cpp +0 -37
- package/src/duckdb/src/parser/transform/tableref/ub_duckdb_transformer_tableref.cpp +0 -8
- package/src/duckdb/src/parser/ub_duckdb_parser.cpp +0 -15
- package/src/duckdb/src/planner/binder/expression/ub_duckdb_bind_expression.cpp +0 -20
- package/src/duckdb/src/planner/binder/query_node/ub_duckdb_bind_query_node.cpp +0 -12
- package/src/duckdb/src/planner/binder/statement/ub_duckdb_bind_statement.cpp +0 -26
- package/src/duckdb/src/planner/binder/tableref/ub_duckdb_bind_tableref.cpp +0 -17
- package/src/duckdb/src/planner/expression/ub_duckdb_planner_expression.cpp +0 -19
- package/src/duckdb/src/planner/expression_binder/ub_duckdb_expression_binders.cpp +0 -20
- package/src/duckdb/src/planner/filter/ub_duckdb_planner_filter.cpp +0 -4
- package/src/duckdb/src/planner/operator/ub_duckdb_planner_operator.cpp +0 -43
- package/src/duckdb/src/planner/parsed_data/ub_duckdb_planner_parsed_data.cpp +0 -2
- package/src/duckdb/src/planner/subquery/ub_duckdb_planner_subquery.cpp +0 -4
- package/src/duckdb/src/planner/ub_duckdb_planner.cpp +0 -15
- package/src/duckdb/src/storage/buffer/ub_duckdb_storage_buffer.cpp +0 -6
- package/src/duckdb/src/storage/checkpoint/ub_duckdb_storage_checkpoint.cpp +0 -5
- package/src/duckdb/src/storage/compression/chimp/ub_duckdb_storage_compression_chimp.cpp +0 -6
- package/src/duckdb/src/storage/compression/ub_duckdb_storage_compression.cpp +0 -12
- package/src/duckdb/src/storage/metadata/ub_duckdb_storage_metadata.cpp +0 -4
- package/src/duckdb/src/storage/serialization/ub_duckdb_storage_serialization.cpp +0 -16
- package/src/duckdb/src/storage/statistics/ub_duckdb_storage_statistics.cpp +0 -10
- package/src/duckdb/src/storage/table/ub_duckdb_storage_table.cpp +0 -17
- package/src/duckdb/src/storage/ub_duckdb_storage.cpp +0 -20
- package/src/duckdb/src/transaction/ub_duckdb_transaction.cpp +0 -11
@@ -17,6 +17,7 @@
|
|
17
17
|
#include <string>
|
18
18
|
#include <vector>
|
19
19
|
#ifndef DUCKDB_AMALGAMATION
|
20
|
+
#include "duckdb/common/helper.hpp"
|
20
21
|
#include "duckdb/catalog/catalog.hpp"
|
21
22
|
#include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
|
22
23
|
#include "duckdb/common/constants.hpp"
|
@@ -44,8 +45,10 @@
|
|
44
45
|
namespace duckdb {
|
45
46
|
|
46
47
|
struct ParquetReadBindData : public TableFunctionData {
|
48
|
+
shared_ptr<MultiFileList> file_list;
|
49
|
+
unique_ptr<MultiFileReader> multi_file_reader;
|
50
|
+
|
47
51
|
shared_ptr<ParquetReader> initial_reader;
|
48
|
-
vector<string> files;
|
49
52
|
atomic<idx_t> chunk_count;
|
50
53
|
vector<string> names;
|
51
54
|
vector<LogicalType> types;
|
@@ -58,6 +61,7 @@ struct ParquetReadBindData : public TableFunctionData {
|
|
58
61
|
idx_t initial_file_cardinality;
|
59
62
|
idx_t initial_file_row_groups;
|
60
63
|
ParquetOptions parquet_options;
|
64
|
+
|
61
65
|
MultiFileReaderBindData reader_bind;
|
62
66
|
|
63
67
|
void Initialize(shared_ptr<ParquetReader> reader) {
|
@@ -74,23 +78,45 @@ struct ParquetReadLocalState : public LocalTableFunctionState {
|
|
74
78
|
bool is_parallel;
|
75
79
|
idx_t batch_index;
|
76
80
|
idx_t file_index;
|
77
|
-
//! The DataChunk containing all read columns (even
|
81
|
+
//! The DataChunk containing all read columns (even columns that are immediately removed)
|
78
82
|
DataChunk all_columns;
|
79
83
|
};
|
80
84
|
|
81
85
|
enum class ParquetFileState : uint8_t { UNOPENED, OPENING, OPEN, CLOSED };
|
82
86
|
|
87
|
+
struct ParquetFileReaderData {
|
88
|
+
// Create data for an unopened file
|
89
|
+
explicit ParquetFileReaderData(const string &file_to_be_opened)
|
90
|
+
: reader(nullptr), file_state(ParquetFileState::UNOPENED), file_mutex(make_uniq<mutex>()),
|
91
|
+
file_to_be_opened(file_to_be_opened) {
|
92
|
+
}
|
93
|
+
// Create data for an existing reader
|
94
|
+
explicit ParquetFileReaderData(shared_ptr<ParquetReader> reader_p)
|
95
|
+
: reader(std::move(reader_p)), file_state(ParquetFileState::OPEN), file_mutex(make_uniq<mutex>()) {
|
96
|
+
}
|
97
|
+
|
98
|
+
//! Currently opened reader for the file
|
99
|
+
shared_ptr<ParquetReader> reader;
|
100
|
+
//! Flag to indicate the file is being opened
|
101
|
+
ParquetFileState file_state;
|
102
|
+
//! Mutexes to wait for the file when it is being opened
|
103
|
+
unique_ptr<mutex> file_mutex;
|
104
|
+
|
105
|
+
//! (only set when file_state is UNOPENED) the file to be opened
|
106
|
+
string file_to_be_opened;
|
107
|
+
};
|
108
|
+
|
83
109
|
struct ParquetReadGlobalState : public GlobalTableFunctionState {
|
110
|
+
//! The scan over the file_list
|
111
|
+
MultiFileListScanData file_list_scan;
|
112
|
+
|
113
|
+
unique_ptr<MultiFileReaderGlobalState> multi_file_reader_state;
|
114
|
+
|
84
115
|
mutex lock;
|
85
116
|
|
86
|
-
//! The
|
87
|
-
|
88
|
-
|
89
|
-
vector<shared_ptr<ParquetReader>> readers;
|
90
|
-
//! Flag to indicate a file is being opened
|
91
|
-
vector<ParquetFileState> file_states;
|
92
|
-
//! Mutexes to wait for a file that is currently being opened
|
93
|
-
unique_ptr<mutex[]> file_mutexes;
|
117
|
+
//! The current set of parquet readers
|
118
|
+
vector<ParquetFileReaderData> readers;
|
119
|
+
|
94
120
|
//! Signal to other threads that a file failed to open, letting every thread abort.
|
95
121
|
bool error_opening_file = false;
|
96
122
|
|
@@ -111,7 +137,7 @@ struct ParquetReadGlobalState : public GlobalTableFunctionState {
|
|
111
137
|
return max_threads;
|
112
138
|
}
|
113
139
|
|
114
|
-
bool
|
140
|
+
bool CanRemoveColumns() const {
|
115
141
|
return !projection_ids.empty();
|
116
142
|
}
|
117
143
|
};
|
@@ -134,6 +160,8 @@ struct ParquetWriteBindData : public TableFunctionData {
|
|
134
160
|
double dictionary_compression_ratio_threshold = 1.0;
|
135
161
|
|
136
162
|
ChildFieldIDs field_ids;
|
163
|
+
//! The compression level, higher value is more
|
164
|
+
optional_idx compression_level;
|
137
165
|
};
|
138
166
|
|
139
167
|
struct ParquetWriteGlobalState : public GlobalFunctionData {
|
@@ -153,10 +181,12 @@ struct ParquetWriteLocalState : public LocalFunctionData {
|
|
153
181
|
BindInfo ParquetGetBindInfo(const optional_ptr<FunctionData> bind_data) {
|
154
182
|
auto bind_info = BindInfo(ScanType::PARQUET);
|
155
183
|
auto &parquet_bind = bind_data->Cast<ParquetReadBindData>();
|
184
|
+
|
156
185
|
vector<Value> file_path;
|
157
|
-
for (auto &
|
158
|
-
file_path.emplace_back(
|
186
|
+
for (const auto &file : parquet_bind.file_list->Files()) {
|
187
|
+
file_path.emplace_back(file);
|
159
188
|
}
|
189
|
+
|
160
190
|
// LCOV_EXCL_START
|
161
191
|
bind_info.InsertOption("file_path", Value::LIST(LogicalType::VARCHAR, file_path));
|
162
192
|
bind_info.InsertOption("binary_as_string", Value::BOOLEAN(parquet_bind.parquet_options.binary_as_string));
|
@@ -166,9 +196,26 @@ BindInfo ParquetGetBindInfo(const optional_ptr<FunctionData> bind_data) {
|
|
166
196
|
return bind_info;
|
167
197
|
}
|
168
198
|
|
199
|
+
static void ParseFileRowNumberOption(MultiFileReaderBindData &bind_data, ParquetOptions &options,
|
200
|
+
vector<LogicalType> &return_types, vector<string> &names) {
|
201
|
+
if (options.file_row_number) {
|
202
|
+
if (StringUtil::CIFind(names, "file_row_number") != DConstants::INVALID_INDEX) {
|
203
|
+
throw BinderException(
|
204
|
+
"Using file_row_number option on file with column named file_row_number is not supported");
|
205
|
+
}
|
206
|
+
|
207
|
+
bind_data.file_row_number_idx = names.size();
|
208
|
+
return_types.emplace_back(LogicalType::BIGINT);
|
209
|
+
names.emplace_back("file_row_number");
|
210
|
+
}
|
211
|
+
}
|
212
|
+
|
169
213
|
static MultiFileReaderBindData BindSchema(ClientContext &context, vector<LogicalType> &return_types,
|
170
214
|
vector<string> &names, ParquetReadBindData &result, ParquetOptions &options) {
|
171
215
|
D_ASSERT(!options.schema.empty());
|
216
|
+
|
217
|
+
options.file_options.AutoDetectHivePartitioning(*result.file_list, context);
|
218
|
+
|
172
219
|
auto &file_options = options.file_options;
|
173
220
|
if (file_options.union_by_name || file_options.hive_partitioning) {
|
174
221
|
throw BinderException("Parquet schema cannot be combined with union_by_name=true or hive_partitioning=true");
|
@@ -184,45 +231,42 @@ static MultiFileReaderBindData BindSchema(ClientContext &context, vector<Logical
|
|
184
231
|
}
|
185
232
|
|
186
233
|
// perform the binding on the obtained set of names + types
|
187
|
-
|
188
|
-
|
234
|
+
MultiFileReaderBindData bind_data;
|
235
|
+
result.multi_file_reader->BindOptions(options.file_options, *result.file_list, schema_col_types, schema_col_names,
|
236
|
+
bind_data);
|
189
237
|
|
190
238
|
names = schema_col_names;
|
191
239
|
return_types = schema_col_types;
|
192
240
|
D_ASSERT(names.size() == return_types.size());
|
193
241
|
|
194
|
-
|
195
|
-
if (std::find(names.begin(), names.end(), "file_row_number") != names.end()) {
|
196
|
-
throw BinderException(
|
197
|
-
"Using file_row_number option on file with column named file_row_number is not supported");
|
198
|
-
}
|
199
|
-
|
200
|
-
bind_data.file_row_number_idx = names.size();
|
201
|
-
return_types.emplace_back(LogicalType::BIGINT);
|
202
|
-
names.emplace_back("file_row_number");
|
203
|
-
}
|
242
|
+
ParseFileRowNumberOption(bind_data, options, return_types, names);
|
204
243
|
|
205
244
|
return bind_data;
|
206
245
|
}
|
207
246
|
|
208
247
|
static void InitializeParquetReader(ParquetReader &reader, const ParquetReadBindData &bind_data,
|
209
248
|
const vector<column_t> &global_column_ids,
|
210
|
-
optional_ptr<TableFilterSet> table_filters, ClientContext &context
|
249
|
+
optional_ptr<TableFilterSet> table_filters, ClientContext &context,
|
250
|
+
optional_idx file_idx, optional_ptr<MultiFileReaderGlobalState> reader_state) {
|
211
251
|
auto &parquet_options = bind_data.parquet_options;
|
212
252
|
auto &reader_data = reader.reader_data;
|
253
|
+
|
254
|
+
// Mark the file in the file list we are scanning here
|
255
|
+
reader_data.file_list_idx = file_idx;
|
256
|
+
|
213
257
|
if (bind_data.parquet_options.schema.empty()) {
|
214
|
-
|
215
|
-
|
216
|
-
|
258
|
+
bind_data.multi_file_reader->InitializeReader(
|
259
|
+
reader, parquet_options.file_options, bind_data.reader_bind, bind_data.types, bind_data.names,
|
260
|
+
global_column_ids, table_filters, bind_data.file_list->GetFirstFile(), context, reader_state);
|
217
261
|
return;
|
218
262
|
}
|
219
263
|
|
220
264
|
// a fixed schema was supplied, initialize the MultiFileReader settings here so we can read using the schema
|
221
265
|
|
222
266
|
// this deals with hive partitioning and filename=true
|
223
|
-
|
224
|
-
|
225
|
-
|
267
|
+
bind_data.multi_file_reader->FinalizeBind(parquet_options.file_options, bind_data.reader_bind, reader.GetFileName(),
|
268
|
+
reader.GetNames(), bind_data.types, bind_data.names, global_column_ids,
|
269
|
+
reader_data, context, reader_state);
|
226
270
|
|
227
271
|
// create a mapping from field id to column index in file
|
228
272
|
unordered_map<uint32_t, idx_t> field_id_to_column_index;
|
@@ -281,7 +325,7 @@ static void InitializeParquetReader(ParquetReader &reader, const ParquetReadBind
|
|
281
325
|
reader_data.empty_columns = reader_data.column_ids.empty();
|
282
326
|
|
283
327
|
// Finally, initialize the filters
|
284
|
-
|
328
|
+
bind_data.multi_file_reader->CreateFilterMap(bind_data.types, table_filters, reader_data, reader_state);
|
285
329
|
reader_data.filters = table_filters;
|
286
330
|
}
|
287
331
|
|
@@ -314,7 +358,6 @@ public:
|
|
314
358
|
{"type", LogicalType::VARCHAR},
|
315
359
|
{"default_value", LogicalType::VARCHAR}}}));
|
316
360
|
table_function.named_parameters["encryption_config"] = LogicalTypeId::ANY;
|
317
|
-
MultiFileReader::AddParameters(table_function);
|
318
361
|
table_function.get_batch_index = ParquetScanGetBatchIndex;
|
319
362
|
table_function.serialize = ParquetScanSerialize;
|
320
363
|
table_function.deserialize = ParquetScanDeserialize;
|
@@ -323,6 +366,9 @@ public:
|
|
323
366
|
table_function.filter_pushdown = true;
|
324
367
|
table_function.filter_prune = true;
|
325
368
|
table_function.pushdown_complex_filter = ParquetComplexFilterPushdown;
|
369
|
+
|
370
|
+
MultiFileReader::AddParameters(table_function);
|
371
|
+
|
326
372
|
return MultiFileReader::CreateFunctionSet(table_function);
|
327
373
|
}
|
328
374
|
|
@@ -352,8 +398,13 @@ public:
|
|
352
398
|
}
|
353
399
|
}
|
354
400
|
|
355
|
-
|
356
|
-
|
401
|
+
// TODO: Allow overriding the MultiFileReader for COPY FROM?
|
402
|
+
auto multi_file_reader = MultiFileReader::CreateDefault("ParquetCopy");
|
403
|
+
vector<string> paths = {info.file_path};
|
404
|
+
auto file_list = multi_file_reader->CreateFileList(context, paths);
|
405
|
+
|
406
|
+
return ParquetScanBindInternal(context, std::move(multi_file_reader), std::move(file_list), expected_types,
|
407
|
+
expected_names, parquet_options);
|
357
408
|
}
|
358
409
|
|
359
410
|
static unique_ptr<BaseStatistics> ParquetScanStats(ClientContext &context, const FunctionData *bind_data_p,
|
@@ -367,7 +418,8 @@ public:
|
|
367
418
|
// NOTE: we do not want to parse the Parquet metadata for the sole purpose of getting column statistics
|
368
419
|
|
369
420
|
auto &config = DBConfig::GetConfig(context);
|
370
|
-
|
421
|
+
|
422
|
+
if (bind_data.file_list->GetExpandResult() != FileExpandResult::MULTIPLE_FILES) {
|
371
423
|
if (bind_data.initial_reader) {
|
372
424
|
// most common path, scanning single parquet file
|
373
425
|
return bind_data.initial_reader->ReadStatistics(bind_data.names[column_index]);
|
@@ -384,8 +436,7 @@ public:
|
|
384
436
|
// enabled at all)
|
385
437
|
FileSystem &fs = FileSystem::GetFileSystem(context);
|
386
438
|
|
387
|
-
for (
|
388
|
-
auto &file_name = bind_data.files[file_idx];
|
439
|
+
for (const auto &file_name : bind_data.file_list->Files()) {
|
389
440
|
auto metadata = cache.Get<ParquetFileMetadataCache>(file_name);
|
390
441
|
if (!metadata) {
|
391
442
|
// missing metadata entry in cache, no usable stats
|
@@ -422,17 +473,33 @@ public:
|
|
422
473
|
return nullptr;
|
423
474
|
}
|
424
475
|
|
425
|
-
static unique_ptr<FunctionData> ParquetScanBindInternal(ClientContext &context,
|
476
|
+
static unique_ptr<FunctionData> ParquetScanBindInternal(ClientContext &context,
|
477
|
+
unique_ptr<MultiFileReader> multi_file_reader,
|
478
|
+
unique_ptr<MultiFileList> file_list,
|
426
479
|
vector<LogicalType> &return_types, vector<string> &names,
|
427
480
|
ParquetOptions parquet_options) {
|
428
481
|
auto result = make_uniq<ParquetReadBindData>();
|
429
|
-
result->
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
482
|
+
result->multi_file_reader = std::move(multi_file_reader);
|
483
|
+
result->file_list = std::move(file_list);
|
484
|
+
|
485
|
+
bool bound_on_first_file = true;
|
486
|
+
if (result->multi_file_reader->Bind(parquet_options.file_options, *result->file_list, result->types,
|
487
|
+
result->names, result->reader_bind)) {
|
488
|
+
result->multi_file_reader->BindOptions(parquet_options.file_options, *result->file_list, result->types,
|
489
|
+
result->names, result->reader_bind);
|
490
|
+
// Enable the parquet file_row_number on the parquet options if the file_row_number_idx was set
|
491
|
+
if (result->reader_bind.file_row_number_idx != DConstants::INVALID_INDEX) {
|
492
|
+
parquet_options.file_row_number = true;
|
493
|
+
}
|
494
|
+
bound_on_first_file = false;
|
495
|
+
} else if (!parquet_options.schema.empty()) {
|
496
|
+
// A schema was supplied: use the schema for binding
|
435
497
|
result->reader_bind = BindSchema(context, result->types, result->names, *result, parquet_options);
|
498
|
+
} else {
|
499
|
+
parquet_options.file_options.AutoDetectHivePartitioning(*result->file_list, context);
|
500
|
+
// Default bind
|
501
|
+
result->reader_bind = result->multi_file_reader->BindReader<ParquetReader>(
|
502
|
+
context, result->types, result->names, *result->file_list, *result, parquet_options);
|
436
503
|
}
|
437
504
|
|
438
505
|
if (return_types.empty()) {
|
@@ -441,9 +508,11 @@ public:
|
|
441
508
|
names = result->names;
|
442
509
|
} else {
|
443
510
|
if (return_types.size() != result->types.size()) {
|
511
|
+
auto file_string = bound_on_first_file ? result->file_list->GetFirstFile()
|
512
|
+
: StringUtil::Join(result->file_list->GetPaths(), ",");
|
444
513
|
throw std::runtime_error(StringUtil::Format(
|
445
|
-
"Failed to read file \"%s\" - column count mismatch: expected %d columns but found %d",
|
446
|
-
|
514
|
+
"Failed to read file(s) \"%s\" - column count mismatch: expected %d columns but found %d",
|
515
|
+
file_string, return_types.size(), result->types.size()));
|
447
516
|
}
|
448
517
|
// expected types - overwrite the types we want to read instead
|
449
518
|
result->types = return_types;
|
@@ -454,11 +523,12 @@ public:
|
|
454
523
|
|
455
524
|
static unique_ptr<FunctionData> ParquetScanBind(ClientContext &context, TableFunctionBindInput &input,
|
456
525
|
vector<LogicalType> &return_types, vector<string> &names) {
|
457
|
-
auto
|
526
|
+
auto multi_file_reader = MultiFileReader::Create(input.table_function);
|
527
|
+
|
458
528
|
ParquetOptions parquet_options(context);
|
459
529
|
for (auto &kv : input.named_parameters) {
|
460
530
|
auto loption = StringUtil::Lower(kv.first);
|
461
|
-
if (
|
531
|
+
if (multi_file_reader->ParseOption(kv.first, kv.second, parquet_options.file_options, context)) {
|
462
532
|
continue;
|
463
533
|
}
|
464
534
|
if (loption == "binary_as_string") {
|
@@ -484,23 +554,27 @@ public:
|
|
484
554
|
parquet_options.encryption_config = ParquetEncryptionConfig::Create(context, kv.second);
|
485
555
|
}
|
486
556
|
}
|
487
|
-
|
488
|
-
|
557
|
+
|
558
|
+
auto file_list = multi_file_reader->CreateFileList(context, input.inputs[0]);
|
559
|
+
return ParquetScanBindInternal(context, std::move(multi_file_reader), std::move(file_list), return_types, names,
|
560
|
+
parquet_options);
|
489
561
|
}
|
490
562
|
|
491
563
|
static double ParquetProgress(ClientContext &context, const FunctionData *bind_data_p,
|
492
564
|
const GlobalTableFunctionState *global_state) {
|
493
565
|
auto &bind_data = bind_data_p->Cast<ParquetReadBindData>();
|
494
566
|
auto &gstate = global_state->Cast<ParquetReadGlobalState>();
|
495
|
-
|
567
|
+
|
568
|
+
auto total_count = bind_data.file_list->GetTotalFileCount();
|
569
|
+
if (total_count == 0) {
|
496
570
|
return 100.0;
|
497
571
|
}
|
498
572
|
if (bind_data.initial_file_cardinality == 0) {
|
499
|
-
return (100.0 * (gstate.file_index + 1)) /
|
573
|
+
return (100.0 * (gstate.file_index + 1)) / total_count;
|
500
574
|
}
|
501
575
|
auto percentage = MinValue<double>(
|
502
576
|
100.0, (bind_data.chunk_count * STANDARD_VECTOR_SIZE * 100.0 / bind_data.initial_file_cardinality));
|
503
|
-
return (percentage + 100.0 * gstate.file_index) /
|
577
|
+
return (percentage + 100.0 * gstate.file_index) / total_count;
|
504
578
|
}
|
505
579
|
|
506
580
|
static unique_ptr<LocalTableFunctionState>
|
@@ -511,7 +585,8 @@ public:
|
|
511
585
|
auto result = make_uniq<ParquetReadLocalState>();
|
512
586
|
result->is_parallel = true;
|
513
587
|
result->batch_index = 0;
|
514
|
-
|
588
|
+
|
589
|
+
if (gstate.CanRemoveColumns()) {
|
515
590
|
result->all_columns.Initialize(context.client, gstate.scanned_types);
|
516
591
|
}
|
517
592
|
if (!ParquetParallelStateNext(context.client, bind_data, *result, gstate)) {
|
@@ -524,35 +599,46 @@ public:
|
|
524
599
|
TableFunctionInitInput &input) {
|
525
600
|
auto &bind_data = input.bind_data->CastNoConst<ParquetReadBindData>();
|
526
601
|
auto result = make_uniq<ParquetReadGlobalState>();
|
527
|
-
|
528
|
-
|
529
|
-
result->
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
result->readers =
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
602
|
+
bind_data.file_list->InitializeScan(result->file_list_scan);
|
603
|
+
|
604
|
+
result->multi_file_reader_state = bind_data.multi_file_reader->InitializeGlobalState(
|
605
|
+
context, bind_data.parquet_options.file_options, bind_data.reader_bind, *bind_data.file_list,
|
606
|
+
bind_data.types, bind_data.names, input.column_ids);
|
607
|
+
if (bind_data.file_list->IsEmpty()) {
|
608
|
+
result->readers = {};
|
609
|
+
} else if (!bind_data.union_readers.empty()) {
|
610
|
+
// TODO: confirm we are not changing behaviour by modifying the order here?
|
611
|
+
for (auto &reader : bind_data.union_readers) {
|
612
|
+
if (!reader) {
|
613
|
+
break;
|
614
|
+
}
|
615
|
+
result->readers.push_back(ParquetFileReaderData(std::move(reader)));
|
538
616
|
}
|
539
|
-
if (bind_data.
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
result->
|
544
|
-
}
|
545
|
-
|
546
|
-
|
547
|
-
|
617
|
+
if (result->readers.size() != bind_data.file_list->GetTotalFileCount()) {
|
618
|
+
// This case happens with recursive CTEs: after the first execution the readers have already
|
619
|
+
// been moved out of the bind data.
|
620
|
+
// FIXME: clean up this process and make it more explicit
|
621
|
+
result->readers = {};
|
622
|
+
}
|
623
|
+
} else if (bind_data.initial_reader) {
|
624
|
+
// Ensure the initial reader was actually constructed from the first file
|
625
|
+
if (bind_data.initial_reader->file_name != bind_data.file_list->GetFirstFile()) {
|
626
|
+
throw InternalException("First file from list ('%s') does not match first reader ('%s')",
|
627
|
+
bind_data.initial_reader->file_name, bind_data.file_list->GetFirstFile());
|
548
628
|
}
|
549
|
-
result->
|
629
|
+
result->readers.emplace_back(std::move(bind_data.initial_reader));
|
550
630
|
}
|
551
|
-
|
552
|
-
|
553
|
-
|
631
|
+
|
632
|
+
// Ensure all readers are initialized and the FileListScan is in sync with the readers list
|
633
|
+
for (auto &reader_data : result->readers) {
|
634
|
+
string file_name;
|
635
|
+
idx_t file_idx = result->file_list_scan.current_file_idx;
|
636
|
+
bind_data.file_list->Scan(result->file_list_scan, file_name);
|
637
|
+
if (file_name != reader_data.reader->file_name) {
|
638
|
+
throw InternalException("Mismatch in filename order and reader order in parquet scan");
|
554
639
|
}
|
555
|
-
InitializeParquetReader(*reader, bind_data, input.column_ids, input.filters, context
|
640
|
+
InitializeParquetReader(*reader_data.reader, bind_data, input.column_ids, input.filters, context, file_idx,
|
641
|
+
result->multi_file_reader_state);
|
556
642
|
}
|
557
643
|
|
558
644
|
result->column_ids = input.column_ids;
|
@@ -561,7 +647,10 @@ public:
|
|
561
647
|
result->file_index = 0;
|
562
648
|
result->batch_index = 0;
|
563
649
|
result->max_threads = ParquetScanMaxThreads(context, input.bind_data.get());
|
564
|
-
|
650
|
+
|
651
|
+
bool require_extra_columns =
|
652
|
+
result->multi_file_reader_state && result->multi_file_reader_state->RequiresExtraColumns();
|
653
|
+
if (input.CanRemoveFilterColumns() || require_extra_columns) {
|
565
654
|
result->projection_ids = input.projection_ids;
|
566
655
|
const auto table_types = bind_data.types;
|
567
656
|
for (const auto &col_idx : input.column_ids) {
|
@@ -572,6 +661,13 @@ public:
|
|
572
661
|
}
|
573
662
|
}
|
574
663
|
}
|
664
|
+
|
665
|
+
if (require_extra_columns) {
|
666
|
+
for (const auto &column_type : result->multi_file_reader_state->extra_columns) {
|
667
|
+
result->scanned_types.push_back(column_type);
|
668
|
+
}
|
669
|
+
}
|
670
|
+
|
575
671
|
return std::move(result);
|
576
672
|
}
|
577
673
|
|
@@ -585,7 +681,8 @@ public:
|
|
585
681
|
static void ParquetScanSerialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
|
586
682
|
const TableFunction &function) {
|
587
683
|
auto &bind_data = bind_data_p->Cast<ParquetReadBindData>();
|
588
|
-
|
684
|
+
|
685
|
+
serializer.WriteProperty(100, "files", bind_data.file_list->GetAllFiles());
|
589
686
|
serializer.WriteProperty(101, "types", bind_data.types);
|
590
687
|
serializer.WriteProperty(102, "names", bind_data.names);
|
591
688
|
serializer.WriteProperty(103, "parquet_options", bind_data.parquet_options);
|
@@ -597,7 +694,17 @@ public:
|
|
597
694
|
auto types = deserializer.ReadProperty<vector<LogicalType>>(101, "types");
|
598
695
|
auto names = deserializer.ReadProperty<vector<string>>(102, "names");
|
599
696
|
auto parquet_options = deserializer.ReadProperty<ParquetOptions>(103, "parquet_options");
|
600
|
-
|
697
|
+
|
698
|
+
vector<Value> file_path;
|
699
|
+
for (auto &path : files) {
|
700
|
+
file_path.emplace_back(path);
|
701
|
+
}
|
702
|
+
|
703
|
+
auto multi_file_reader = MultiFileReader::Create(function);
|
704
|
+
auto file_list = multi_file_reader->CreateFileList(context, Value::LIST(LogicalType::VARCHAR, file_path),
|
705
|
+
FileGlobOptions::DISALLOW_EMPTY);
|
706
|
+
return ParquetScanBindInternal(context, std::move(multi_file_reader), std::move(file_list), types, names,
|
707
|
+
parquet_options);
|
601
708
|
}
|
602
709
|
|
603
710
|
static void ParquetScanImplementation(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
|
@@ -609,14 +716,16 @@ public:
|
|
609
716
|
auto &bind_data = data_p.bind_data->CastNoConst<ParquetReadBindData>();
|
610
717
|
|
611
718
|
do {
|
612
|
-
if (gstate.
|
719
|
+
if (gstate.CanRemoveColumns()) {
|
613
720
|
data.all_columns.Reset();
|
614
721
|
data.reader->Scan(data.scan_state, data.all_columns);
|
615
|
-
|
722
|
+
bind_data.multi_file_reader->FinalizeChunk(context, bind_data.reader_bind, data.reader->reader_data,
|
723
|
+
data.all_columns, gstate.multi_file_reader_state);
|
616
724
|
output.ReferenceColumns(data.all_columns, gstate.projection_ids);
|
617
725
|
} else {
|
618
726
|
data.reader->Scan(data.scan_state, output);
|
619
|
-
|
727
|
+
bind_data.multi_file_reader->FinalizeChunk(context, bind_data.reader_bind, data.reader->reader_data,
|
728
|
+
output, gstate.multi_file_reader_state);
|
620
729
|
}
|
621
730
|
|
622
731
|
bind_data.chunk_count++;
|
@@ -631,17 +740,33 @@ public:
|
|
631
740
|
|
632
741
|
static unique_ptr<NodeStatistics> ParquetCardinality(ClientContext &context, const FunctionData *bind_data) {
|
633
742
|
auto &data = bind_data->Cast<ParquetReadBindData>();
|
634
|
-
return make_uniq<NodeStatistics>(data.initial_file_cardinality * data.
|
743
|
+
return make_uniq<NodeStatistics>(data.initial_file_cardinality * data.file_list->GetTotalFileCount());
|
635
744
|
}
|
636
745
|
|
637
746
|
static idx_t ParquetScanMaxThreads(ClientContext &context, const FunctionData *bind_data) {
|
638
747
|
auto &data = bind_data->Cast<ParquetReadBindData>();
|
639
|
-
|
748
|
+
|
749
|
+
if (data.file_list->GetExpandResult() == FileExpandResult::MULTIPLE_FILES) {
|
640
750
|
return TaskScheduler::GetScheduler(context).NumberOfThreads();
|
641
751
|
}
|
752
|
+
|
642
753
|
return MaxValue(data.initial_file_row_groups, (idx_t)1);
|
643
754
|
}
|
644
755
|
|
756
|
+
// Queries the metadata provider for another file to scan, updating the files/reader lists in the process.
|
757
|
+
// Returns true if resized
|
758
|
+
static bool ResizeFiles(const ParquetReadBindData &bind_data, ParquetReadGlobalState &parallel_state) {
|
759
|
+
string scanned_file;
|
760
|
+
if (!bind_data.file_list->Scan(parallel_state.file_list_scan, scanned_file)) {
|
761
|
+
return false;
|
762
|
+
}
|
763
|
+
|
764
|
+
// Push the file in the reader data, to be opened later
|
765
|
+
parallel_state.readers.emplace_back(scanned_file);
|
766
|
+
|
767
|
+
return true;
|
768
|
+
}
|
769
|
+
|
645
770
|
// This function looks for the next available row group. If not available, it will open files from bind_data.files
|
646
771
|
// until there is a row group available for scanning or the files runs out
|
647
772
|
static bool ParquetParallelStateNext(ClientContext &context, const ParquetReadBindData &bind_data,
|
@@ -653,17 +778,15 @@ public:
|
|
653
778
|
return false;
|
654
779
|
}
|
655
780
|
|
656
|
-
if (parallel_state.file_index >= parallel_state.readers.size()) {
|
781
|
+
if (parallel_state.file_index >= parallel_state.readers.size() && !ResizeFiles(bind_data, parallel_state)) {
|
657
782
|
return false;
|
658
783
|
}
|
659
784
|
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
if (parallel_state.row_group_index <
|
664
|
-
parallel_state.readers[parallel_state.file_index]->NumRowGroups()) {
|
785
|
+
auto &current_reader_data = parallel_state.readers[parallel_state.file_index];
|
786
|
+
if (current_reader_data.file_state == ParquetFileState::OPEN) {
|
787
|
+
if (parallel_state.row_group_index < current_reader_data.reader->NumRowGroups()) {
|
665
788
|
// The current reader has rowgroups left to be scanned
|
666
|
-
scan_data.reader =
|
789
|
+
scan_data.reader = current_reader_data.reader;
|
667
790
|
vector<idx_t> group_indexes {parallel_state.row_group_index};
|
668
791
|
scan_data.reader->InitializeScan(scan_data.scan_state, group_indexes);
|
669
792
|
scan_data.batch_index = parallel_state.batch_index++;
|
@@ -672,16 +795,13 @@ public:
|
|
672
795
|
return true;
|
673
796
|
} else {
|
674
797
|
// Close current file
|
675
|
-
|
676
|
-
|
798
|
+
current_reader_data.file_state = ParquetFileState::CLOSED;
|
799
|
+
current_reader_data.reader = nullptr;
|
677
800
|
|
678
801
|
// Set state to the next file
|
679
802
|
parallel_state.file_index++;
|
680
803
|
parallel_state.row_group_index = 0;
|
681
804
|
|
682
|
-
if (parallel_state.file_index >= bind_data.files.size()) {
|
683
|
-
return false;
|
684
|
-
}
|
685
805
|
continue;
|
686
806
|
}
|
687
807
|
}
|
@@ -691,7 +811,7 @@ public:
|
|
691
811
|
}
|
692
812
|
|
693
813
|
// Check if the current file is being opened, in that case we need to wait for it.
|
694
|
-
if (parallel_state.
|
814
|
+
if (parallel_state.readers[parallel_state.file_index].file_state == ParquetFileState::OPENING) {
|
695
815
|
WaitForFile(parallel_state.file_index, parallel_state, parallel_lock);
|
696
816
|
}
|
697
817
|
}
|
@@ -701,10 +821,12 @@ public:
|
|
701
821
|
vector<unique_ptr<Expression>> &filters) {
|
702
822
|
auto &data = bind_data_p->Cast<ParquetReadBindData>();
|
703
823
|
|
704
|
-
auto
|
705
|
-
|
706
|
-
|
707
|
-
|
824
|
+
auto new_list = data.multi_file_reader->ComplexFilterPushdown(context, *data.file_list,
|
825
|
+
data.parquet_options.file_options, get, filters);
|
826
|
+
|
827
|
+
if (new_list) {
|
828
|
+
data.file_list = std::move(new_list);
|
829
|
+
MultiFileReader::PruneReaders(data, *data.file_list);
|
708
830
|
}
|
709
831
|
}
|
710
832
|
|
@@ -712,9 +834,14 @@ public:
|
|
712
834
|
static void WaitForFile(idx_t file_index, ParquetReadGlobalState &parallel_state,
|
713
835
|
unique_lock<mutex> &parallel_lock) {
|
714
836
|
while (true) {
|
715
|
-
|
837
|
+
|
838
|
+
// Get pointer to file mutex before unlocking
|
839
|
+
auto &file_mutex = *parallel_state.readers[file_index].file_mutex;
|
840
|
+
|
841
|
+
// To get the file lock, we first need to release the parallel_lock to prevent deadlocking. Note that this
|
842
|
+
// requires getting the ref to the file mutex pointer with the lock still held: readers can be resized
|
716
843
|
parallel_lock.unlock();
|
717
|
-
unique_lock<mutex> current_file_lock(
|
844
|
+
unique_lock<mutex> current_file_lock(file_mutex);
|
718
845
|
parallel_lock.lock();
|
719
846
|
|
720
847
|
// Here we have both locks which means we can stop waiting if:
|
@@ -722,7 +849,7 @@ public:
|
|
722
849
|
// - the thread opening the file has failed
|
723
850
|
// - the file was somehow scanned till the end while we were waiting
|
724
851
|
if (parallel_state.file_index >= parallel_state.readers.size() ||
|
725
|
-
parallel_state.
|
852
|
+
parallel_state.readers[parallel_state.file_index].file_state != ParquetFileState::OPENING ||
|
726
853
|
parallel_state.error_opening_file) {
|
727
854
|
return;
|
728
855
|
}
|
@@ -734,24 +861,29 @@ public:
|
|
734
861
|
ParquetReadLocalState &scan_data, ParquetReadGlobalState &parallel_state,
|
735
862
|
unique_lock<mutex> &parallel_lock) {
|
736
863
|
const auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
737
|
-
|
864
|
+
|
865
|
+
const auto file_index_limit =
|
866
|
+
MinValue<idx_t>(parallel_state.file_index + num_threads, parallel_state.readers.size());
|
867
|
+
|
738
868
|
for (idx_t i = parallel_state.file_index; i < file_index_limit; i++) {
|
739
|
-
if (parallel_state.
|
740
|
-
|
741
|
-
|
742
|
-
auto pq_options =
|
869
|
+
if (parallel_state.readers[i].file_state == ParquetFileState::UNOPENED) {
|
870
|
+
auto &current_reader_data = parallel_state.readers[i];
|
871
|
+
current_reader_data.file_state = ParquetFileState::OPENING;
|
872
|
+
auto pq_options = bind_data.parquet_options;
|
873
|
+
|
874
|
+
// Get pointer to file mutex before unlocking
|
875
|
+
auto &current_file_lock = *current_reader_data.file_mutex;
|
743
876
|
|
744
877
|
// Now we switch which lock we are holding, instead of locking the global state, we grab the lock on
|
745
878
|
// the file we are opening. This file lock allows threads to wait for a file to be opened.
|
746
879
|
parallel_lock.unlock();
|
747
|
-
|
748
|
-
unique_lock<mutex> file_lock(parallel_state.file_mutexes[i]);
|
880
|
+
unique_lock<mutex> file_lock(current_file_lock);
|
749
881
|
|
750
882
|
shared_ptr<ParquetReader> reader;
|
751
883
|
try {
|
752
|
-
reader =
|
884
|
+
reader = make_shared_ptr<ParquetReader>(context, current_reader_data.file_to_be_opened, pq_options);
|
753
885
|
InitializeParquetReader(*reader, bind_data, parallel_state.column_ids, parallel_state.filters,
|
754
|
-
context);
|
886
|
+
context, i, parallel_state.multi_file_reader_state);
|
755
887
|
} catch (...) {
|
756
888
|
parallel_lock.lock();
|
757
889
|
parallel_state.error_opening_file = true;
|
@@ -760,8 +892,8 @@ public:
|
|
760
892
|
|
761
893
|
// Now re-lock the state and add the reader
|
762
894
|
parallel_lock.lock();
|
763
|
-
|
764
|
-
|
895
|
+
current_reader_data.reader = reader;
|
896
|
+
current_reader_data.file_state = ParquetFileState::OPEN;
|
765
897
|
|
766
898
|
return true;
|
767
899
|
}
|
@@ -1004,6 +1136,8 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyFunctionBi
|
|
1004
1136
|
"dictionary compression");
|
1005
1137
|
}
|
1006
1138
|
bind_data->dictionary_compression_ratio_threshold = val;
|
1139
|
+
} else if (loption == "compression_level") {
|
1140
|
+
bind_data->compression_level = option.second[0].GetValue<uint64_t>();
|
1007
1141
|
} else {
|
1008
1142
|
throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
|
1009
1143
|
}
|
@@ -1029,10 +1163,10 @@ unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext &conte
|
|
1029
1163
|
auto &parquet_bind = bind_data.Cast<ParquetWriteBindData>();
|
1030
1164
|
|
1031
1165
|
auto &fs = FileSystem::GetFileSystem(context);
|
1032
|
-
global_state->writer =
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1166
|
+
global_state->writer = make_uniq<ParquetWriter>(
|
1167
|
+
fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec,
|
1168
|
+
parquet_bind.field_ids.Copy(), parquet_bind.kv_metadata, parquet_bind.encryption_config,
|
1169
|
+
parquet_bind.dictionary_compression_ratio_threshold, parquet_bind.compression_level);
|
1036
1170
|
return std::move(global_state);
|
1037
1171
|
}
|
1038
1172
|
|
@@ -1153,6 +1287,7 @@ static void ParquetCopySerialize(Serializer &serializer, const FunctionData &bin
|
|
1153
1287
|
bind_data.encryption_config, nullptr);
|
1154
1288
|
serializer.WriteProperty(108, "dictionary_compression_ratio_threshold",
|
1155
1289
|
bind_data.dictionary_compression_ratio_threshold);
|
1290
|
+
serializer.WritePropertyWithDefault<optional_idx>(109, "compression_level", bind_data.compression_level);
|
1156
1291
|
}
|
1157
1292
|
|
1158
1293
|
static unique_ptr<FunctionData> ParquetCopyDeserialize(Deserializer &deserializer, CopyFunction &function) {
|
@@ -1168,6 +1303,7 @@ static unique_ptr<FunctionData> ParquetCopyDeserialize(Deserializer &deserialize
|
|
1168
1303
|
data->encryption_config, nullptr);
|
1169
1304
|
deserializer.ReadPropertyWithDefault<double>(108, "dictionary_compression_ratio_threshold",
|
1170
1305
|
data->dictionary_compression_ratio_threshold, 1.0);
|
1306
|
+
deserializer.ReadPropertyWithDefault<optional_idx>(109, "compression_level", data->compression_level);
|
1171
1307
|
return std::move(data);
|
1172
1308
|
}
|
1173
1309
|
// LCOV_EXCL_STOP
|
@@ -1229,8 +1365,9 @@ idx_t ParquetWriteFileSize(GlobalFunctionData &gstate) {
|
|
1229
1365
|
//===--------------------------------------------------------------------===//
|
1230
1366
|
// Scan Replacement
|
1231
1367
|
//===--------------------------------------------------------------------===//
|
1232
|
-
unique_ptr<TableRef> ParquetScanReplacement(ClientContext &context,
|
1233
|
-
ReplacementScanData
|
1368
|
+
unique_ptr<TableRef> ParquetScanReplacement(ClientContext &context, ReplacementScanInput &input,
|
1369
|
+
optional_ptr<ReplacementScanData> data) {
|
1370
|
+
auto &table_name = input.table_name;
|
1234
1371
|
if (!ReplacementScan::CanReplace(table_name, {"parquet"})) {
|
1235
1372
|
return nullptr;
|
1236
1373
|
}
|
@@ -1310,6 +1447,14 @@ std::string ParquetExtension::Name() {
|
|
1310
1447
|
return "parquet";
|
1311
1448
|
}
|
1312
1449
|
|
1450
|
+
std::string ParquetExtension::Version() const {
|
1451
|
+
#ifdef EXT_VERSION_PARQUET
|
1452
|
+
return EXT_VERSION_PARQUET;
|
1453
|
+
#else
|
1454
|
+
return "";
|
1455
|
+
#endif
|
1456
|
+
}
|
1457
|
+
|
1313
1458
|
} // namespace duckdb
|
1314
1459
|
|
1315
1460
|
#ifdef DUCKDB_BUILD_LOADABLE_EXTENSION
|