duckdb 0.8.2-dev37.0 → 0.8.2-dev3989.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/binding.gyp +29 -13
- package/binding.gyp.in +1 -1
- package/configure.py +11 -3
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +10 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +162 -41
- package/src/duckdb/extension/icu/icu-datesub.cpp +3 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +2 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +19 -6
- package/src/duckdb/extension/icu/icu-strptime.cpp +5 -24
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +10 -12
- package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -0
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +5 -19
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_enums.hpp +60 -0
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/include/json_scan.hpp +14 -10
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/include/json_transform.hpp +3 -0
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_deserializer.cpp +37 -73
- package/src/duckdb/extension/json/json_enums.cpp +105 -0
- package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +93 -38
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +26 -25
- package/src/duckdb/extension/json/json_scan.cpp +47 -6
- package/src/duckdb/extension/json/json_serializer.cpp +29 -72
- package/src/duckdb/extension/json/serialize_json.cpp +92 -0
- package/src/duckdb/extension/parquet/column_reader.cpp +37 -25
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +4 -0
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +1 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +28 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +258 -40
- package/src/duckdb/extension/parquet/parquet_reader.cpp +10 -10
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +25 -8
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +6 -0
- package/src/duckdb/extension/parquet/parquet_writer.cpp +149 -31
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +26 -0
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog.cpp +147 -70
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +8 -11
- package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +17 -41
- package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +2 -10
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +4 -14
- package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +11 -28
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +11 -42
- package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +7 -26
- package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +11 -27
- package/src/duckdb/src/catalog/catalog_entry.cpp +25 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/catalog_set.cpp +0 -63
- package/src/duckdb/src/catalog/default/default_functions.cpp +21 -0
- package/src/duckdb/src/catalog/dependency_manager.cpp +0 -36
- package/src/duckdb/src/common/adbc/adbc.cpp +541 -171
- package/src/duckdb/src/common/adbc/driver_manager.cpp +92 -39
- package/src/duckdb/src/common/adbc/nanoarrow/allocator.cpp +57 -0
- package/src/duckdb/src/common/adbc/nanoarrow/metadata.cpp +121 -0
- package/src/duckdb/src/common/adbc/nanoarrow/schema.cpp +474 -0
- package/src/duckdb/src/common/adbc/nanoarrow/single_batch_array_stream.cpp +84 -0
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/appender/bool_data.cpp +44 -0
- package/src/duckdb/src/common/arrow/appender/list_data.cpp +78 -0
- package/src/duckdb/src/common/arrow/appender/map_data.cpp +86 -0
- package/src/duckdb/src/common/arrow/appender/struct_data.cpp +45 -0
- package/src/duckdb/src/common/arrow/appender/union_data.cpp +70 -0
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +95 -666
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +65 -37
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +37 -42
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/constants.cpp +2 -1
- package/src/duckdb/src/common/enum_util.cpp +4979 -4458
- package/src/duckdb/src/common/enums/date_part_specifier.cpp +2 -0
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +15 -2
- package/src/duckdb/src/common/extra_type_info.cpp +487 -0
- package/src/duckdb/src/common/field_writer.cpp +1 -1
- package/src/duckdb/src/common/file_buffer.cpp +1 -1
- package/src/duckdb/src/common/file_system.cpp +46 -12
- package/src/duckdb/src/common/filename_pattern.cpp +1 -1
- package/src/duckdb/src/common/gzip_file_system.cpp +7 -12
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/http_state.cpp +78 -0
- package/src/duckdb/src/common/local_file_system.cpp +36 -28
- package/src/duckdb/src/common/multi_file_reader.cpp +193 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +92 -1
- package/src/duckdb/src/common/operator/string_cast.cpp +45 -8
- package/src/duckdb/src/common/radix_partitioning.cpp +34 -39
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +63 -73
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +85 -80
- package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +0 -9
- package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +102 -74
- package/src/duckdb/src/common/sort/sort_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +68 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +20 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +46 -10
- package/src/duckdb/src/common/types/date.cpp +15 -0
- package/src/duckdb/src/common/types/hugeint.cpp +40 -0
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +6 -0
- package/src/duckdb/src/common/types/list_segment.cpp +56 -198
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +251 -131
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +41 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/string_heap.cpp +4 -0
- package/src/duckdb/src/common/types/time.cpp +105 -0
- package/src/duckdb/src/common/types/timestamp.cpp +7 -0
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types/value.cpp +99 -60
- package/src/duckdb/src/common/types/vector.cpp +73 -80
- package/src/duckdb/src/common/types.cpp +38 -724
- package/src/duckdb/src/common/virtual_file_system.cpp +142 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +26 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +5 -7
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +64 -19
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +30 -0
- package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +1 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +83 -59
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +10 -4
- package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +2 -0
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +380 -89
- package/src/duckdb/src/core_functions/scalar/date/date_sub.cpp +2 -0
- package/src/duckdb/src/core_functions/scalar/date/date_trunc.cpp +4 -0
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +10 -0
- package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
- package/src/duckdb/src/core_functions/scalar/enum/enum_functions.cpp +16 -12
- package/src/duckdb/src/core_functions/scalar/generic/current_setting.cpp +3 -1
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +314 -82
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +23 -3
- package/src/duckdb/src/core_functions/scalar/map/map_entries.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/string/to_base.cpp +66 -0
- package/src/duckdb/src/core_functions/scalar/union/union_tag.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +226 -346
- package/src/duckdb/src/execution/column_binding_resolver.cpp +10 -7
- package/src/duckdb/src/execution/expression_executor/execute_parameter.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +219 -259
- package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +11 -15
- package/src/duckdb/src/execution/index/art/iterator.cpp +130 -214
- package/src/duckdb/src/execution/index/art/leaf.cpp +300 -266
- package/src/duckdb/src/execution/index/art/node.cpp +211 -205
- package/src/duckdb/src/execution/index/art/node16.cpp +10 -19
- package/src/duckdb/src/execution/index/art/node256.cpp +10 -18
- package/src/duckdb/src/execution/index/art/node4.cpp +21 -23
- package/src/duckdb/src/execution/index/art/node48.cpp +10 -20
- package/src/duckdb/src/execution/index/art/prefix.cpp +308 -338
- package/src/duckdb/src/execution/join_hashtable.cpp +9 -10
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +250 -317
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +6 -4
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +231 -190
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +367 -1068
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +157 -174
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +67 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +46 -47
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +12 -9
- package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +2 -2
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +10 -8
- package/src/duckdb/src/execution/operator/helper/physical_load.cpp +2 -1
- package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +7 -5
- package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +3 -1
- package/src/duckdb/src/execution/operator/helper/physical_set.cpp +3 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +7 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +449 -288
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +13 -6
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +28 -15
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +35 -17
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +7 -4
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +31 -10
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
- package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -5
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +7 -5
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +14 -10
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +11 -9
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +9 -7
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +14 -12
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +11 -11
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -2
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +24 -27
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -12
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +2 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +198 -0
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +2 -6
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +16 -7
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +37 -6
- package/src/duckdb/src/execution/physical_operator.cpp +20 -16
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +57 -35
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +32 -15
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +45 -34
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +2 -5
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +6 -11
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +636 -349
- package/src/duckdb/src/execution/window_executor.cpp +1285 -0
- package/src/duckdb/src/execution/window_segment_tree.cpp +408 -144
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -13
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +6 -12
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/cast/string_cast.cpp +2 -2
- package/src/duckdb/src/function/cast/time_casts.cpp +7 -6
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +7 -2
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +7 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/operators/add.cpp +9 -0
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +6 -3
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +39 -5
- package/src/duckdb/src/function/scalar_function.cpp +5 -20
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +57 -0
- package/src/duckdb/src/function/table/arrow.cpp +110 -88
- package/src/duckdb/src/function/table/arrow_conversion.cpp +86 -73
- package/src/duckdb/src/function/table/copy_csv.cpp +102 -97
- package/src/duckdb/src/function/table/read_csv.cpp +263 -141
- package/src/duckdb/src/function/table/system/test_all_types.cpp +48 -21
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +42 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +49 -2
- package/src/duckdb/src/function/table_function.cpp +4 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +20 -5
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +3 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +1 -4
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -5
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp +1 -6
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +2 -13
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +1 -4
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/view_catalog_entry.hpp +2 -5
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +14 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +0 -6
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.h +1 -0
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/adbc/single_batch_array_stream.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +109 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/bool_data.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +69 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +88 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/struct_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/union_data.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +105 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +9 -4
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +3 -5
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +5 -3
- package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.h +462 -0
- package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.hpp +14 -0
- package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/assert.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +70 -55
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/constants.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +681 -577
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +9 -1
- package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +4 -3
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +15 -1
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +215 -0
- package/src/duckdb/src/include/duckdb/common/field_writer.hpp +0 -4
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +10 -8
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +8 -3
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/index_vector.hpp +12 -0
- package/src/duckdb/src/include/duckdb/common/limits.hpp +52 -149
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +11 -5
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +12 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +48 -0
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/operator/add.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +65 -4
- package/src/duckdb/src/include/duckdb/common/operator/multiply.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +71 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +48 -39
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +0 -4
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +128 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +186 -133
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +166 -121
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +27 -4
- package/src/duckdb/src/include/duckdb/common/serializer.hpp +0 -7
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +34 -13
- package/src/duckdb/src/include/duckdb/common/stack_checker.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +11 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -5
- package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +46 -3
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +11 -15
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +46 -11
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +10 -1
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/types/time.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +16 -10
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +10 -3
- package/src/duckdb/src/include/duckdb/common/types.hpp +6 -25
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +40 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +4 -2
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +40 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +7 -5
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +6 -4
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +4 -2
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +12 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +128 -131
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +13 -12
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +32 -28
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +46 -51
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +134 -53
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +7 -9
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +8 -7
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +6 -5
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +5 -12
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +19 -19
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +61 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +22 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/operator/schema/{physical_create_index.hpp → physical_create_art_index.hpp} +14 -7
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +6 -5
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +11 -0
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +6 -2
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +19 -21
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +79 -63
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -4
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +6 -1
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +80 -0
- package/src/duckdb/src/include/duckdb/function/macro_function.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +12 -4
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +8 -3
- package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +99 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +6 -36
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +24 -12
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +5 -1
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -0
- package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/udf_function.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +4 -3
- package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp +29 -0
- package/src/duckdb/src/include/duckdb/main/chunk_scan_state.hpp +43 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +16 -14
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/client_properties.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +16 -1
- package/src/duckdb/src/include/duckdb/main/connection.hpp +3 -4
- package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +27 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +210 -144
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +41 -6
- package/src/duckdb/src/include/duckdb/main/extension_util.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +73 -5
- package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +6 -6
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +2 -27
- package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +71 -11
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +7 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +38 -64
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +8 -22
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -12
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +19 -30
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -3
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +9 -1
- package/src/duckdb/src/include/duckdb/parser/column_definition.hpp +6 -5
- package/src/duckdb/src/include/duckdb/parser/column_list.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/constraint.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/constraints/check_constraint.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/constraints/foreign_key_constraint.hpp +6 -0
- package/src/duckdb/src/include/duckdb/parser/constraints/not_null_constraint.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +6 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +21 -4
- package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +18 -2
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +12 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +66 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +8 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +8 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +9 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +9 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_schema_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +7 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +7 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +7 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/load_info.hpp +17 -3
- package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +22 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/pragma_info.hpp +10 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/show_select_info.hpp +7 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +10 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +10 -0
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/statement/execute_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +23 -26
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +16 -5
- package/src/duckdb/src/include/duckdb/planner/bound_constraint.hpp +0 -8
- package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +9 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_between_expression.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_conjunction_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +3 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_operator_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +24 -6
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +9 -2
- package/src/duckdb/src/include/duckdb/planner/expression/bound_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_unnest_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +13 -1
- package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/joinside.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +3 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -2
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_any_join.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +12 -7
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create.hpp +9 -6
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +12 -23
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create_table.hpp +10 -6
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cross_product.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +9 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +7 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +6 -10
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_empty_result.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_extension_operator.hpp +8 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +11 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_limit.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +52 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +7 -35
- package/src/duckdb/src/include/duckdb/planner/operator/logical_pivot.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_positional_join.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_projection.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +10 -7
- package/src/duckdb/src/include/duckdb/planner/operator/logical_reset.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_sample.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_set.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_simple.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_unnest.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_window.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/planner.hpp +4 -3
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +7 -1
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_pivotref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +33 -4
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +11 -11
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +5 -5
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +19 -16
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +88 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +54 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +45 -0
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +8 -5
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +7 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +18 -3
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +8 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +3 -4
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +1 -1
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +98 -1
- package/src/duckdb/src/main/appender.cpp +3 -1
- package/src/duckdb/src/main/attached_database.cpp +2 -2
- package/src/duckdb/src/main/capi/arrow-c.cpp +196 -8
- package/src/duckdb/src/main/capi/duckdb-c.cpp +16 -0
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/capi/pending-c.cpp +23 -0
- package/src/duckdb/src/main/capi/prepared-c.cpp +106 -28
- package/src/duckdb/src/main/capi/result-c.cpp +3 -1
- package/src/duckdb/src/main/chunk_scan_state/query_result.cpp +53 -0
- package/src/duckdb/src/main/chunk_scan_state.cpp +48 -0
- package/src/duckdb/src/main/client_context.cpp +42 -19
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +18 -0
- package/src/duckdb/src/main/config.cpp +9 -3
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/database.cpp +3 -12
- package/src/duckdb/src/main/db_instance_cache.cpp +14 -6
- package/src/duckdb/src/main/extension/extension_helper.cpp +164 -88
- package/src/duckdb/src/main/extension/extension_install.cpp +76 -15
- package/src/duckdb/src/main/extension/extension_load.cpp +62 -13
- package/src/duckdb/src/main/extension/extension_util.cpp +16 -0
- package/src/duckdb/src/main/pending_query_result.cpp +9 -1
- package/src/duckdb/src/main/prepared_statement.cpp +38 -11
- package/src/duckdb/src/main/prepared_statement_data.cpp +23 -18
- package/src/duckdb/src/main/query_result.cpp +0 -21
- package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +6 -6
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/main/relation.cpp +10 -9
- package/src/duckdb/src/main/settings/settings.cpp +125 -33
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +2 -4
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +477 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +180 -323
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +23 -6
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
- package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1047
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +52 -41
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
- package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +34 -7
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +27 -10
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +3 -5
- package/src/duckdb/src/parallel/executor.cpp +25 -1
- package/src/duckdb/src/parallel/pipeline.cpp +0 -17
- package/src/duckdb/src/parallel/pipeline_executor.cpp +33 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +55 -1
- package/src/duckdb/src/parallel/task_scheduler.cpp +18 -2
- package/src/duckdb/src/parser/column_definition.cpp +20 -32
- package/src/duckdb/src/parser/column_list.cpp +8 -0
- package/src/duckdb/src/parser/constraints/foreign_key_constraint.cpp +3 -0
- package/src/duckdb/src/parser/constraints/unique_constraint.cpp +3 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -25
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +7 -19
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_data/alter_info.cpp +5 -2
- package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +38 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +17 -1
- package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +2 -0
- package/src/duckdb/src/parser/parsed_data/detach_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/drop_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/sample_options.cpp +0 -18
- package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +4 -1
- package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +62 -36
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -47
- package/src/duckdb/src/parser/result_modifier.cpp +0 -87
- package/src/duckdb/src/parser/statement/execute_statement.cpp +2 -2
- package/src/duckdb/src/parser/statement/select_statement.cpp +0 -10
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -55
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +55 -38
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/expression/transform_param_ref.cpp +45 -26
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +16 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_create_index.cpp +32 -17
- package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_load.cpp +1 -0
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +28 -6
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +44 -25
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +5 -3
- package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +10 -10
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +36 -33
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +14 -52
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +0 -23
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +13 -7
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +70 -29
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +93 -28
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +67 -31
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +44 -31
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +24 -1
- package/src/duckdb/src/planner/expression/bound_between_expression.cpp +4 -0
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +13 -8
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +22 -0
- package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +28 -20
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +48 -4
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +5 -4
- package/src/duckdb/src/planner/expression_binder.cpp +23 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +19 -7
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +5 -6
- package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +4 -2
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +8 -0
- package/src/duckdb/src/planner/operator/logical_create.cpp +14 -0
- package/src/duckdb/src/planner/operator/logical_create_index.cpp +36 -7
- package/src/duckdb/src/planner/operator/logical_create_table.cpp +16 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_delete.cpp +9 -2
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_distinct.cpp +13 -0
- package/src/duckdb/src/planner/operator/logical_explain.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +39 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +82 -4
- package/src/duckdb/src/planner/operator/logical_insert.cpp +8 -2
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +22 -0
- package/src/duckdb/src/planner/operator/logical_order.cpp +39 -0
- package/src/duckdb/src/planner/operator/logical_pivot.cpp +3 -0
- package/src/duckdb/src/planner/operator/logical_recursive_cte.cpp +5 -5
- package/src/duckdb/src/planner/operator/logical_sample.cpp +3 -0
- package/src/duckdb/src/planner/operator/logical_update.cpp +8 -2
- package/src/duckdb/src/planner/parsed_data/bound_create_table_info.cpp +4 -2
- package/src/duckdb/src/planner/planner.cpp +18 -7
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +13 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +13 -9
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +7 -7
- package/src/duckdb/src/storage/checkpoint_manager.cpp +78 -72
- package/src/duckdb/src/storage/compression/bitpacking.cpp +87 -63
- package/src/duckdb/src/storage/compression/bitpacking_hugeint.cpp +295 -0
- package/src/duckdb/src/storage/compression/fsst.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +52 -13
- package/src/duckdb/src/storage/data_table.cpp +36 -25
- package/src/duckdb/src/storage/index.cpp +4 -26
- package/src/duckdb/src/storage/local_storage.cpp +3 -4
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +267 -0
- package/src/duckdb/src/storage/metadata/metadata_reader.cpp +80 -0
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +98 -0
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +194 -0
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +283 -0
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +762 -0
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +62 -0
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +461 -0
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +421 -0
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +342 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +97 -0
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +22 -0
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +97 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +164 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +69 -51
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +44 -2
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +3 -2
- package/src/duckdb/src/storage/storage_manager.cpp +11 -5
- package/src/duckdb/src/storage/table/chunk_info.cpp +99 -3
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
- package/src/duckdb/src/storage/table/list_column_data.cpp +6 -3
- package/src/duckdb/src/storage/table/persistent_table_data.cpp +1 -2
- package/src/duckdb/src/storage/table/row_group.cpp +102 -20
- package/src/duckdb/src/storage/table/row_group_collection.cpp +23 -19
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +1 -1
- package/src/duckdb/src/storage/wal_replay.cpp +26 -26
- package/src/duckdb/src/storage/write_ahead_log.cpp +3 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -11
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +5 -2
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +11 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/pg_functions.cpp +13 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11019 -10364
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +9 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +10 -0
- package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +31 -1
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
- package/src/duckdb/ub_src_common.cpp +4 -0
- package/src/duckdb/ub_src_common_adbc_nanoarrow.cpp +8 -0
- package/src/duckdb/ub_src_common_arrow_appender.cpp +10 -0
- package/src/duckdb/ub_src_common_serializer.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_execution.cpp +2 -2
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -6
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +1 -11
- package/src/duckdb/ub_src_execution_operator_schema.cpp +1 -1
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
- package/src/duckdb/ub_src_main.cpp +2 -0
- package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +3 -3
- package/src/duckdb/ub_src_storage.cpp +0 -4
- package/src/duckdb/ub_src_storage_compression.cpp +2 -0
- package/src/duckdb/ub_src_storage_metadata.cpp +6 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +30 -0
- package/src/duckdb_node.hpp +1 -0
- package/src/statement.cpp +10 -5
- package/test/columns.test.ts +25 -3
- package/test/extension.test.ts +1 -1
- package/test/test_all_types.test.ts +234 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -193
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -172
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_options.hpp +0 -25
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -69
- package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +0 -27
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_join.hpp +0 -32
- package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +0 -49
- package/src/duckdb/src/include/duckdb/storage/meta_block_writer.hpp +0 -50
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
- package/src/duckdb/src/parser/common_table_expression_info.cpp +0 -19
- package/src/duckdb/src/planner/operator/logical_asof_join.cpp +0 -14
- package/src/duckdb/src/planner/operator/logical_delim_join.cpp +0 -27
- package/src/duckdb/src/storage/meta_block_reader.cpp +0 -78
- package/src/duckdb/src/storage/meta_block_writer.cpp +0 -80
@@ -47,7 +47,7 @@ public:
|
|
47
47
|
unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
|
48
48
|
unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
|
49
49
|
SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override;
|
50
|
-
|
50
|
+
SinkCombineResultType Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const override;
|
51
51
|
|
52
52
|
bool IsSink() const override {
|
53
53
|
return true;
|
package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp
RENAMED
@@ -1,7 +1,7 @@
|
|
1
1
|
//===----------------------------------------------------------------------===//
|
2
2
|
// DuckDB
|
3
3
|
//
|
4
|
-
// duckdb/execution/operator/
|
4
|
+
// duckdb/execution/operator/scan/csv/base_csv_reader.hpp
|
5
5
|
//
|
6
6
|
//
|
7
7
|
//===----------------------------------------------------------------------===//
|
@@ -15,9 +15,9 @@
|
|
15
15
|
#include "duckdb/common/enums/file_compression_type.hpp"
|
16
16
|
#include "duckdb/common/map.hpp"
|
17
17
|
#include "duckdb/common/queue.hpp"
|
18
|
-
#include "duckdb/execution/operator/
|
18
|
+
#include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
|
19
19
|
#include "duckdb/common/multi_file_reader.hpp"
|
20
|
-
#include "duckdb/execution/operator/
|
20
|
+
#include "duckdb/execution/operator/scan/csv/csv_line_info.hpp"
|
21
21
|
|
22
22
|
#include <sstream>
|
23
23
|
|
@@ -30,19 +30,19 @@ struct StrpTimeFormat;
|
|
30
30
|
class FileOpener;
|
31
31
|
class FileSystem;
|
32
32
|
|
33
|
-
enum class ParserMode : uint8_t { PARSING = 0,
|
33
|
+
enum class ParserMode : uint8_t { PARSING = 0, SNIFFING_DATATYPES = 1, PARSING_HEADER = 2 };
|
34
34
|
|
35
35
|
//! Buffered CSV reader is a class that reads values from a stream and parses them as a CSV file
|
36
36
|
class BaseCSVReader {
|
37
37
|
public:
|
38
|
-
BaseCSVReader(ClientContext &context,
|
38
|
+
BaseCSVReader(ClientContext &context, CSVReaderOptions options,
|
39
39
|
const vector<LogicalType> &requested_types = vector<LogicalType>());
|
40
|
-
~BaseCSVReader();
|
40
|
+
virtual ~BaseCSVReader();
|
41
41
|
|
42
42
|
ClientContext &context;
|
43
43
|
FileSystem &fs;
|
44
44
|
Allocator &allocator;
|
45
|
-
|
45
|
+
CSVReaderOptions options;
|
46
46
|
vector<LogicalType> return_types;
|
47
47
|
vector<string> names;
|
48
48
|
MultiFileReaderData reader_data;
|
@@ -73,25 +73,25 @@ public:
|
|
73
73
|
const vector<LogicalType> &GetTypes() {
|
74
74
|
return return_types;
|
75
75
|
}
|
76
|
-
|
77
76
|
//! Get the 1-indexed global line number for the given local error line
|
78
|
-
virtual idx_t GetLineError(idx_t line_error, idx_t buffer_idx) {
|
77
|
+
virtual idx_t GetLineError(idx_t line_error, idx_t buffer_idx, bool stop_at_first = true) {
|
79
78
|
return line_error + 1;
|
80
79
|
};
|
81
80
|
|
82
81
|
//! Initialize projection indices to select all columns
|
83
82
|
void InitializeProjection();
|
84
83
|
|
84
|
+
static unique_ptr<CSVFileHandle> OpenCSV(ClientContext &context, const CSVReaderOptions &options);
|
85
|
+
|
86
|
+
static bool TryCastDateVector(map<LogicalTypeId, StrpTimeFormat> &options, Vector &input_vector,
|
87
|
+
Vector &result_vector, idx_t count, string &error_message, idx_t &line_error);
|
88
|
+
|
89
|
+
static bool TryCastTimestampVector(map<LogicalTypeId, StrpTimeFormat> &options, Vector &input_vector,
|
90
|
+
Vector &result_vector, idx_t count, string &error_message);
|
91
|
+
|
85
92
|
protected:
|
86
93
|
//! Initializes the parse_chunk with varchar columns and aligns info with new number of cols
|
87
94
|
void InitParseChunk(idx_t num_cols);
|
88
|
-
//! Change the date format for the type to the string
|
89
|
-
void SetDateFormat(const string &format_specifier, const LogicalTypeId &sql_type);
|
90
|
-
//! Try to cast a string value to the specified sql type
|
91
|
-
bool TryCastValue(const Value &value, const LogicalType &sql_type);
|
92
|
-
//! Try to cast a vector of values to the specified sql type
|
93
|
-
bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type);
|
94
|
-
|
95
95
|
//! Adds a value to the current row
|
96
96
|
void AddValue(string_t str_val, idx_t &column, vector<idx_t> &escape_positions, bool has_quotes,
|
97
97
|
idx_t buffer_idx = 0);
|
@@ -100,8 +100,6 @@ protected:
|
|
100
100
|
//! Finalizes a chunk, parsing all values that have been added so far and adding them to the insert_chunk
|
101
101
|
bool Flush(DataChunk &insert_chunk, idx_t buffer_idx = 0, bool try_add_line = false);
|
102
102
|
|
103
|
-
unique_ptr<CSVFileHandle> OpenCSV(const BufferedCSVReaderOptions &options);
|
104
|
-
|
105
103
|
void VerifyUTF8(idx_t col_idx);
|
106
104
|
void VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, int64_t offset = 0);
|
107
105
|
string GetLineNumberStr(idx_t linenr, bool linenr_estimated, idx_t buffer_idx = 0);
|
@@ -109,11 +107,13 @@ protected:
|
|
109
107
|
//! Sets the newline delimiter
|
110
108
|
void SetNewLineDelimiter(bool carry = false, bool carry_followed_by_nl = false);
|
111
109
|
|
110
|
+
//! Verifies that the line length did not go over a pre-defined limit.
|
111
|
+
void VerifyLineLength(idx_t line_size, idx_t buffer_idx = 0);
|
112
|
+
|
112
113
|
protected:
|
113
114
|
//! Whether or not the current row's columns have overflown return_types.size()
|
114
115
|
bool error_column_overflow = false;
|
115
116
|
//! Number of sniffed columns - only used when auto-detecting
|
116
|
-
vector<idx_t> sniffed_column_counts;
|
117
117
|
};
|
118
118
|
|
119
119
|
} // namespace duckdb
|
@@ -0,0 +1,72 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp"
|
12
|
+
#include "duckdb/execution/operator/scan/csv/base_csv_reader.hpp"
|
13
|
+
#include "duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp"
|
14
|
+
|
15
|
+
namespace duckdb {
|
16
|
+
struct CopyInfo;
|
17
|
+
struct CSVFileHandle;
|
18
|
+
struct FileHandle;
|
19
|
+
struct StrpTimeFormat;
|
20
|
+
|
21
|
+
class FileOpener;
|
22
|
+
class FileSystem;
|
23
|
+
|
24
|
+
//! Buffered CSV reader is a class that reads values from a stream and parses them as a CSV file
|
25
|
+
class BufferedCSVReader : public BaseCSVReader {
|
26
|
+
//! Initial buffer read size; can be extended for long lines
|
27
|
+
static constexpr idx_t INITIAL_BUFFER_SIZE = 16384;
|
28
|
+
//! Larger buffer size for non disk files
|
29
|
+
static constexpr idx_t INITIAL_BUFFER_SIZE_LARGE = 10000000; // 10MB
|
30
|
+
|
31
|
+
public:
|
32
|
+
BufferedCSVReader(ClientContext &context, CSVReaderOptions options,
|
33
|
+
const vector<LogicalType> &requested_types = vector<LogicalType>());
|
34
|
+
BufferedCSVReader(ClientContext &context, string filename, CSVReaderOptions options,
|
35
|
+
const vector<LogicalType> &requested_types = vector<LogicalType>());
|
36
|
+
virtual ~BufferedCSVReader() {
|
37
|
+
}
|
38
|
+
|
39
|
+
unsafe_unique_array<char> buffer;
|
40
|
+
idx_t buffer_size;
|
41
|
+
idx_t position;
|
42
|
+
idx_t start = 0;
|
43
|
+
|
44
|
+
vector<unsafe_unique_array<char>> cached_buffers;
|
45
|
+
|
46
|
+
unique_ptr<CSVFileHandle> file_handle;
|
47
|
+
//! CSV State Machine Cache
|
48
|
+
CSVStateMachineCache state_machine_cache;
|
49
|
+
|
50
|
+
public:
|
51
|
+
//! Extract a single DataChunk from the CSV file and stores it in insert_chunk
|
52
|
+
void ParseCSV(DataChunk &insert_chunk);
|
53
|
+
static string ColumnTypesError(case_insensitive_map_t<idx_t> sql_types_per_column, const vector<string> &names);
|
54
|
+
|
55
|
+
private:
|
56
|
+
//! Initialize Parser
|
57
|
+
void Initialize(const vector<LogicalType> &requested_types);
|
58
|
+
//! Skips skip_rows, reads header row from input stream
|
59
|
+
void SkipRowsAndReadHeader(idx_t skip_rows, bool skip_header);
|
60
|
+
//! Resets the buffer
|
61
|
+
void ResetBuffer();
|
62
|
+
//! Reads a new buffer from the CSV file if the current one has been exhausted
|
63
|
+
bool ReadBuffer(idx_t &start, idx_t &line_start);
|
64
|
+
//! Try to parse a single datachunk from the file. Throws an exception if anything goes wrong.
|
65
|
+
void ParseCSV(ParserMode mode);
|
66
|
+
//! Extract a single DataChunk from the CSV file and stores it in insert_chunk
|
67
|
+
bool TryParseCSV(ParserMode mode, DataChunk &insert_chunk, string &error_message);
|
68
|
+
//! Skip Empty lines for tables with over one column
|
69
|
+
void SkipEmptyLines();
|
70
|
+
};
|
71
|
+
|
72
|
+
} // namespace duckdb
|
@@ -0,0 +1,110 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/operator/scan/csv/csv_buffer.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/constants.hpp"
|
12
|
+
#include "duckdb/execution/operator/scan/csv/csv_file_handle.hpp"
|
13
|
+
#include "duckdb/storage/buffer_manager.hpp"
|
14
|
+
#include "duckdb/storage/block_manager.hpp"
|
15
|
+
#include "duckdb/storage/buffer/block_handle.hpp"
|
16
|
+
|
17
|
+
namespace duckdb {
|
18
|
+
|
19
|
+
class CSVBufferHandle {
|
20
|
+
public:
|
21
|
+
CSVBufferHandle(BufferHandle handle_p, idx_t actual_size_p, const bool is_first_buffer_p,
|
22
|
+
const bool is_final_buffer_p, idx_t csv_global_state_p, idx_t start_position_p, idx_t file_idx_p)
|
23
|
+
: handle(std::move(handle_p)), actual_size(actual_size_p), is_first_buffer(is_first_buffer_p),
|
24
|
+
is_last_buffer(is_final_buffer_p), csv_global_start(csv_global_state_p), start_position(start_position_p),
|
25
|
+
file_idx(file_idx_p) {};
|
26
|
+
CSVBufferHandle()
|
27
|
+
: actual_size(0), is_first_buffer(false), is_last_buffer(false), csv_global_start(0), start_position(0),
|
28
|
+
file_idx(0) {};
|
29
|
+
//! Handle created during allocation
|
30
|
+
BufferHandle handle;
|
31
|
+
const idx_t actual_size;
|
32
|
+
const bool is_first_buffer;
|
33
|
+
const bool is_last_buffer;
|
34
|
+
const idx_t csv_global_start;
|
35
|
+
const idx_t start_position;
|
36
|
+
const idx_t file_idx;
|
37
|
+
inline char *Ptr() {
|
38
|
+
return char_ptr_cast(handle.Ptr());
|
39
|
+
}
|
40
|
+
};
|
41
|
+
|
42
|
+
//! CSV Buffers are parts of a decompressed CSV File.
|
43
|
+
//! For a decompressed file of 100Mb. With our Buffer size set to 32Mb, we would generate 4 buffers.
|
44
|
+
//! One for the first 32Mb, second and third for the other 32Mb, and the last one with 4 Mb
|
45
|
+
//! These buffers are actually used for sniffing and parsing!
|
46
|
+
class CSVBuffer {
|
47
|
+
public:
|
48
|
+
//! Constructor for Initial Buffer
|
49
|
+
CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle,
|
50
|
+
idx_t &global_csv_current_position, idx_t file_number);
|
51
|
+
|
52
|
+
//! Constructor for `Next()` Buffers
|
53
|
+
CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t buffer_size, idx_t global_csv_current_position,
|
54
|
+
idx_t file_number_p);
|
55
|
+
|
56
|
+
//! Creates a new buffer with the next part of the CSV File
|
57
|
+
shared_ptr<CSVBuffer> Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number);
|
58
|
+
|
59
|
+
//! Gets the buffer actual size
|
60
|
+
idx_t GetBufferSize();
|
61
|
+
|
62
|
+
//! Gets the start position of the buffer, only relevant for the first time it's scanned
|
63
|
+
idx_t GetStart();
|
64
|
+
|
65
|
+
//! If this buffer is the last buffer of the CSV File
|
66
|
+
bool IsCSVFileLastBuffer();
|
67
|
+
|
68
|
+
//! Allocates internal buffer, sets 'block' and 'handle' variables.
|
69
|
+
void AllocateBuffer(idx_t buffer_size);
|
70
|
+
|
71
|
+
void Reload(CSVFileHandle &file_handle);
|
72
|
+
//! Wrapper for the Pin Function, if it can seek, it means that the buffer might have been destroyed, hence we must
|
73
|
+
//! Scan it from the disk file again.
|
74
|
+
unique_ptr<CSVBufferHandle> Pin(CSVFileHandle &file_handle);
|
75
|
+
//! Wrapper for the unpin
|
76
|
+
void Unpin();
|
77
|
+
char *Ptr() {
|
78
|
+
return char_ptr_cast(handle.Ptr());
|
79
|
+
}
|
80
|
+
|
81
|
+
static constexpr idx_t CSV_BUFFER_SIZE = 32000000; // 32MB
|
82
|
+
//! In case the file has a size < 32MB, we will use this size instead
|
83
|
+
//! This is to avoid mallocing a lot of memory for a small file
|
84
|
+
//! And if it's a compressed file we can't use the actual size of the file
|
85
|
+
static constexpr idx_t CSV_MINIMUM_BUFFER_SIZE = 10000000; // 10MB
|
86
|
+
//! If this is the last buffer of the CSV File
|
87
|
+
bool last_buffer = false;
|
88
|
+
|
89
|
+
private:
|
90
|
+
ClientContext &context;
|
91
|
+
//! Actual size can be smaller than the buffer size in case we allocate it too optimistically.
|
92
|
+
idx_t file_size;
|
93
|
+
//! We need to check for Byte Order Mark, to define the start position of this buffer
|
94
|
+
//! https://en.wikipedia.org/wiki/Byte_order_mark#UTF-8
|
95
|
+
idx_t start_position = 0;
|
96
|
+
//! If this is the first buffer of the CSV File
|
97
|
+
bool first_buffer = false;
|
98
|
+
//! Global position from the CSV File where this buffer starts
|
99
|
+
idx_t global_csv_start = 0;
|
100
|
+
//! Number of the file that is in this buffer
|
101
|
+
idx_t file_number = 0;
|
102
|
+
//! If we can seek in the file or not.
|
103
|
+
//! If we can't seek, this means we can't destroy the buffers
|
104
|
+
bool can_seek;
|
105
|
+
//! -------- Allocated Block ---------//
|
106
|
+
//! Block created in allocation
|
107
|
+
shared_ptr<BlockHandle> block;
|
108
|
+
BufferHandle handle;
|
109
|
+
};
|
110
|
+
} // namespace duckdb
|
@@ -0,0 +1,103 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/main/client_context.hpp"
|
12
|
+
#include "duckdb/execution/operator/scan/csv/csv_file_handle.hpp"
|
13
|
+
#include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
|
14
|
+
|
15
|
+
namespace duckdb {
|
16
|
+
class CSVBuffer;
|
17
|
+
class CSVStateMachine;
|
18
|
+
|
19
|
+
//! This class is used to manage the CSV buffers. Buffers are cached when used for auto detection.
|
20
|
+
//! When parsing, buffers are not cached and are just returned.
|
21
|
+
class CSVBufferManager {
|
22
|
+
public:
|
23
|
+
//! Creates a manager for one CSV file. file_handle is taken by value as a unique_ptr; file_idx defaults to 0.
//! NOTE(review): the definition is not in this header - confirm how options and file_idx are used.
CSVBufferManager(ClientContext &context, unique_ptr<CSVFileHandle> file_handle, const CSVReaderOptions &options,
|
24
|
+
idx_t file_idx = 0);
|
25
|
+
//! Returns the buffer with the given id (ids start at 0). During auto-detection, newly read buffers are cached.
|
26
|
+
//! Otherwise buffers are removed from the cache if they are already there, or returned directly, bypassing the cache.
|
27
|
+
unique_ptr<CSVBufferHandle> GetBuffer(const idx_t pos);
|
28
|
+
//! Returns the starting position of the first buffer
|
29
|
+
idx_t GetStartPos();
|
30
|
+
//! unique_ptr to the file handle, gets stolen after sniffing
|
31
|
+
unique_ptr<CSVFileHandle> file_handle;
|
32
|
+
//! Initializes the buffer manager, during its construction/reset
|
33
|
+
void Initialize();
|
34
|
+
|
35
|
+
//! NOTE(review): undocumented upstream; presumably unpins the cached buffer at index cache_idx - confirm
void UnpinBuffer(idx_t cache_idx);
|
36
|
+
|
37
|
+
//! Reference to a client context (not owned); presumably the one given to the constructor - confirm
ClientContext &context;
|
38
|
+
idx_t skip_rows = 0;
|
39
|
+
idx_t file_idx;
|
40
|
+
bool done = false;
|
41
|
+
|
42
|
+
private:
|
43
|
+
//! Reads next buffer in reference to cached_buffers.front()
|
44
|
+
bool ReadNextAndCacheIt();
|
45
|
+
//! Buffers kept in the cache (see the class comment for when caching applies)
vector<shared_ptr<CSVBuffer>> cached_buffers;
|
46
|
+
//! NOTE(review): presumably the most recently read buffer - confirm against the implementation
shared_ptr<CSVBuffer> last_buffer;
|
47
|
+
idx_t global_csv_pos = 0;
|
48
|
+
//! The size of the buffer, if the csv file has a smaller size than this, we will use that instead to malloc less
|
49
|
+
idx_t buffer_size;
|
50
|
+
//! Starting position of first buffer
|
51
|
+
idx_t start_pos = 0;
|
52
|
+
};
|
53
|
+
|
54
|
+
//! Iterates character by character over the buffers handed out by a CSVBufferManager (see Process).
class CSVBufferIterator {
|
55
|
+
public:
|
56
|
+
//! Keeps a shared reference to the buffer manager and starts at the position returned by its GetStartPos()
explicit CSVBufferIterator(shared_ptr<CSVBufferManager> buffer_manager_p)
|
57
|
+
: buffer_manager(std::move(buffer_manager_p)) {
|
58
|
+
cur_pos = buffer_manager->GetStartPos();
|
59
|
+
};
|
60
|
+
|
61
|
+
//! Runs the templated operation OP over the CSV file, one character at a time.
//! OP::Initialize(machine) is called first; then every character from cur_pos onwards is passed to OP::Process,
//! fetching the next buffer from the buffer manager whenever the current one is exhausted.
//! Returns false as soon as OP::Process returns true (cur_pos is left on that character), and returns true once
//! GetBuffer yields no further buffer. OP::Finalize(machine, result) is called before either return.
|
62
|
+
template <class OP, class T>
|
63
|
+
inline bool Process(CSVStateMachine &machine, T &result) {
|
64
|
+
|
65
|
+
OP::Initialize(machine);
|
66
|
+
//! If the current buffer is not set we try to get a new one; only buffer 0 starts at GetStartPos() instead of 0
|
67
|
+
if (!cur_buffer_handle) {
|
68
|
+
cur_pos = 0;
|
69
|
+
if (cur_buffer_idx == 0) {
|
70
|
+
cur_pos = buffer_manager->GetStartPos();
|
71
|
+
}
|
72
|
+
cur_buffer_handle = buffer_manager->GetBuffer(cur_buffer_idx++);
|
73
|
+
D_ASSERT(cur_buffer_handle);
|
74
|
+
}
|
75
|
+
while (cur_buffer_handle) {
|
76
|
+
char *buffer_handle_ptr = cur_buffer_handle->Ptr();
|
77
|
+
while (cur_pos < cur_buffer_handle->actual_size) {
|
78
|
+
if (OP::Process(machine, result, buffer_handle_ptr[cur_pos], cur_pos)) {
|
79
|
+
//! The file is not fully processed, but the operator is satisfied: stop early
|
80
|
+
OP::Finalize(machine, result);
|
81
|
+
return false;
|
82
|
+
}
|
83
|
+
cur_pos++;
|
84
|
+
}
|
85
|
+
//! Current buffer exhausted: fetch the next one and read it from its beginning
cur_buffer_handle = buffer_manager->GetBuffer(cur_buffer_idx++);
|
86
|
+
cur_pos = 0;
|
87
|
+
}
|
88
|
+
//! Done Processing the File
|
89
|
+
OP::Finalize(machine, result);
|
90
|
+
return true;
|
91
|
+
}
|
92
|
+
//! Returns true if the iterator is finished
|
93
|
+
bool Finished();
|
94
|
+
//! Resets the iterator
|
95
|
+
void Reset();
|
96
|
+
|
97
|
+
private:
|
98
|
+
//! Position, within the current buffer, of the next character handed to OP::Process
idx_t cur_pos = 0;
|
99
|
+
//! Index passed to GetBuffer for the next fetch; incremented after every fetch
idx_t cur_buffer_idx = 0;
|
100
|
+
//! Buffer manager the buffers are requested from
shared_ptr<CSVBufferManager> buffer_manager;
|
101
|
+
//! Handle of the buffer currently being processed; Process fetches one when this is not set
unique_ptr<CSVBufferHandle> cur_buffer_handle;
|
102
|
+
};
|
103
|
+
} // namespace duckdb
|
package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp
RENAMED
@@ -1,7 +1,7 @@
|
|
1
1
|
//===----------------------------------------------------------------------===//
|
2
2
|
// DuckDB
|
3
3
|
//
|
4
|
-
// duckdb/execution/operator/
|
4
|
+
// duckdb/execution/operator/scan/csv/csv_file_handle.hpp
|
5
5
|
//
|
6
6
|
//
|
7
7
|
//===----------------------------------------------------------------------===//
|
@@ -20,15 +20,13 @@ class FileSystem;
|
|
20
20
|
struct CSVFileHandle {
|
21
21
|
public:
|
22
22
|
CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<FileHandle> file_handle_p, const string &path_p,
|
23
|
-
FileCompressionType compression
|
23
|
+
FileCompressionType compression);
|
24
24
|
|
25
25
|
mutex main_mutex;
|
26
26
|
|
27
27
|
public:
|
28
28
|
bool CanSeek();
|
29
29
|
void Seek(idx_t position);
|
30
|
-
idx_t SeekPosition();
|
31
|
-
void Reset();
|
32
30
|
bool OnDiskFile();
|
33
31
|
|
34
32
|
idx_t FileSize();
|
@@ -38,29 +36,24 @@ public:
|
|
38
36
|
idx_t Read(void *buffer, idx_t nr_bytes);
|
39
37
|
|
40
38
|
string ReadLine();
|
41
|
-
|
39
|
+
|
40
|
+
string GetFilePath();
|
42
41
|
|
43
42
|
static unique_ptr<FileHandle> OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
|
44
43
|
FileCompressionType compression);
|
45
44
|
static unique_ptr<CSVFileHandle> OpenFile(FileSystem &fs, Allocator &allocator, const string &path,
|
46
|
-
FileCompressionType compression
|
45
|
+
FileCompressionType compression);
|
47
46
|
|
48
47
|
private:
|
49
|
-
FileSystem &fs;
|
50
|
-
Allocator &allocator;
|
51
48
|
unique_ptr<FileHandle> file_handle;
|
52
49
|
string path;
|
53
|
-
FileCompressionType compression;
|
54
|
-
bool reset_enabled = true;
|
55
50
|
bool can_seek = false;
|
56
51
|
bool on_disk_file = false;
|
57
52
|
idx_t file_size = 0;
|
58
|
-
|
59
|
-
AllocatedData cached_buffer;
|
60
|
-
idx_t read_position = 0;
|
61
|
-
idx_t buffer_size = 0;
|
62
|
-
idx_t buffer_capacity = 0;
|
53
|
+
|
63
54
|
idx_t requested_bytes = 0;
|
55
|
+
//! If we finished reading the file
|
56
|
+
bool finished = false;
|
64
57
|
};
|
65
58
|
|
66
59
|
} // namespace duckdb
|
package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp
RENAMED
@@ -1,7 +1,7 @@
|
|
1
1
|
//===----------------------------------------------------------------------===//
|
2
2
|
// DuckDB
|
3
3
|
//
|
4
|
-
// duckdb/execution/operator/
|
4
|
+
// duckdb/execution/operator/scan/csv/csv_line_info.hpp
|
5
5
|
//
|
6
6
|
//
|
7
7
|
//===----------------------------------------------------------------------===//
|
@@ -18,11 +18,12 @@ public:
|
|
18
18
|
bool CanItGetLine(idx_t file_idx, idx_t batch_idx);
|
19
19
|
|
20
20
|
//! Return the 1-indexed line number
|
21
|
-
idx_t GetLine(idx_t batch_idx, idx_t line_error = 0, idx_t file_idx = 0, idx_t cur_start = 0, bool verify = true
|
21
|
+
idx_t GetLine(idx_t batch_idx, idx_t line_error = 0, idx_t file_idx = 0, idx_t cur_start = 0, bool verify = true,
|
22
|
+
bool stop_at_first = true);
|
22
23
|
//! Verify if the CSV File was read correctly from [0,batch_idx] batches.
|
23
24
|
void Verify(idx_t file_idx, idx_t batch_idx, idx_t cur_first_pos);
|
24
|
-
//! Lines read per batch, <batch_index,count
|
25
|
-
unordered_map<idx_t, idx_t
|
25
|
+
//! Lines read per batch, <file_index, <batch_index, count>>
|
26
|
+
vector<unordered_map<idx_t, idx_t>> lines_read;
|
26
27
|
//! Set of batches that have been initialized but are not yet finished.
|
27
28
|
vector<set<idx_t>> current_batches;
|
28
29
|
//! Pointer to CSV Reader Mutex
|
@@ -1,14 +1,14 @@
|
|
1
1
|
//===----------------------------------------------------------------------===//
|
2
2
|
// DuckDB
|
3
3
|
//
|
4
|
-
// duckdb/execution/operator/
|
4
|
+
// duckdb/execution/operator/scan/csv/csv_reader_options.hpp
|
5
5
|
//
|
6
6
|
//
|
7
7
|
//===----------------------------------------------------------------------===//
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
-
#include "duckdb/execution/operator/
|
11
|
+
#include "duckdb/execution/operator/scan/csv/csv_buffer.hpp"
|
12
12
|
#include "duckdb/common/map.hpp"
|
13
13
|
#include "duckdb/function/scalar/strftime_format.hpp"
|
14
14
|
#include "duckdb/common/types/value.hpp"
|
@@ -28,35 +28,75 @@ enum class NewLineIdentifier : uint8_t {
|
|
28
28
|
|
29
29
|
enum class ParallelMode { AUTOMATIC = 0, PARALLEL = 1, SINGLE_THREADED = 2 };
|
30
30
|
|
31
|
-
|
31
|
+
//! Struct that holds the configuration of a CSV State Machine
|
32
|
+
//! Basically which delimiter, quote and escape characters were used to generate it.
|
33
|
+
struct CSVStateMachineOptions {
|
34
|
+
//! Default configuration: uses the in-class defaults below (',' delimiter, '"' quote, '\0' escape)
CSVStateMachineOptions() {};
|
35
|
+
//! Configuration with an explicit delimiter, quote and escape character
CSVStateMachineOptions(char delimiter_p, char quote_p, char escape_p)
|
36
|
+
: delimiter(delimiter_p), quote(quote_p), escape(escape_p) {};
|
37
|
+
|
38
|
+
//! Delimiter to separate columns within each line
|
39
|
+
char delimiter = ',';
|
40
|
+
//! Quote used for columns that contain reserved characters, e.g., the delimiter
|
41
|
+
char quote = '\"';
|
42
|
+
//! Escape character to escape quote character
|
43
|
+
char escape = '\0';
|
44
|
+
|
45
|
+
//! Two configurations are equal when delimiter, quote and escape all match
bool operator==(const CSVStateMachineOptions &other) const {
|
46
|
+
return delimiter == other.delimiter && quote == other.quote && escape == other.escape;
|
47
|
+
}
|
48
|
+
|
49
|
+
//! Serialization hooks; the definitions are not in this header
void Serialize(FieldWriter &writer) const;
|
50
|
+
void Deserialize(FieldReader &reader);
|
51
|
+
};
|
52
|
+
|
53
|
+
//! Groups the options that describe a CSV dialect: the state machine options (delimiter, quote, escape),
//! the newline identifier, the column count, the header flag, the date formats and the skip/start offsets.
struct DialectOptions {
|
54
|
+
//! Delimiter, quote and escape configuration (see CSVStateMachineOptions)
CSVStateMachineOptions state_machine_options;
|
55
|
+
//! New Line separator
|
56
|
+
NewLineIdentifier new_line = NewLineIdentifier::NOT_SET;
|
57
|
+
//! Expected number of columns
|
58
|
+
idx_t num_cols = 0;
|
59
|
+
//! Whether or not the file has a header line
|
60
|
+
bool header = false;
|
61
|
+
//! The date format to use (if any is specified); has entries for DATE and TIMESTAMP by default
|
62
|
+
map<LogicalTypeId, StrpTimeFormat> date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
|
63
|
+
//! Whether or not a type format is specified; false for both DATE and TIMESTAMP by default
|
64
|
+
map<LogicalTypeId, bool> has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}};
|
65
|
+
//! How many leading rows to skip
|
66
|
+
idx_t skip_rows = 0;
|
67
|
+
//! True start of the first CSV Buffer (after skipping empty lines, headers, notes and so on)
|
68
|
+
idx_t true_start = 0;
|
69
|
+
|
70
|
+
//! Serialization hooks; the definitions are not in this header
void Serialize(FieldWriter &writer) const;
|
71
|
+
void Deserialize(FieldReader &reader);
|
72
|
+
};
|
73
|
+
|
74
|
+
struct CSVReaderOptions {
|
32
75
|
//===--------------------------------------------------------------------===//
|
33
76
|
// CommonCSVOptions
|
34
77
|
//===--------------------------------------------------------------------===//
|
35
|
-
|
78
|
+
//! Dialect-related options (see the DialectOptions struct above)
|
79
|
+
DialectOptions dialect_options;
|
36
80
|
//! Whether or not a delimiter was defined by the user
|
37
81
|
bool has_delimiter = false;
|
38
|
-
//! Delimiter to separate columns within each line
|
39
|
-
string delimiter = ",";
|
40
82
|
//! Whether or not a new_line was defined by the user
|
41
83
|
bool has_newline = false;
|
42
|
-
//! New Line separator
|
43
|
-
NewLineIdentifier new_line = NewLineIdentifier::NOT_SET;
|
44
84
|
//! Whether or not a quote was defined by the user
|
45
85
|
bool has_quote = false;
|
46
|
-
//! Quote used for columns that contain reserved characters, e.g., delimiter
|
47
|
-
string quote = "\"";
|
48
86
|
//! Whether or not an escape character was defined by the user
|
49
87
|
bool has_escape = false;
|
50
|
-
//! Escape character to escape quote character
|
51
|
-
string escape;
|
52
88
|
//! Whether or not header information was given by the user
|
53
89
|
bool has_header = false;
|
54
|
-
//! Whether or not the file has a header line
|
55
|
-
bool header = false;
|
56
90
|
//! Whether or not we should ignore InvalidInput errors
|
57
91
|
bool ignore_errors = false;
|
58
|
-
//!
|
59
|
-
|
92
|
+
//! Rejects table name
|
93
|
+
string rejects_table_name;
|
94
|
+
//! Rejects table entry limit (0 = no limit)
|
95
|
+
idx_t rejects_limit = 0;
|
96
|
+
//! Columns to use as recovery key for rejected rows when reading with ignore_errors = true
|
97
|
+
vector<string> rejects_recovery_columns;
|
98
|
+
//! Index of the recovery columns
|
99
|
+
vector<idx_t> rejects_recovery_column_ids;
|
60
100
|
//! Number of samples to buffer
|
61
101
|
idx_t buffer_sample_size = STANDARD_VECTOR_SIZE * 50;
|
62
102
|
//! Specifies the string that represents a null value
|
@@ -84,9 +124,6 @@ struct BufferedCSVReaderOptions {
|
|
84
124
|
//===--------------------------------------------------------------------===//
|
85
125
|
// ReadCSVOptions
|
86
126
|
//===--------------------------------------------------------------------===//
|
87
|
-
|
88
|
-
//! How many leading rows to skip
|
89
|
-
idx_t skip_rows = 0;
|
90
127
|
//! Whether or not the skip_rows is set by the user
|
91
128
|
bool skip_rows_set = false;
|
92
129
|
//! Maximum CSV line size: specified because if we reach this amount, we likely have wrong delimiters (default: 2MB)
|
@@ -109,7 +146,7 @@ struct BufferedCSVReaderOptions {
|
|
109
146
|
//! Multi-file reader options
|
110
147
|
MultiFileReaderOptions file_options;
|
111
148
|
//! Buffer Size (Parallel Scan)
|
112
|
-
idx_t buffer_size = CSVBuffer::
|
149
|
+
idx_t buffer_size = CSVBuffer::CSV_BUFFER_SIZE;
|
113
150
|
//! Decimal separator when reading as numeric
|
114
151
|
string decimal_separator = ".";
|
115
152
|
//! Whether or not to pad rows that do not have enough columns with NULL values
|
@@ -129,16 +166,13 @@ struct BufferedCSVReaderOptions {
|
|
129
166
|
string suffix;
|
130
167
|
string write_newline;
|
131
168
|
|
132
|
-
//! The date format to use (if any is specified)
|
133
|
-
std::map<LogicalTypeId, StrpTimeFormat> date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
|
134
169
|
//! The date format to use for writing (if any is specified)
|
135
|
-
|
136
|
-
{LogicalTypeId::TIMESTAMP, {}}};
|
137
|
-
//! Whether or not a type format is specified
|
138
|
-
std::map<LogicalTypeId, bool> has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}};
|
170
|
+
map<LogicalTypeId, StrfTimeFormat> write_date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
|
139
171
|
|
140
172
|
void Serialize(FieldWriter &writer) const;
|
141
173
|
void Deserialize(FieldReader &reader);
|
174
|
+
void FormatSerialize(FormatSerializer &serializer) const;
|
175
|
+
static CSVReaderOptions FormatDeserialize(FormatDeserializer &deserializer);
|
142
176
|
|
143
177
|
void SetCompression(const string &compression);
|
144
178
|
void SetHeader(bool has_header);
|
@@ -155,10 +189,9 @@ struct BufferedCSVReaderOptions {
|
|
155
189
|
//! set - argument(s) to the option
|
156
190
|
//! expected_names - names expected if the option is "columns"
|
157
191
|
void SetReadOption(const string &loption, const Value &value, vector<string> &expected_names);
|
158
|
-
|
159
192
|
void SetWriteOption(const string &loption, const Value &value);
|
160
193
|
void SetDateFormat(LogicalTypeId type, const string &format, bool read_format);
|
161
194
|
|
162
|
-
|
195
|
+
string ToString() const;
|
163
196
|
};
|
164
197
|
} // namespace duckdb
|