duckdb 1.1.2-dev4.0 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/putil.cpp +0 -5
- package/src/duckdb/extension/icu/third_party/icu/common/rbbiscan.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/rbbitblb.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/ucurr.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uresbund.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uresimp.h +31 -31
- package/src/duckdb/extension/icu/third_party/icu/common/ustring.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +12 -12
- package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/listformatter.cpp +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/number_decimalquantity.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +28 -28
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +7 -7
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/ucol.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/ucoleitr.h +41 -41
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/umsg.h +41 -41
- package/src/duckdb/extension/icu/third_party/icu/i18n/usrchimp.h +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +13 -7
- package/src/duckdb/extension/parquet/column_writer.cpp +2 -1
- package/src/duckdb/extension/parquet/geo_parquet.cpp +24 -9
- package/src/duckdb/extension/parquet/include/geo_parquet.hpp +3 -1
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -0
- package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -1
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +0 -4
- package/src/duckdb/extension/parquet/parquet_extension.cpp +20 -6
- package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -2
- package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -1
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +0 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +5 -5
- package/src/duckdb/src/common/allocator.cpp +3 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +1 -0
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +11 -0
- package/src/duckdb/src/common/arrow/schema_metadata.cpp +6 -4
- package/src/duckdb/src/common/enum_util.cpp +33 -0
- package/src/duckdb/src/common/exception.cpp +3 -0
- package/src/duckdb/src/common/extra_type_info.cpp +1 -44
- package/src/duckdb/src/common/field_writer.cpp +97 -0
- package/src/duckdb/src/common/render_tree.cpp +7 -5
- package/src/duckdb/src/common/row_operations/row_match.cpp +359 -0
- package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +27 -0
- package/src/duckdb/src/common/serializer/buffered_serializer.cpp +36 -0
- package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
- package/src/duckdb/src/common/serializer.cpp +24 -0
- package/src/duckdb/src/common/sort/comparators.cpp +2 -2
- package/src/duckdb/src/common/types/bit.cpp +57 -34
- package/src/duckdb/src/common/types/data_chunk.cpp +32 -29
- package/src/duckdb/src/common/types/vector_cache.cpp +12 -6
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +14 -0
- package/src/duckdb/src/core_functions/aggregate/distributive/bitstring_agg.cpp +20 -1
- package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/holistic/approx_top_k.cpp +32 -7
- package/src/duckdb/src/core_functions/function_list.cpp +1 -2
- package/src/duckdb/src/core_functions/scalar/bit/bitstring.cpp +23 -5
- package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +12 -6
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +1 -1
- package/src/duckdb/src/execution/expression_executor/execute_between.cpp +4 -3
- package/src/duckdb/src/execution/expression_executor/execute_case.cpp +4 -3
- package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -1
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +3 -2
- package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -1
- package/src/duckdb/src/execution/expression_executor/execute_function.cpp +2 -1
- package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +3 -2
- package/src/duckdb/src/execution/expression_executor/execute_reference.cpp +1 -1
- package/src/duckdb/src/execution/expression_executor.cpp +9 -3
- package/src/duckdb/src/execution/expression_executor_state.cpp +11 -9
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +238 -0
- package/src/duckdb/src/execution/index/art/plan_art.cpp +94 -0
- package/src/duckdb/src/execution/index/index_type_set.cpp +4 -1
- package/src/duckdb/src/execution/join_hashtable.cpp +7 -8
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +6 -4
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +4 -4
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +44 -5
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +28 -24
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +25 -26
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +5 -3
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +4 -4
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +73 -27
- package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +695 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1487 -0
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +72 -0
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +280 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +666 -0
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +14 -4
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +207 -0
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +207 -0
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +6 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -4
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +14 -87
- package/src/duckdb/src/execution/physical_plan/plan_export.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +1 -1
- package/src/duckdb/src/execution/reservoir_sample.cpp +1 -1
- package/src/duckdb/src/execution/window_executor.cpp +3 -3
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
- package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -2
- package/src/duckdb/src/function/scalar/string/concat.cpp +118 -151
- package/src/duckdb/src/function/table/arrow.cpp +13 -0
- package/src/duckdb/src/function/table/arrow_conversion.cpp +12 -7
- package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
- package/src/duckdb/src/function/table/read_csv.cpp +2 -30
- package/src/duckdb/src/function/table/sniff_csv.cpp +2 -1
- package/src/duckdb/src/function/table/system/duckdb_secrets.cpp +15 -7
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/atomic.hpp +13 -1
- package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +3 -4
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info/enum_type_info.hpp +53 -0
- package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +5 -5
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +36 -33
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +10 -13
- package/src/duckdb/src/include/duckdb/common/types/uhugeint.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/vector_cache.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/windows_undefs.hpp +2 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +2 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +0 -6
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/index_type.hpp +16 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +4 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +91 -36
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +2 -5
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/database.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +20 -22
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -9
- package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +8 -1
- package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -0
- package/src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp +5 -5
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +15 -5
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_export.hpp +10 -13
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +0 -2
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +5 -1
- package/src/duckdb/src/include/duckdb.h +2 -2
- package/src/duckdb/src/main/appender.cpp +3 -0
- package/src/duckdb/src/main/capi/profiling_info-c.cpp +5 -2
- package/src/duckdb/src/main/client_context.cpp +8 -2
- package/src/duckdb/src/main/connection.cpp +1 -1
- package/src/duckdb/src/main/database.cpp +13 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_install.cpp +9 -1
- package/src/duckdb/src/main/extension/extension_load.cpp +3 -2
- package/src/duckdb/src/main/extension_install_info.cpp +1 -1
- package/src/duckdb/src/main/profiling_info.cpp +78 -58
- package/src/duckdb/src/main/query_profiler.cpp +79 -89
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +1 -1
- package/src/duckdb/src/main/secret/secret.cpp +2 -1
- package/src/duckdb/src/main/secret/secret_manager.cpp +14 -0
- package/src/duckdb/src/optimizer/cte_filter_pusher.cpp +4 -2
- package/src/duckdb/src/optimizer/deliminator.cpp +0 -7
- package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +7 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +4 -1
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +21 -21
- package/src/duckdb/src/parallel/task_scheduler.cpp +9 -0
- package/src/duckdb/src/parser/parsed_data/exported_table_data.cpp +22 -0
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +3 -0
- package/src/duckdb/src/parser/statement/insert_statement.cpp +7 -1
- package/src/duckdb/src/parser/transform/expression/transform_boolean_test.cpp +1 -1
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +89 -87
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +4 -9
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +4 -0
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +4 -3
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +13 -3
- package/src/duckdb/src/planner/expression_binder.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_export.cpp +28 -0
- package/src/duckdb/src/planner/table_binding.cpp +1 -2
- package/src/duckdb/src/planner/table_filter.cpp +6 -2
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +2 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
- package/src/duckdb/src/storage/compression/bitpacking.cpp +7 -3
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +16 -0
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +29 -0
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +15 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +2 -1
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +3 -5
- package/src/duckdb/src/storage/storage_info.cpp +4 -4
- package/src/duckdb/src/storage/table/row_group_collection.cpp +1 -1
- package/src/duckdb/src/storage/table/row_version_manager.cpp +5 -1
- package/src/duckdb/src/storage/temporary_file_manager.cpp +1 -1
- package/src/duckdb/src/transaction/duck_transaction.cpp +15 -14
- package/src/duckdb/third_party/brotli/common/brotli_platform.h +1 -1
- package/src/duckdb/third_party/brotli/dec/decode.cpp +1 -1
- package/src/duckdb/third_party/brotli/enc/memory.cpp +4 -4
- package/src/duckdb/third_party/fsst/libfsst.cpp +1 -1
- package/src/duckdb/third_party/hyperloglog/sds.cpp +1 -1
- package/src/duckdb/third_party/hyperloglog/sds.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/common/keywords.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/datatype/timestamp.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/mb/pg_wchar.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/bitmapset.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/lockoptions.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/makefuncs.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/pg_list.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/value.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/parser/gramparse.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/parser/parser.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/parser/scanner.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/parser/scansup.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/pg_functions.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/pg_functions.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_nodes_list.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_nodes_makefuncs.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_nodes_value.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +1964 -1964
- package/src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +1 -1
- package/src/duckdb/third_party/lz4/lz4.cpp +1 -1
- package/src/duckdb/third_party/mbedtls/include/des_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/aes_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/aria_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/asn1write.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/camellia_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/ccm_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/chacha20.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/chachapoly.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/cmac.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/config_psa.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/ecdsa.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/ecp.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/gcm_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/md5.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/nist_kw.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/pkcs12.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/pkcs5.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/psa_util.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/ripemd160.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/threading.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/timing.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/platform_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/psa/crypto.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/rsa_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/sha1_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/sha256_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/sha512_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/ssl_misc.h +1 -1
- package/src/duckdb/third_party/mbedtls/library/aesni.h +1 -1
- package/src/duckdb/third_party/mbedtls/library/padlock.h +1 -1
- package/src/duckdb/third_party/miniz/miniz.cpp +1 -1
- package/src/duckdb/third_party/parquet/parquet_types.cpp +1 -1
- package/src/duckdb/third_party/parquet/windows_compatibility.h +1 -1
- package/src/duckdb/third_party/pcg/pcg_extras.hpp +1 -1
- package/src/duckdb/third_party/pcg/pcg_uint128.hpp +1 -1
- package/src/duckdb/third_party/skiplist/Node.h +4 -4
- package/src/duckdb/third_party/snappy/snappy.cc +1 -1
- package/src/duckdb/third_party/snappy/snappy_version.hpp +1 -1
- package/src/duckdb/third_party/thrift/thrift/thrift-config.h +1 -1
- package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1 -1
- package/src/duckdb/third_party/zstd/include/zstd_static.h +1 -1
- package/src/duckdb/ub_src_execution_index_art.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
- package/src/duckdb/ub_src_planner_operator.cpp +2 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
#include "duckdb/execution/operator/csv_scanner/base_scanner.hpp"
|
2
2
|
|
3
|
-
#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
|
3
|
+
#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
|
4
4
|
#include "duckdb/execution/operator/csv_scanner/skip_scanner.hpp"
|
5
5
|
|
6
6
|
namespace duckdb {
|
@@ -60,14 +60,53 @@ bool CSVSchema::Empty() const {
|
|
60
60
|
return columns.empty();
|
61
61
|
}
|
62
62
|
|
63
|
-
bool CSVSchema::SchemasMatch(string &error_message,
|
64
|
-
|
65
|
-
D_ASSERT(names.size() ==
|
63
|
+
bool CSVSchema::SchemasMatch(string &error_message, SnifferResult &sniffer_result, const string &cur_file_path,
|
64
|
+
bool is_minimal_sniffer) const {
|
65
|
+
D_ASSERT(sniffer_result.names.size() == sniffer_result.return_types.size());
|
66
66
|
bool match = true;
|
67
67
|
unordered_map<string, TypeIdxPair> current_schema;
|
68
|
-
|
68
|
+
|
69
|
+
for (idx_t i = 0; i < sniffer_result.names.size(); i++) {
|
69
70
|
// Populate our little schema
|
70
|
-
current_schema[names[i]] = {
|
71
|
+
current_schema[sniffer_result.names[i]] = {sniffer_result.return_types[i], i};
|
72
|
+
}
|
73
|
+
if (is_minimal_sniffer) {
|
74
|
+
auto min_sniffer = static_cast<AdaptiveSnifferResult &>(sniffer_result);
|
75
|
+
if (!min_sniffer.more_than_one_row) {
|
76
|
+
bool min_sniff_match = true;
|
77
|
+
// If we don't have more than one row, either the names must match or the types must match.
|
78
|
+
for (auto &column : columns) {
|
79
|
+
if (current_schema.find(column.name) == current_schema.end()) {
|
80
|
+
min_sniff_match = false;
|
81
|
+
break;
|
82
|
+
}
|
83
|
+
}
|
84
|
+
if (min_sniff_match) {
|
85
|
+
return true;
|
86
|
+
}
|
87
|
+
// Otherwise, the types must match.
|
88
|
+
min_sniff_match = true;
|
89
|
+
if (sniffer_result.return_types.size() == columns.size()) {
|
90
|
+
idx_t return_type_idx = 0;
|
91
|
+
for (auto &column : columns) {
|
92
|
+
if (column.type != sniffer_result.return_types[return_type_idx++]) {
|
93
|
+
min_sniff_match = false;
|
94
|
+
break;
|
95
|
+
}
|
96
|
+
}
|
97
|
+
} else {
|
98
|
+
min_sniff_match = false;
|
99
|
+
}
|
100
|
+
if (min_sniff_match) {
|
101
|
+
// If we got here, we have the right types but the wrong names, lets fix the names
|
102
|
+
idx_t sniff_name_idx = 0;
|
103
|
+
for (auto &column : columns) {
|
104
|
+
sniffer_result.names[sniff_name_idx++] = column.name;
|
105
|
+
}
|
106
|
+
return true;
|
107
|
+
}
|
108
|
+
}
|
109
|
+
// If we got to this point, the minimal sniffer doesn't match, we throw an error.
|
71
110
|
}
|
72
111
|
// Here we check if the schema of a given file matched our original schema
|
73
112
|
// We consider it's not a match if:
|
@@ -258,7 +258,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
|
|
258
258
|
// We check for a weird case, where we ignore an extra value, if it is a null value
|
259
259
|
return;
|
260
260
|
}
|
261
|
-
validity_mask[chunk_col_id]->SetInvalid(number_of_rows);
|
261
|
+
validity_mask[chunk_col_id]->SetInvalid(static_cast<idx_t>(number_of_rows));
|
262
262
|
}
|
263
263
|
cur_col_id++;
|
264
264
|
chunk_col_id++;
|
@@ -447,7 +447,11 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
|
|
447
447
|
}
|
448
448
|
|
449
449
|
DataChunk &StringValueResult::ToChunk() {
|
450
|
-
|
450
|
+
if (number_of_rows < 0) {
|
451
|
+
throw InternalException("CSVScanner: ToChunk() function. Has a negative number of rows, this indicates an "
|
452
|
+
"issue with the error handler.");
|
453
|
+
}
|
454
|
+
parse_chunk.SetCardinality(static_cast<idx_t>(number_of_rows));
|
451
455
|
return parse_chunk;
|
452
456
|
}
|
453
457
|
|
@@ -658,7 +662,7 @@ bool LineError::HandleErrors(StringValueResult &result) {
|
|
658
662
|
result.RemoveLastLine();
|
659
663
|
} else {
|
660
664
|
// Otherwise, we add it to the borked rows to remove it later and just cleanup the column variables.
|
661
|
-
result.borked_rows.insert(result.number_of_rows);
|
665
|
+
result.borked_rows.insert(static_cast<idx_t>(result.number_of_rows));
|
662
666
|
result.cur_col_id = 0;
|
663
667
|
result.chunk_col_id = 0;
|
664
668
|
}
|
@@ -740,9 +744,9 @@ bool StringValueResult::AddRowInternal() {
|
|
740
744
|
}
|
741
745
|
|
742
746
|
if (current_errors.HandleErrors(*this)) {
|
743
|
-
line_positions_per_row[number_of_rows] = current_line_position;
|
747
|
+
line_positions_per_row[static_cast<idx_t>(number_of_rows)] = current_line_position;
|
744
748
|
number_of_rows++;
|
745
|
-
if (number_of_rows >= result_size) {
|
749
|
+
if (static_cast<idx_t>(number_of_rows) >= result_size) {
|
746
750
|
// We have a full chunk
|
747
751
|
return true;
|
748
752
|
}
|
@@ -769,7 +773,7 @@ bool StringValueResult::AddRowInternal() {
|
|
769
773
|
if (empty) {
|
770
774
|
static_cast<string_t *>(vector_ptr[chunk_col_id])[number_of_rows] = string_t();
|
771
775
|
} else {
|
772
|
-
validity_mask[chunk_col_id]->SetInvalid(number_of_rows);
|
776
|
+
validity_mask[chunk_col_id]->SetInvalid(static_cast<idx_t>(number_of_rows));
|
773
777
|
}
|
774
778
|
cur_col_id++;
|
775
779
|
chunk_col_id++;
|
@@ -799,11 +803,11 @@ bool StringValueResult::AddRowInternal() {
|
|
799
803
|
RemoveLastLine();
|
800
804
|
}
|
801
805
|
}
|
802
|
-
line_positions_per_row[number_of_rows] = current_line_position;
|
806
|
+
line_positions_per_row[static_cast<idx_t>(number_of_rows)] = current_line_position;
|
803
807
|
cur_col_id = 0;
|
804
808
|
chunk_col_id = 0;
|
805
809
|
number_of_rows++;
|
806
|
-
if (number_of_rows >= result_size) {
|
810
|
+
if (static_cast<idx_t>(number_of_rows) >= result_size) {
|
807
811
|
// We have a full chunk
|
808
812
|
return true;
|
809
813
|
}
|
@@ -861,12 +865,12 @@ bool StringValueResult::EmptyLine(StringValueResult &result, const idx_t buffer_
|
|
861
865
|
if (empty) {
|
862
866
|
static_cast<string_t *>(result.vector_ptr[0])[result.number_of_rows] = string_t();
|
863
867
|
} else {
|
864
|
-
result.validity_mask[0]->SetInvalid(result.number_of_rows);
|
868
|
+
result.validity_mask[0]->SetInvalid(static_cast<idx_t>(result.number_of_rows));
|
865
869
|
}
|
866
870
|
result.number_of_rows++;
|
867
871
|
}
|
868
872
|
}
|
869
|
-
if (result.number_of_rows >= result.result_size) {
|
873
|
+
if (static_cast<idx_t>(result.number_of_rows) >= result.result_size) {
|
870
874
|
// We have a full chunk
|
871
875
|
return true;
|
872
876
|
}
|
@@ -1043,15 +1047,15 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) {
|
|
1043
1047
|
}
|
1044
1048
|
if (!result.borked_rows.empty()) {
|
1045
1049
|
// We must remove the borked lines from our chunk
|
1046
|
-
SelectionVector
|
1050
|
+
SelectionVector successful_rows(parse_chunk.size());
|
1047
1051
|
idx_t sel_idx = 0;
|
1048
1052
|
for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
|
1049
1053
|
if (result.borked_rows.find(row_idx) == result.borked_rows.end()) {
|
1050
|
-
|
1054
|
+
successful_rows.set_index(sel_idx++, row_idx);
|
1051
1055
|
}
|
1052
1056
|
}
|
1053
1057
|
// Now we slice the result
|
1054
|
-
insert_chunk.Slice(
|
1058
|
+
insert_chunk.Slice(successful_rows, sel_idx);
|
1055
1059
|
}
|
1056
1060
|
}
|
1057
1061
|
|
@@ -1389,7 +1393,7 @@ void StringValueResult::SkipBOM() const {
|
|
1389
1393
|
void StringValueResult::RemoveLastLine() {
|
1390
1394
|
// potentially de-nullify values
|
1391
1395
|
for (idx_t i = 0; i < chunk_col_id; i++) {
|
1392
|
-
validity_mask[i]->SetValid(number_of_rows);
|
1396
|
+
validity_mask[i]->SetValid(static_cast<idx_t>(number_of_rows));
|
1393
1397
|
}
|
1394
1398
|
// reset column trackers
|
1395
1399
|
cur_col_id = 0;
|
@@ -1470,10 +1474,6 @@ void StringValueScanner::SetStart() {
|
|
1470
1474
|
}
|
1471
1475
|
return;
|
1472
1476
|
}
|
1473
|
-
if (state_machine->options.IgnoreErrors()) {
|
1474
|
-
// If we are ignoring errors we don't really need to figure out a line.
|
1475
|
-
return;
|
1476
|
-
}
|
1477
1477
|
// The result size of the data after skipping the row is one line
|
1478
1478
|
// We have to look for a new line that fits our schema
|
1479
1479
|
// 1. We walk until the next new line
|
@@ -1524,7 +1524,7 @@ void StringValueScanner::SetStart() {
|
|
1524
1524
|
}
|
1525
1525
|
|
1526
1526
|
void StringValueScanner::FinalizeChunkProcess() {
|
1527
|
-
if (result.number_of_rows >= result.result_size || iterator.done) {
|
1527
|
+
if (static_cast<idx_t>(result.number_of_rows) >= result.result_size || iterator.done) {
|
1528
1528
|
// We are done
|
1529
1529
|
if (!sniffing) {
|
1530
1530
|
if (csv_file_scan) {
|
@@ -1562,14 +1562,18 @@ void StringValueScanner::FinalizeChunkProcess() {
|
|
1562
1562
|
if (result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
|
1563
1563
|
has_unterminated_quotes = true;
|
1564
1564
|
}
|
1565
|
-
result.current_errors.HandleErrors(result)
|
1565
|
+
if (result.current_errors.HandleErrors(result)) {
|
1566
|
+
result.number_of_rows++;
|
1567
|
+
}
|
1566
1568
|
}
|
1567
1569
|
if (states.IsQuotedCurrent() && !has_unterminated_quotes) {
|
1568
1570
|
// If we finish the execution of a buffer, and we end in a quoted state, it means we have unterminated
|
1569
1571
|
// quotes
|
1570
1572
|
result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id,
|
1571
1573
|
result.last_position);
|
1572
|
-
result.current_errors.HandleErrors(result)
|
1574
|
+
if (result.current_errors.HandleErrors(result)) {
|
1575
|
+
result.number_of_rows++;
|
1576
|
+
}
|
1573
1577
|
}
|
1574
1578
|
if (!iterator.done) {
|
1575
1579
|
if (iterator.pos.buffer_pos >= iterator.GetEndPos() || iterator.pos.buffer_idx > iterator.GetBufferIdx() ||
|
@@ -1580,9 +1584,9 @@ void StringValueScanner::FinalizeChunkProcess() {
|
|
1580
1584
|
} else {
|
1581
1585
|
// 2) If a boundary is not set
|
1582
1586
|
// We read until the chunk is complete, or we have nothing else to read.
|
1583
|
-
while (!FinishedFile() && result.number_of_rows < result.result_size) {
|
1587
|
+
while (!FinishedFile() && static_cast<idx_t>(result.number_of_rows) < result.result_size) {
|
1584
1588
|
MoveToNextBuffer();
|
1585
|
-
if (result.number_of_rows >= result.result_size) {
|
1589
|
+
if (static_cast<idx_t>(result.number_of_rows) >= result.result_size) {
|
1586
1590
|
return;
|
1587
1591
|
}
|
1588
1592
|
if (cur_buffer_handle) {
|
@@ -1592,7 +1596,7 @@ void StringValueScanner::FinalizeChunkProcess() {
|
|
1592
1596
|
iterator.done = FinishedFile();
|
1593
1597
|
if (result.null_padding && result.number_of_rows < STANDARD_VECTOR_SIZE && result.chunk_col_id > 0) {
|
1594
1598
|
while (result.chunk_col_id < result.parse_chunk.ColumnCount()) {
|
1595
|
-
result.validity_mask[result.chunk_col_id++]->SetInvalid(result.number_of_rows);
|
1599
|
+
result.validity_mask[result.chunk_col_id++]->SetInvalid(static_cast<idx_t>(result.number_of_rows));
|
1596
1600
|
result.cur_col_id++;
|
1597
1601
|
}
|
1598
1602
|
result.number_of_rows++;
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
|
1
|
+
#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
|
2
2
|
#include "duckdb/common/types/value.hpp"
|
3
3
|
|
4
4
|
namespace duckdb {
|
@@ -41,7 +41,7 @@ void MatchAndReplace(CSVOption<T> &original, CSVOption<T> &sniffed, const string
|
|
41
41
|
// We verify that the user input matches the sniffed value
|
42
42
|
if (original != sniffed) {
|
43
43
|
error += "CSV Sniffer: Sniffer detected value different than the user input for the " + name;
|
44
|
-
error += " options \n Set: " + original.FormatValue() + " Sniffed: " + sniffed.FormatValue() + "\n";
|
44
|
+
error += " options \n Set: " + original.FormatValue() + ", Sniffed: " + sniffed.FormatValue() + "\n";
|
45
45
|
}
|
46
46
|
} else {
|
47
47
|
// We replace the value of original with the sniffed value
|
@@ -88,15 +88,14 @@ void CSVSniffer::SetResultOptions() {
|
|
88
88
|
options.dialect_options.rows_until_header = best_candidate->GetStateMachine().dialect_options.rows_until_header;
|
89
89
|
}
|
90
90
|
|
91
|
-
|
91
|
+
AdaptiveSnifferResult CSVSniffer::MinimalSniff() {
|
92
92
|
if (set_columns.IsSet()) {
|
93
93
|
// Nothing to see here
|
94
|
-
return
|
94
|
+
return AdaptiveSnifferResult(*set_columns.types, *set_columns.names, true);
|
95
95
|
}
|
96
96
|
// Return Types detected
|
97
97
|
vector<LogicalType> return_types;
|
98
98
|
// Column Names detected
|
99
|
-
vector<string> names;
|
100
99
|
|
101
100
|
buffer_manager->sniffing = true;
|
102
101
|
constexpr idx_t result_size = 2;
|
@@ -106,7 +105,8 @@ SnifferResult CSVSniffer::MinimalSniff() {
|
|
106
105
|
ColumnCountScanner count_scanner(buffer_manager, state_machine, error_handler, result_size);
|
107
106
|
auto &sniffed_column_counts = count_scanner.ParseChunk();
|
108
107
|
if (sniffed_column_counts.result_position == 0) {
|
109
|
-
|
108
|
+
// The file is an empty file, we just return
|
109
|
+
return {{}, {}, false};
|
110
110
|
}
|
111
111
|
|
112
112
|
state_machine->dialect_options.num_cols = sniffed_column_counts[0].number_of_columns;
|
@@ -130,20 +130,20 @@ SnifferResult CSVSniffer::MinimalSniff() {
|
|
130
130
|
|
131
131
|
// Possibly Gather Header
|
132
132
|
vector<HeaderValue> potential_header;
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
}
|
142
|
-
potential_header.emplace_back(val);
|
133
|
+
|
134
|
+
for (idx_t col_idx = 0; col_idx < data_chunk.ColumnCount(); col_idx++) {
|
135
|
+
auto &cur_vector = data_chunk.data[col_idx];
|
136
|
+
auto vector_data = FlatVector::GetData<string_t>(cur_vector);
|
137
|
+
auto &validity = FlatVector::Validity(cur_vector);
|
138
|
+
HeaderValue val;
|
139
|
+
if (validity.RowIsValid(0)) {
|
140
|
+
val = HeaderValue(vector_data[0]);
|
143
141
|
}
|
142
|
+
potential_header.emplace_back(val);
|
144
143
|
}
|
145
|
-
|
146
|
-
|
144
|
+
|
145
|
+
vector<string> names = DetectHeaderInternal(buffer_manager->context, potential_header, *state_machine, set_columns,
|
146
|
+
best_sql_types_candidates_per_column_idx, options, *error_handler);
|
147
147
|
|
148
148
|
for (idx_t column_idx = 0; column_idx < best_sql_types_candidates_per_column_idx.size(); column_idx++) {
|
149
149
|
LogicalType d_type = best_sql_types_candidates_per_column_idx[column_idx].back();
|
@@ -153,10 +153,10 @@ SnifferResult CSVSniffer::MinimalSniff() {
|
|
153
153
|
detected_types.push_back(d_type);
|
154
154
|
}
|
155
155
|
|
156
|
-
return {detected_types, names};
|
156
|
+
return {detected_types, names, sniffed_column_counts.result_position > 1};
|
157
157
|
}
|
158
158
|
|
159
|
-
SnifferResult CSVSniffer::AdaptiveSniff(CSVSchema &file_schema) {
|
159
|
+
SnifferResult CSVSniffer::AdaptiveSniff(const CSVSchema &file_schema) {
|
160
160
|
auto min_sniff_res = MinimalSniff();
|
161
161
|
bool run_full = error_handler->AnyErrors() || detection_error_handler->AnyErrors();
|
162
162
|
// Check if we are happy with the result or if we need to do more sniffing
|
@@ -164,8 +164,7 @@ SnifferResult CSVSniffer::AdaptiveSniff(CSVSchema &file_schema) {
|
|
164
164
|
// If we got no errors, we also run full if schemas do not match.
|
165
165
|
if (!set_columns.IsSet() && !options.file_options.AnySet()) {
|
166
166
|
string error;
|
167
|
-
run_full =
|
168
|
-
!file_schema.SchemasMatch(error, min_sniff_res.names, min_sniff_res.return_types, options.file_path);
|
167
|
+
run_full = !file_schema.SchemasMatch(error, min_sniff_res, options.file_path, true);
|
169
168
|
}
|
170
169
|
}
|
171
170
|
if (run_full) {
|
@@ -173,14 +172,14 @@ SnifferResult CSVSniffer::AdaptiveSniff(CSVSchema &file_schema) {
|
|
173
172
|
auto full_sniffer = SniffCSV();
|
174
173
|
if (!set_columns.IsSet() && !options.file_options.AnySet()) {
|
175
174
|
string error;
|
176
|
-
if (!file_schema.SchemasMatch(error, full_sniffer
|
175
|
+
if (!file_schema.SchemasMatch(error, full_sniffer, options.file_path, false) &&
|
177
176
|
!options.ignore_errors.GetValue()) {
|
178
177
|
throw InvalidInputException(error);
|
179
178
|
}
|
180
179
|
}
|
181
180
|
return full_sniffer;
|
182
181
|
}
|
183
|
-
return min_sniff_res;
|
182
|
+
return min_sniff_res.ToSnifferResult();
|
184
183
|
}
|
185
184
|
SnifferResult CSVSniffer::SniffCSV(bool force_match) {
|
186
185
|
buffer_manager->sniffing = true;
|
@@ -228,8 +227,8 @@ SnifferResult CSVSniffer::SniffCSV(bool force_match) {
|
|
228
227
|
if (set_names.size() == names.size()) {
|
229
228
|
for (idx_t i = 0; i < set_columns.Size(); i++) {
|
230
229
|
if (set_names[i] != names[i]) {
|
231
|
-
header_error += "Column at position: " + to_string(i) + " Set name: " + set_names[i] +
|
232
|
-
" Sniffed Name: " + names[i] + "\n";
|
230
|
+
header_error += "Column at position: " + to_string(i) + ", Set name: " + set_names[i] +
|
231
|
+
", Sniffed Name: " + names[i] + "\n";
|
233
232
|
match = false;
|
234
233
|
}
|
235
234
|
}
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#include "duckdb/common/shared_ptr.hpp"
|
2
|
-
#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
|
2
|
+
#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
|
3
3
|
#include "duckdb/main/client_data.hpp"
|
4
4
|
#include "duckdb/execution/operator/csv_scanner/csv_reader_options.hpp"
|
5
5
|
|
@@ -302,6 +302,8 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
|
302
302
|
// Whether there are more values (rows) available that are consistent, exceeding the current best.
|
303
303
|
bool more_values = consistent_rows > best_consistent_rows && num_cols >= max_columns_found;
|
304
304
|
|
305
|
+
bool more_columns = consistent_rows == best_consistent_rows && num_cols > max_columns_found;
|
306
|
+
|
305
307
|
// If additional padding is required when compared to the previous padding count.
|
306
308
|
bool require_more_padding = padding_count > prev_padding_count;
|
307
309
|
|
@@ -338,10 +340,10 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
|
338
340
|
// - There are more values and no additional padding is required.
|
339
341
|
// - There's more than one column and less padding is required.
|
340
342
|
if (rows_consistent &&
|
341
|
-
(single_column_before || (more_values && !require_more_padding) ||
|
343
|
+
(single_column_before || ((more_values || more_columns) && !require_more_padding) ||
|
342
344
|
(more_than_one_column && require_less_padding)) &&
|
343
345
|
!invalid_padding && comments_are_acceptable) {
|
344
|
-
if (!candidates.empty() && set_columns.IsSet() && max_columns_found ==
|
346
|
+
if (!candidates.empty() && set_columns.IsSet() && max_columns_found == set_columns.Size()) {
|
345
347
|
// We have a candidate that fits our requirements better
|
346
348
|
return;
|
347
349
|
}
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#include "duckdb/common/types/cast_helpers.hpp"
|
2
|
-
#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
|
2
|
+
#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
|
3
3
|
#include "duckdb/execution/operator/csv_scanner/csv_reader_options.hpp"
|
4
4
|
|
5
5
|
#include "utf8proc.hpp"
|
@@ -114,9 +114,9 @@ bool CSVSniffer::DetectHeaderWithSetColumn(ClientContext &context, vector<Header
|
|
114
114
|
return false;
|
115
115
|
}
|
116
116
|
if (best_header_row[i].value != (*set_columns.names)[i]) {
|
117
|
-
error << "Header
|
118
|
-
error << "Expected
|
119
|
-
error << "Actual
|
117
|
+
error << "Header mismatch at position: " << i << "\n";
|
118
|
+
error << "Expected name: \"" << (*set_columns.names)[i] << "\", ";
|
119
|
+
error << "Actual name: \"" << best_header_row[i].value << "\"."
|
120
120
|
<< "\n";
|
121
121
|
has_header = false;
|
122
122
|
break;
|
@@ -4,7 +4,7 @@
|
|
4
4
|
#include "duckdb/common/operator/integer_cast_operator.hpp"
|
5
5
|
#include "duckdb/common/string.hpp"
|
6
6
|
#include "duckdb/common/types/time.hpp"
|
7
|
-
#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
|
7
|
+
#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
|
8
8
|
|
9
9
|
namespace duckdb {
|
10
10
|
struct TryCastFloatingOperator {
|
@@ -488,7 +488,7 @@ void CSVSniffer::DetectTypes() {
|
|
488
488
|
if (!best_candidate) {
|
489
489
|
DialectCandidates dialect_candidates(options.dialect_options.state_machine_options);
|
490
490
|
auto error = CSVError::SniffingError(options, dialect_candidates.Print());
|
491
|
-
error_handler->Error(error);
|
491
|
+
error_handler->Error(error, true);
|
492
492
|
}
|
493
493
|
// Assert that it's all good at this point.
|
494
494
|
D_ASSERT(best_candidate && !best_format_candidates.empty());
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#include "duckdb/execution/operator/csv_scanner/csv_state_machine.hpp"
|
2
|
-
#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
|
2
|
+
#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
|
3
3
|
#include "utf8proc_wrapper.hpp"
|
4
4
|
#include "duckdb/main/error_manager.hpp"
|
5
5
|
#include "duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp"
|
package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#include "duckdb/execution/operator/csv_scanner/csv_state_machine.hpp"
|
2
2
|
#include "duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp"
|
3
|
-
#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
|
3
|
+
#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
|
4
4
|
|
5
5
|
namespace duckdb {
|
6
6
|
|
@@ -26,10 +26,10 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
|
|
26
26
|
switch (cur_state) {
|
27
27
|
case CSVState::QUOTED:
|
28
28
|
case CSVState::QUOTED_NEW_LINE:
|
29
|
+
case CSVState::ESCAPE:
|
29
30
|
InitializeTransitionArray(transition_array, cur_state, CSVState::QUOTED);
|
30
31
|
break;
|
31
32
|
case CSVState::UNQUOTED:
|
32
|
-
case CSVState::ESCAPE:
|
33
33
|
InitializeTransitionArray(transition_array, cur_state, CSVState::INVALID);
|
34
34
|
break;
|
35
35
|
case CSVState::COMMENT:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
#include "duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp"
|
2
2
|
|
3
|
-
#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
|
3
|
+
#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
|
4
4
|
#include "duckdb/execution/operator/csv_scanner/skip_scanner.hpp"
|
5
5
|
#include "duckdb/function/table/read_csv.hpp"
|
6
6
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
#include "duckdb/execution/operator/csv_scanner/global_csv_state.hpp"
|
2
2
|
|
3
|
-
#include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
|
3
|
+
#include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
|
4
4
|
#include "duckdb/execution/operator/csv_scanner/scanner_boundary.hpp"
|
5
5
|
#include "duckdb/execution/operator/csv_scanner/skip_scanner.hpp"
|
6
6
|
#include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
|