duckdb 1.1.2-dev4.0 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/third_party/icu/common/putil.cpp +0 -5
  3. package/src/duckdb/extension/icu/third_party/icu/common/rbbiscan.cpp +1 -1
  4. package/src/duckdb/extension/icu/third_party/icu/common/rbbitblb.cpp +1 -1
  5. package/src/duckdb/extension/icu/third_party/icu/common/ucurr.cpp +1 -1
  6. package/src/duckdb/extension/icu/third_party/icu/common/uresbund.cpp +1 -1
  7. package/src/duckdb/extension/icu/third_party/icu/common/uresimp.h +31 -31
  8. package/src/duckdb/extension/icu/third_party/icu/common/ustring.cpp +1 -1
  9. package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
  10. package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +12 -12
  11. package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
  12. package/src/duckdb/extension/icu/third_party/icu/i18n/listformatter.cpp +4 -4
  13. package/src/duckdb/extension/icu/third_party/icu/i18n/number_decimalquantity.h +1 -1
  14. package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
  15. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +28 -28
  16. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +7 -7
  17. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/ucol.h +1 -1
  18. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/ucoleitr.h +41 -41
  19. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/umsg.h +41 -41
  20. package/src/duckdb/extension/icu/third_party/icu/i18n/usrchimp.h +3 -3
  21. package/src/duckdb/extension/json/include/json_common.hpp +1 -1
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +13 -7
  23. package/src/duckdb/extension/parquet/column_writer.cpp +2 -1
  24. package/src/duckdb/extension/parquet/geo_parquet.cpp +24 -9
  25. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +3 -1
  26. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -0
  27. package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -1
  28. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +0 -4
  29. package/src/duckdb/extension/parquet/parquet_extension.cpp +20 -6
  30. package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -2
  31. package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -1
  32. package/src/duckdb/extension/parquet/serialize_parquet.cpp +0 -2
  33. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -1
  34. package/src/duckdb/src/catalog/default/default_functions.cpp +5 -5
  35. package/src/duckdb/src/common/allocator.cpp +3 -2
  36. package/src/duckdb/src/common/arrow/arrow_appender.cpp +1 -0
  37. package/src/duckdb/src/common/arrow/arrow_converter.cpp +11 -0
  38. package/src/duckdb/src/common/arrow/schema_metadata.cpp +6 -4
  39. package/src/duckdb/src/common/enum_util.cpp +33 -0
  40. package/src/duckdb/src/common/exception.cpp +3 -0
  41. package/src/duckdb/src/common/extra_type_info.cpp +1 -44
  42. package/src/duckdb/src/common/field_writer.cpp +97 -0
  43. package/src/duckdb/src/common/render_tree.cpp +7 -5
  44. package/src/duckdb/src/common/row_operations/row_match.cpp +359 -0
  45. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +27 -0
  46. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +36 -0
  47. package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
  48. package/src/duckdb/src/common/serializer.cpp +24 -0
  49. package/src/duckdb/src/common/sort/comparators.cpp +2 -2
  50. package/src/duckdb/src/common/types/bit.cpp +57 -34
  51. package/src/duckdb/src/common/types/data_chunk.cpp +32 -29
  52. package/src/duckdb/src/common/types/vector_cache.cpp +12 -6
  53. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +14 -0
  54. package/src/duckdb/src/core_functions/aggregate/distributive/bitstring_agg.cpp +20 -1
  55. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -2
  56. package/src/duckdb/src/core_functions/aggregate/holistic/approx_top_k.cpp +32 -7
  57. package/src/duckdb/src/core_functions/function_list.cpp +1 -2
  58. package/src/duckdb/src/core_functions/scalar/bit/bitstring.cpp +23 -5
  59. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +12 -6
  60. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +1 -1
  61. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +4 -3
  62. package/src/duckdb/src/execution/expression_executor/execute_case.cpp +4 -3
  63. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -1
  64. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +3 -2
  65. package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -1
  66. package/src/duckdb/src/execution/expression_executor/execute_function.cpp +2 -1
  67. package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +3 -2
  68. package/src/duckdb/src/execution/expression_executor/execute_reference.cpp +1 -1
  69. package/src/duckdb/src/execution/expression_executor.cpp +9 -3
  70. package/src/duckdb/src/execution/expression_executor_state.cpp +11 -9
  71. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +238 -0
  72. package/src/duckdb/src/execution/index/art/plan_art.cpp +94 -0
  73. package/src/duckdb/src/execution/index/index_type_set.cpp +4 -1
  74. package/src/duckdb/src/execution/join_hashtable.cpp +7 -8
  75. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +6 -4
  76. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +4 -4
  77. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +1 -1
  78. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +44 -5
  79. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +28 -24
  80. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +25 -26
  81. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +5 -3
  82. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +4 -4
  83. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +2 -2
  84. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
  85. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +1 -1
  86. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp +1 -1
  87. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +2 -2
  88. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +1 -1
  89. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +1 -1
  90. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +73 -27
  91. package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +1 -1
  92. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +695 -0
  93. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1487 -0
  94. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +72 -0
  95. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
  96. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +280 -0
  97. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +666 -0
  98. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +14 -4
  99. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +207 -0
  100. package/src/duckdb/src/execution/partitionable_hashtable.cpp +207 -0
  101. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +6 -1
  102. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -4
  103. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +14 -87
  104. package/src/duckdb/src/execution/physical_plan/plan_export.cpp +1 -1
  105. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +1 -1
  106. package/src/duckdb/src/execution/reservoir_sample.cpp +1 -1
  107. package/src/duckdb/src/execution/window_executor.cpp +3 -3
  108. package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
  109. package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -2
  110. package/src/duckdb/src/function/scalar/string/concat.cpp +118 -151
  111. package/src/duckdb/src/function/table/arrow.cpp +13 -0
  112. package/src/duckdb/src/function/table/arrow_conversion.cpp +12 -7
  113. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  114. package/src/duckdb/src/function/table/read_csv.cpp +2 -30
  115. package/src/duckdb/src/function/table/sniff_csv.cpp +2 -1
  116. package/src/duckdb/src/function/table/system/duckdb_secrets.cpp +15 -7
  117. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  118. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
  119. package/src/duckdb/src/include/duckdb/common/atomic.hpp +13 -1
  120. package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +3 -4
  121. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  122. package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +2 -0
  123. package/src/duckdb/src/include/duckdb/common/exception.hpp +10 -0
  124. package/src/duckdb/src/include/duckdb/common/extra_type_info/enum_type_info.hpp +53 -0
  125. package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +5 -5
  126. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -0
  127. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +36 -33
  128. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +10 -13
  129. package/src/duckdb/src/include/duckdb/common/types/uhugeint.hpp +1 -1
  130. package/src/duckdb/src/include/duckdb/common/types/vector_cache.hpp +7 -5
  131. package/src/duckdb/src/include/duckdb/common/windows_undefs.hpp +2 -1
  132. package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +2 -0
  133. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +1 -1
  134. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +0 -6
  135. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +1 -1
  136. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +3 -2
  137. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +3 -0
  138. package/src/duckdb/src/include/duckdb/execution/index/index_type.hpp +16 -1
  139. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +4 -4
  140. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +4 -2
  141. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +3 -2
  142. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +91 -36
  143. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp +36 -0
  144. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +1 -1
  145. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +0 -1
  146. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +2 -5
  147. package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
  148. package/src/duckdb/src/include/duckdb/main/database.hpp +5 -0
  149. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  150. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +20 -22
  151. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -9
  152. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +8 -1
  153. package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -0
  154. package/src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp +5 -5
  155. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +15 -5
  156. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -0
  157. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -0
  158. package/src/duckdb/src/include/duckdb/planner/operator/logical_export.hpp +10 -13
  159. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +1 -0
  160. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -2
  161. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +1 -1
  162. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +0 -2
  163. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +1 -0
  164. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +5 -1
  165. package/src/duckdb/src/include/duckdb.h +2 -2
  166. package/src/duckdb/src/main/appender.cpp +3 -0
  167. package/src/duckdb/src/main/capi/profiling_info-c.cpp +5 -2
  168. package/src/duckdb/src/main/client_context.cpp +8 -2
  169. package/src/duckdb/src/main/connection.cpp +1 -1
  170. package/src/duckdb/src/main/database.cpp +13 -0
  171. package/src/duckdb/src/main/extension/extension_helper.cpp +1 -1
  172. package/src/duckdb/src/main/extension/extension_install.cpp +9 -1
  173. package/src/duckdb/src/main/extension/extension_load.cpp +3 -2
  174. package/src/duckdb/src/main/extension_install_info.cpp +1 -1
  175. package/src/duckdb/src/main/profiling_info.cpp +78 -58
  176. package/src/duckdb/src/main/query_profiler.cpp +79 -89
  177. package/src/duckdb/src/main/relation/read_csv_relation.cpp +1 -1
  178. package/src/duckdb/src/main/secret/secret.cpp +2 -1
  179. package/src/duckdb/src/main/secret/secret_manager.cpp +14 -0
  180. package/src/duckdb/src/optimizer/cte_filter_pusher.cpp +4 -2
  181. package/src/duckdb/src/optimizer/deliminator.cpp +0 -7
  182. package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +7 -0
  183. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +4 -1
  184. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +21 -21
  185. package/src/duckdb/src/parallel/task_scheduler.cpp +9 -0
  186. package/src/duckdb/src/parser/parsed_data/exported_table_data.cpp +22 -0
  187. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +3 -0
  188. package/src/duckdb/src/parser/statement/insert_statement.cpp +7 -1
  189. package/src/duckdb/src/parser/transform/expression/transform_boolean_test.cpp +1 -1
  190. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +89 -87
  191. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -2
  192. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +4 -9
  193. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +4 -0
  194. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +2 -2
  195. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -1
  196. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +4 -3
  197. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +13 -3
  198. package/src/duckdb/src/planner/expression_binder.cpp +1 -1
  199. package/src/duckdb/src/planner/operator/logical_export.cpp +28 -0
  200. package/src/duckdb/src/planner/table_binding.cpp +1 -2
  201. package/src/duckdb/src/planner/table_filter.cpp +6 -2
  202. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +2 -1
  203. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
  204. package/src/duckdb/src/storage/compression/bitpacking.cpp +7 -3
  205. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -1
  206. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +2 -2
  207. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +16 -0
  208. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +29 -0
  209. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +15 -0
  210. package/src/duckdb/src/storage/single_file_block_manager.cpp +2 -1
  211. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +3 -5
  212. package/src/duckdb/src/storage/storage_info.cpp +4 -4
  213. package/src/duckdb/src/storage/table/row_group_collection.cpp +1 -1
  214. package/src/duckdb/src/storage/table/row_version_manager.cpp +5 -1
  215. package/src/duckdb/src/storage/temporary_file_manager.cpp +1 -1
  216. package/src/duckdb/src/transaction/duck_transaction.cpp +15 -14
  217. package/src/duckdb/third_party/brotli/common/brotli_platform.h +1 -1
  218. package/src/duckdb/third_party/brotli/dec/decode.cpp +1 -1
  219. package/src/duckdb/third_party/brotli/enc/memory.cpp +4 -4
  220. package/src/duckdb/third_party/fsst/libfsst.cpp +1 -1
  221. package/src/duckdb/third_party/hyperloglog/sds.cpp +1 -1
  222. package/src/duckdb/third_party/hyperloglog/sds.hpp +1 -1
  223. package/src/duckdb/third_party/libpg_query/include/common/keywords.hpp +1 -1
  224. package/src/duckdb/third_party/libpg_query/include/datatype/timestamp.hpp +1 -1
  225. package/src/duckdb/third_party/libpg_query/include/mb/pg_wchar.hpp +1 -1
  226. package/src/duckdb/third_party/libpg_query/include/nodes/bitmapset.hpp +1 -1
  227. package/src/duckdb/third_party/libpg_query/include/nodes/lockoptions.hpp +1 -1
  228. package/src/duckdb/third_party/libpg_query/include/nodes/makefuncs.hpp +1 -1
  229. package/src/duckdb/third_party/libpg_query/include/nodes/pg_list.hpp +1 -1
  230. package/src/duckdb/third_party/libpg_query/include/nodes/value.hpp +1 -1
  231. package/src/duckdb/third_party/libpg_query/include/parser/gramparse.hpp +1 -1
  232. package/src/duckdb/third_party/libpg_query/include/parser/parser.hpp +1 -1
  233. package/src/duckdb/third_party/libpg_query/include/parser/scanner.hpp +1 -1
  234. package/src/duckdb/third_party/libpg_query/include/parser/scansup.hpp +1 -1
  235. package/src/duckdb/third_party/libpg_query/include/pg_functions.hpp +1 -1
  236. package/src/duckdb/third_party/libpg_query/pg_functions.cpp +1 -1
  237. package/src/duckdb/third_party/libpg_query/src_backend_nodes_list.cpp +1 -1
  238. package/src/duckdb/third_party/libpg_query/src_backend_nodes_makefuncs.cpp +1 -1
  239. package/src/duckdb/third_party/libpg_query/src_backend_nodes_value.cpp +1 -1
  240. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +1964 -1964
  241. package/src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp +1 -1
  242. package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +1 -1
  243. package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +1 -1
  244. package/src/duckdb/third_party/lz4/lz4.cpp +1 -1
  245. package/src/duckdb/third_party/mbedtls/include/des_alt.h +1 -1
  246. package/src/duckdb/third_party/mbedtls/include/mbedtls/aes_alt.h +1 -1
  247. package/src/duckdb/third_party/mbedtls/include/mbedtls/aria_alt.h +1 -1
  248. package/src/duckdb/third_party/mbedtls/include/mbedtls/asn1write.h +1 -1
  249. package/src/duckdb/third_party/mbedtls/include/mbedtls/camellia_alt.h +1 -1
  250. package/src/duckdb/third_party/mbedtls/include/mbedtls/ccm_alt.h +1 -1
  251. package/src/duckdb/third_party/mbedtls/include/mbedtls/chacha20.h +1 -1
  252. package/src/duckdb/third_party/mbedtls/include/mbedtls/chachapoly.h +1 -1
  253. package/src/duckdb/third_party/mbedtls/include/mbedtls/cmac.h +1 -1
  254. package/src/duckdb/third_party/mbedtls/include/mbedtls/config_psa.h +1 -1
  255. package/src/duckdb/third_party/mbedtls/include/mbedtls/ecdsa.h +1 -1
  256. package/src/duckdb/third_party/mbedtls/include/mbedtls/ecp.h +1 -1
  257. package/src/duckdb/third_party/mbedtls/include/mbedtls/gcm_alt.h +1 -1
  258. package/src/duckdb/third_party/mbedtls/include/mbedtls/md5.h +1 -1
  259. package/src/duckdb/third_party/mbedtls/include/mbedtls/nist_kw.h +1 -1
  260. package/src/duckdb/third_party/mbedtls/include/mbedtls/pkcs12.h +1 -1
  261. package/src/duckdb/third_party/mbedtls/include/mbedtls/pkcs5.h +1 -1
  262. package/src/duckdb/third_party/mbedtls/include/mbedtls/psa_util.h +1 -1
  263. package/src/duckdb/third_party/mbedtls/include/mbedtls/ripemd160.h +1 -1
  264. package/src/duckdb/third_party/mbedtls/include/mbedtls/threading.h +1 -1
  265. package/src/duckdb/third_party/mbedtls/include/mbedtls/timing.h +1 -1
  266. package/src/duckdb/third_party/mbedtls/include/platform_alt.h +1 -1
  267. package/src/duckdb/third_party/mbedtls/include/psa/crypto.h +1 -1
  268. package/src/duckdb/third_party/mbedtls/include/rsa_alt.h +1 -1
  269. package/src/duckdb/third_party/mbedtls/include/sha1_alt.h +1 -1
  270. package/src/duckdb/third_party/mbedtls/include/sha256_alt.h +1 -1
  271. package/src/duckdb/third_party/mbedtls/include/sha512_alt.h +1 -1
  272. package/src/duckdb/third_party/mbedtls/include/ssl_misc.h +1 -1
  273. package/src/duckdb/third_party/mbedtls/library/aesni.h +1 -1
  274. package/src/duckdb/third_party/mbedtls/library/padlock.h +1 -1
  275. package/src/duckdb/third_party/miniz/miniz.cpp +1 -1
  276. package/src/duckdb/third_party/parquet/parquet_types.cpp +1 -1
  277. package/src/duckdb/third_party/parquet/windows_compatibility.h +1 -1
  278. package/src/duckdb/third_party/pcg/pcg_extras.hpp +1 -1
  279. package/src/duckdb/third_party/pcg/pcg_uint128.hpp +1 -1
  280. package/src/duckdb/third_party/skiplist/Node.h +4 -4
  281. package/src/duckdb/third_party/snappy/snappy.cc +1 -1
  282. package/src/duckdb/third_party/snappy/snappy_version.hpp +1 -1
  283. package/src/duckdb/third_party/thrift/thrift/thrift-config.h +1 -1
  284. package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1 -1
  285. package/src/duckdb/third_party/zstd/include/zstd_static.h +1 -1
  286. package/src/duckdb/ub_src_execution_index_art.cpp +2 -0
  287. package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
  288. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
@@ -1,6 +1,6 @@
1
1
  #include "duckdb/execution/operator/csv_scanner/base_scanner.hpp"
2
2
 
3
- #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
3
+ #include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
4
4
  #include "duckdb/execution/operator/csv_scanner/skip_scanner.hpp"
5
5
 
6
6
  namespace duckdb {
@@ -60,14 +60,53 @@ bool CSVSchema::Empty() const {
60
60
  return columns.empty();
61
61
  }
62
62
 
63
- bool CSVSchema::SchemasMatch(string &error_message, vector<string> &names, vector<LogicalType> &types,
64
- const string &cur_file_path) {
65
- D_ASSERT(names.size() == types.size());
63
+ bool CSVSchema::SchemasMatch(string &error_message, SnifferResult &sniffer_result, const string &cur_file_path,
64
+ bool is_minimal_sniffer) const {
65
+ D_ASSERT(sniffer_result.names.size() == sniffer_result.return_types.size());
66
66
  bool match = true;
67
67
  unordered_map<string, TypeIdxPair> current_schema;
68
- for (idx_t i = 0; i < names.size(); i++) {
68
+
69
+ for (idx_t i = 0; i < sniffer_result.names.size(); i++) {
69
70
  // Populate our little schema
70
- current_schema[names[i]] = {types[i], i};
71
+ current_schema[sniffer_result.names[i]] = {sniffer_result.return_types[i], i};
72
+ }
73
+ if (is_minimal_sniffer) {
74
+ auto min_sniffer = static_cast<AdaptiveSnifferResult &>(sniffer_result);
75
+ if (!min_sniffer.more_than_one_row) {
76
+ bool min_sniff_match = true;
77
+ // If we don't have more than one row, either the names must match or the types must match.
78
+ for (auto &column : columns) {
79
+ if (current_schema.find(column.name) == current_schema.end()) {
80
+ min_sniff_match = false;
81
+ break;
82
+ }
83
+ }
84
+ if (min_sniff_match) {
85
+ return true;
86
+ }
87
+ // Otherwise, the types must match.
88
+ min_sniff_match = true;
89
+ if (sniffer_result.return_types.size() == columns.size()) {
90
+ idx_t return_type_idx = 0;
91
+ for (auto &column : columns) {
92
+ if (column.type != sniffer_result.return_types[return_type_idx++]) {
93
+ min_sniff_match = false;
94
+ break;
95
+ }
96
+ }
97
+ } else {
98
+ min_sniff_match = false;
99
+ }
100
+ if (min_sniff_match) {
101
+ // If we got here, we have the right types but the wrong names, lets fix the names
102
+ idx_t sniff_name_idx = 0;
103
+ for (auto &column : columns) {
104
+ sniffer_result.names[sniff_name_idx++] = column.name;
105
+ }
106
+ return true;
107
+ }
108
+ }
109
+ // If we got to this point, the minimal sniffer doesn't match, we throw an error.
71
110
  }
72
111
  // Here we check if the schema of a given file matched our original schema
73
112
  // We consider it's not a match if:
@@ -258,7 +258,7 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
258
258
  // We check for a weird case, where we ignore an extra value, if it is a null value
259
259
  return;
260
260
  }
261
- validity_mask[chunk_col_id]->SetInvalid(number_of_rows);
261
+ validity_mask[chunk_col_id]->SetInvalid(static_cast<idx_t>(number_of_rows));
262
262
  }
263
263
  cur_col_id++;
264
264
  chunk_col_id++;
@@ -447,7 +447,11 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
447
447
  }
448
448
 
449
449
  DataChunk &StringValueResult::ToChunk() {
450
- parse_chunk.SetCardinality(number_of_rows);
450
+ if (number_of_rows < 0) {
451
+ throw InternalException("CSVScanner: ToChunk() function. Has a negative number of rows, this indicates an "
452
+ "issue with the error handler.");
453
+ }
454
+ parse_chunk.SetCardinality(static_cast<idx_t>(number_of_rows));
451
455
  return parse_chunk;
452
456
  }
453
457
 
@@ -658,7 +662,7 @@ bool LineError::HandleErrors(StringValueResult &result) {
658
662
  result.RemoveLastLine();
659
663
  } else {
660
664
  // Otherwise, we add it to the borked rows to remove it later and just cleanup the column variables.
661
- result.borked_rows.insert(result.number_of_rows);
665
+ result.borked_rows.insert(static_cast<idx_t>(result.number_of_rows));
662
666
  result.cur_col_id = 0;
663
667
  result.chunk_col_id = 0;
664
668
  }
@@ -740,9 +744,9 @@ bool StringValueResult::AddRowInternal() {
740
744
  }
741
745
 
742
746
  if (current_errors.HandleErrors(*this)) {
743
- line_positions_per_row[number_of_rows] = current_line_position;
747
+ line_positions_per_row[static_cast<idx_t>(number_of_rows)] = current_line_position;
744
748
  number_of_rows++;
745
- if (number_of_rows >= result_size) {
749
+ if (static_cast<idx_t>(number_of_rows) >= result_size) {
746
750
  // We have a full chunk
747
751
  return true;
748
752
  }
@@ -769,7 +773,7 @@ bool StringValueResult::AddRowInternal() {
769
773
  if (empty) {
770
774
  static_cast<string_t *>(vector_ptr[chunk_col_id])[number_of_rows] = string_t();
771
775
  } else {
772
- validity_mask[chunk_col_id]->SetInvalid(number_of_rows);
776
+ validity_mask[chunk_col_id]->SetInvalid(static_cast<idx_t>(number_of_rows));
773
777
  }
774
778
  cur_col_id++;
775
779
  chunk_col_id++;
@@ -799,11 +803,11 @@ bool StringValueResult::AddRowInternal() {
799
803
  RemoveLastLine();
800
804
  }
801
805
  }
802
- line_positions_per_row[number_of_rows] = current_line_position;
806
+ line_positions_per_row[static_cast<idx_t>(number_of_rows)] = current_line_position;
803
807
  cur_col_id = 0;
804
808
  chunk_col_id = 0;
805
809
  number_of_rows++;
806
- if (number_of_rows >= result_size) {
810
+ if (static_cast<idx_t>(number_of_rows) >= result_size) {
807
811
  // We have a full chunk
808
812
  return true;
809
813
  }
@@ -861,12 +865,12 @@ bool StringValueResult::EmptyLine(StringValueResult &result, const idx_t buffer_
861
865
  if (empty) {
862
866
  static_cast<string_t *>(result.vector_ptr[0])[result.number_of_rows] = string_t();
863
867
  } else {
864
- result.validity_mask[0]->SetInvalid(result.number_of_rows);
868
+ result.validity_mask[0]->SetInvalid(static_cast<idx_t>(result.number_of_rows));
865
869
  }
866
870
  result.number_of_rows++;
867
871
  }
868
872
  }
869
- if (result.number_of_rows >= result.result_size) {
873
+ if (static_cast<idx_t>(result.number_of_rows) >= result.result_size) {
870
874
  // We have a full chunk
871
875
  return true;
872
876
  }
@@ -1043,15 +1047,15 @@ void StringValueScanner::Flush(DataChunk &insert_chunk) {
1043
1047
  }
1044
1048
  if (!result.borked_rows.empty()) {
1045
1049
  // We must remove the borked lines from our chunk
1046
- SelectionVector succesful_rows(parse_chunk.size());
1050
+ SelectionVector successful_rows(parse_chunk.size());
1047
1051
  idx_t sel_idx = 0;
1048
1052
  for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
1049
1053
  if (result.borked_rows.find(row_idx) == result.borked_rows.end()) {
1050
- succesful_rows.set_index(sel_idx++, row_idx);
1054
+ successful_rows.set_index(sel_idx++, row_idx);
1051
1055
  }
1052
1056
  }
1053
1057
  // Now we slice the result
1054
- insert_chunk.Slice(succesful_rows, sel_idx);
1058
+ insert_chunk.Slice(successful_rows, sel_idx);
1055
1059
  }
1056
1060
  }
1057
1061
 
@@ -1389,7 +1393,7 @@ void StringValueResult::SkipBOM() const {
1389
1393
  void StringValueResult::RemoveLastLine() {
1390
1394
  // potentially de-nullify values
1391
1395
  for (idx_t i = 0; i < chunk_col_id; i++) {
1392
- validity_mask[i]->SetValid(number_of_rows);
1396
+ validity_mask[i]->SetValid(static_cast<idx_t>(number_of_rows));
1393
1397
  }
1394
1398
  // reset column trackers
1395
1399
  cur_col_id = 0;
@@ -1470,10 +1474,6 @@ void StringValueScanner::SetStart() {
1470
1474
  }
1471
1475
  return;
1472
1476
  }
1473
- if (state_machine->options.IgnoreErrors()) {
1474
- // If we are ignoring errors we don't really need to figure out a line.
1475
- return;
1476
- }
1477
1477
  // The result size of the data after skipping the row is one line
1478
1478
  // We have to look for a new line that fits our schema
1479
1479
  // 1. We walk until the next new line
@@ -1524,7 +1524,7 @@ void StringValueScanner::SetStart() {
1524
1524
  }
1525
1525
 
1526
1526
  void StringValueScanner::FinalizeChunkProcess() {
1527
- if (result.number_of_rows >= result.result_size || iterator.done) {
1527
+ if (static_cast<idx_t>(result.number_of_rows) >= result.result_size || iterator.done) {
1528
1528
  // We are done
1529
1529
  if (!sniffing) {
1530
1530
  if (csv_file_scan) {
@@ -1562,14 +1562,18 @@ void StringValueScanner::FinalizeChunkProcess() {
1562
1562
  if (result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
1563
1563
  has_unterminated_quotes = true;
1564
1564
  }
1565
- result.current_errors.HandleErrors(result);
1565
+ if (result.current_errors.HandleErrors(result)) {
1566
+ result.number_of_rows++;
1567
+ }
1566
1568
  }
1567
1569
  if (states.IsQuotedCurrent() && !has_unterminated_quotes) {
1568
1570
  // If we finish the execution of a buffer, and we end in a quoted state, it means we have unterminated
1569
1571
  // quotes
1570
1572
  result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id,
1571
1573
  result.last_position);
1572
- result.current_errors.HandleErrors(result);
1574
+ if (result.current_errors.HandleErrors(result)) {
1575
+ result.number_of_rows++;
1576
+ }
1573
1577
  }
1574
1578
  if (!iterator.done) {
1575
1579
  if (iterator.pos.buffer_pos >= iterator.GetEndPos() || iterator.pos.buffer_idx > iterator.GetBufferIdx() ||
@@ -1580,9 +1584,9 @@ void StringValueScanner::FinalizeChunkProcess() {
1580
1584
  } else {
1581
1585
  // 2) If a boundary is not set
1582
1586
  // We read until the chunk is complete, or we have nothing else to read.
1583
- while (!FinishedFile() && result.number_of_rows < result.result_size) {
1587
+ while (!FinishedFile() && static_cast<idx_t>(result.number_of_rows) < result.result_size) {
1584
1588
  MoveToNextBuffer();
1585
- if (result.number_of_rows >= result.result_size) {
1589
+ if (static_cast<idx_t>(result.number_of_rows) >= result.result_size) {
1586
1590
  return;
1587
1591
  }
1588
1592
  if (cur_buffer_handle) {
@@ -1592,7 +1596,7 @@ void StringValueScanner::FinalizeChunkProcess() {
1592
1596
  iterator.done = FinishedFile();
1593
1597
  if (result.null_padding && result.number_of_rows < STANDARD_VECTOR_SIZE && result.chunk_col_id > 0) {
1594
1598
  while (result.chunk_col_id < result.parse_chunk.ColumnCount()) {
1595
- result.validity_mask[result.chunk_col_id++]->SetInvalid(result.number_of_rows);
1599
+ result.validity_mask[result.chunk_col_id++]->SetInvalid(static_cast<idx_t>(result.number_of_rows));
1596
1600
  result.cur_col_id++;
1597
1601
  }
1598
1602
  result.number_of_rows++;
@@ -1,4 +1,4 @@
1
- #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
1
+ #include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
2
2
  #include "duckdb/common/types/value.hpp"
3
3
 
4
4
  namespace duckdb {
@@ -41,7 +41,7 @@ void MatchAndReplace(CSVOption<T> &original, CSVOption<T> &sniffed, const string
41
41
  // We verify that the user input matches the sniffed value
42
42
  if (original != sniffed) {
43
43
  error += "CSV Sniffer: Sniffer detected value different than the user input for the " + name;
44
- error += " options \n Set: " + original.FormatValue() + " Sniffed: " + sniffed.FormatValue() + "\n";
44
+ error += " options \n Set: " + original.FormatValue() + ", Sniffed: " + sniffed.FormatValue() + "\n";
45
45
  }
46
46
  } else {
47
47
  // We replace the value of original with the sniffed value
@@ -88,15 +88,14 @@ void CSVSniffer::SetResultOptions() {
88
88
  options.dialect_options.rows_until_header = best_candidate->GetStateMachine().dialect_options.rows_until_header;
89
89
  }
90
90
 
91
- SnifferResult CSVSniffer::MinimalSniff() {
91
+ AdaptiveSnifferResult CSVSniffer::MinimalSniff() {
92
92
  if (set_columns.IsSet()) {
93
93
  // Nothing to see here
94
- return SnifferResult(*set_columns.types, *set_columns.names);
94
+ return AdaptiveSnifferResult(*set_columns.types, *set_columns.names, true);
95
95
  }
96
96
  // Return Types detected
97
97
  vector<LogicalType> return_types;
98
98
  // Column Names detected
99
- vector<string> names;
100
99
 
101
100
  buffer_manager->sniffing = true;
102
101
  constexpr idx_t result_size = 2;
@@ -106,7 +105,8 @@ SnifferResult CSVSniffer::MinimalSniff() {
106
105
  ColumnCountScanner count_scanner(buffer_manager, state_machine, error_handler, result_size);
107
106
  auto &sniffed_column_counts = count_scanner.ParseChunk();
108
107
  if (sniffed_column_counts.result_position == 0) {
109
- return {{}, {}};
108
+ // The file is an empty file, we just return
109
+ return {{}, {}, false};
110
110
  }
111
111
 
112
112
  state_machine->dialect_options.num_cols = sniffed_column_counts[0].number_of_columns;
@@ -130,20 +130,20 @@ SnifferResult CSVSniffer::MinimalSniff() {
130
130
 
131
131
  // Possibly Gather Header
132
132
  vector<HeaderValue> potential_header;
133
- if (start_row != 0) {
134
- for (idx_t col_idx = 0; col_idx < data_chunk.ColumnCount(); col_idx++) {
135
- auto &cur_vector = data_chunk.data[col_idx];
136
- auto vector_data = FlatVector::GetData<string_t>(cur_vector);
137
- auto &validity = FlatVector::Validity(cur_vector);
138
- HeaderValue val;
139
- if (validity.RowIsValid(0)) {
140
- val = HeaderValue(vector_data[0]);
141
- }
142
- potential_header.emplace_back(val);
133
+
134
+ for (idx_t col_idx = 0; col_idx < data_chunk.ColumnCount(); col_idx++) {
135
+ auto &cur_vector = data_chunk.data[col_idx];
136
+ auto vector_data = FlatVector::GetData<string_t>(cur_vector);
137
+ auto &validity = FlatVector::Validity(cur_vector);
138
+ HeaderValue val;
139
+ if (validity.RowIsValid(0)) {
140
+ val = HeaderValue(vector_data[0]);
143
141
  }
142
+ potential_header.emplace_back(val);
144
143
  }
145
- names = DetectHeaderInternal(buffer_manager->context, potential_header, *state_machine, set_columns,
146
- best_sql_types_candidates_per_column_idx, options, *error_handler);
144
+
145
+ vector<string> names = DetectHeaderInternal(buffer_manager->context, potential_header, *state_machine, set_columns,
146
+ best_sql_types_candidates_per_column_idx, options, *error_handler);
147
147
 
148
148
  for (idx_t column_idx = 0; column_idx < best_sql_types_candidates_per_column_idx.size(); column_idx++) {
149
149
  LogicalType d_type = best_sql_types_candidates_per_column_idx[column_idx].back();
@@ -153,10 +153,10 @@ SnifferResult CSVSniffer::MinimalSniff() {
153
153
  detected_types.push_back(d_type);
154
154
  }
155
155
 
156
- return {detected_types, names};
156
+ return {detected_types, names, sniffed_column_counts.result_position > 1};
157
157
  }
158
158
 
159
- SnifferResult CSVSniffer::AdaptiveSniff(CSVSchema &file_schema) {
159
+ SnifferResult CSVSniffer::AdaptiveSniff(const CSVSchema &file_schema) {
160
160
  auto min_sniff_res = MinimalSniff();
161
161
  bool run_full = error_handler->AnyErrors() || detection_error_handler->AnyErrors();
162
162
  // Check if we are happy with the result or if we need to do more sniffing
@@ -164,8 +164,7 @@ SnifferResult CSVSniffer::AdaptiveSniff(CSVSchema &file_schema) {
164
164
  // If we got no errors, we also run full if schemas do not match.
165
165
  if (!set_columns.IsSet() && !options.file_options.AnySet()) {
166
166
  string error;
167
- run_full =
168
- !file_schema.SchemasMatch(error, min_sniff_res.names, min_sniff_res.return_types, options.file_path);
167
+ run_full = !file_schema.SchemasMatch(error, min_sniff_res, options.file_path, true);
169
168
  }
170
169
  }
171
170
  if (run_full) {
@@ -173,14 +172,14 @@ SnifferResult CSVSniffer::AdaptiveSniff(CSVSchema &file_schema) {
173
172
  auto full_sniffer = SniffCSV();
174
173
  if (!set_columns.IsSet() && !options.file_options.AnySet()) {
175
174
  string error;
176
- if (!file_schema.SchemasMatch(error, full_sniffer.names, full_sniffer.return_types, options.file_path) &&
175
+ if (!file_schema.SchemasMatch(error, full_sniffer, options.file_path, false) &&
177
176
  !options.ignore_errors.GetValue()) {
178
177
  throw InvalidInputException(error);
179
178
  }
180
179
  }
181
180
  return full_sniffer;
182
181
  }
183
- return min_sniff_res;
182
+ return min_sniff_res.ToSnifferResult();
184
183
  }
185
184
  SnifferResult CSVSniffer::SniffCSV(bool force_match) {
186
185
  buffer_manager->sniffing = true;
@@ -228,8 +227,8 @@ SnifferResult CSVSniffer::SniffCSV(bool force_match) {
228
227
  if (set_names.size() == names.size()) {
229
228
  for (idx_t i = 0; i < set_columns.Size(); i++) {
230
229
  if (set_names[i] != names[i]) {
231
- header_error += "Column at position: " + to_string(i) + " Set name: " + set_names[i] +
232
- " Sniffed Name: " + names[i] + "\n";
230
+ header_error += "Column at position: " + to_string(i) + ", Set name: " + set_names[i] +
231
+ ", Sniffed Name: " + names[i] + "\n";
233
232
  match = false;
234
233
  }
235
234
  }
@@ -1,5 +1,5 @@
1
1
  #include "duckdb/common/shared_ptr.hpp"
2
- #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
2
+ #include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
3
3
  #include "duckdb/main/client_data.hpp"
4
4
  #include "duckdb/execution/operator/csv_scanner/csv_reader_options.hpp"
5
5
 
@@ -302,6 +302,8 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
302
302
  // Whether there are more values (rows) available that are consistent, exceeding the current best.
303
303
  bool more_values = consistent_rows > best_consistent_rows && num_cols >= max_columns_found;
304
304
 
305
+ bool more_columns = consistent_rows == best_consistent_rows && num_cols > max_columns_found;
306
+
305
307
  // If additional padding is required when compared to the previous padding count.
306
308
  bool require_more_padding = padding_count > prev_padding_count;
307
309
 
@@ -338,10 +340,10 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
338
340
  // - There are more values and no additional padding is required.
339
341
  // - There's more than one column and less padding is required.
340
342
  if (rows_consistent &&
341
- (single_column_before || (more_values && !require_more_padding) ||
343
+ (single_column_before || ((more_values || more_columns) && !require_more_padding) ||
342
344
  (more_than_one_column && require_less_padding)) &&
343
345
  !invalid_padding && comments_are_acceptable) {
344
- if (!candidates.empty() && set_columns.IsSet() && max_columns_found == candidates.size()) {
346
+ if (!candidates.empty() && set_columns.IsSet() && max_columns_found == set_columns.Size()) {
345
347
  // We have a candidate that fits our requirements better
346
348
  return;
347
349
  }
@@ -1,5 +1,5 @@
1
1
  #include "duckdb/common/types/cast_helpers.hpp"
2
- #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
2
+ #include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
3
3
  #include "duckdb/execution/operator/csv_scanner/csv_reader_options.hpp"
4
4
 
5
5
  #include "utf8proc.hpp"
@@ -114,9 +114,9 @@ bool CSVSniffer::DetectHeaderWithSetColumn(ClientContext &context, vector<Header
114
114
  return false;
115
115
  }
116
116
  if (best_header_row[i].value != (*set_columns.names)[i]) {
117
- error << "Header Mismatch at position:" << i << "\n";
118
- error << "Expected Name: \"" << (*set_columns.names)[i] << "\".";
119
- error << "Actual Name: \"" << best_header_row[i].value << "\"."
117
+ error << "Header mismatch at position: " << i << "\n";
118
+ error << "Expected name: \"" << (*set_columns.names)[i] << "\", ";
119
+ error << "Actual name: \"" << best_header_row[i].value << "\"."
120
120
  << "\n";
121
121
  has_header = false;
122
122
  break;
@@ -4,7 +4,7 @@
4
4
  #include "duckdb/common/operator/integer_cast_operator.hpp"
5
5
  #include "duckdb/common/string.hpp"
6
6
  #include "duckdb/common/types/time.hpp"
7
- #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
7
+ #include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
8
8
 
9
9
  namespace duckdb {
10
10
  struct TryCastFloatingOperator {
@@ -488,7 +488,7 @@ void CSVSniffer::DetectTypes() {
488
488
  if (!best_candidate) {
489
489
  DialectCandidates dialect_candidates(options.dialect_options.state_machine_options);
490
490
  auto error = CSVError::SniffingError(options, dialect_candidates.Print());
491
- error_handler->Error(error);
491
+ error_handler->Error(error, true);
492
492
  }
493
493
  // Assert that it's all good at this point.
494
494
  D_ASSERT(best_candidate && !best_format_candidates.empty());
@@ -1,4 +1,4 @@
1
- #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
1
+ #include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
2
2
  #include "duckdb/execution/operator/csv_scanner/csv_casting.hpp"
3
3
 
4
4
  namespace duckdb {
@@ -1,4 +1,4 @@
1
- #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
1
+ #include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
2
2
 
3
3
  namespace duckdb {
4
4
  void CSVSniffer::ReplaceTypes() {
@@ -1,5 +1,5 @@
1
1
  #include "duckdb/execution/operator/csv_scanner/csv_state_machine.hpp"
2
- #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
2
+ #include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
3
3
  #include "utf8proc_wrapper.hpp"
4
4
  #include "duckdb/main/error_manager.hpp"
5
5
  #include "duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp"
@@ -1,6 +1,6 @@
1
1
  #include "duckdb/execution/operator/csv_scanner/csv_state_machine.hpp"
2
2
  #include "duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp"
3
- #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
3
+ #include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
4
4
 
5
5
  namespace duckdb {
6
6
 
@@ -26,10 +26,10 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
26
26
  switch (cur_state) {
27
27
  case CSVState::QUOTED:
28
28
  case CSVState::QUOTED_NEW_LINE:
29
+ case CSVState::ESCAPE:
29
30
  InitializeTransitionArray(transition_array, cur_state, CSVState::QUOTED);
30
31
  break;
31
32
  case CSVState::UNQUOTED:
32
- case CSVState::ESCAPE:
33
33
  InitializeTransitionArray(transition_array, cur_state, CSVState::INVALID);
34
34
  break;
35
35
  case CSVState::COMMENT:
@@ -1,6 +1,6 @@
1
1
  #include "duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp"
2
2
 
3
- #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
3
+ #include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
4
4
  #include "duckdb/execution/operator/csv_scanner/skip_scanner.hpp"
5
5
  #include "duckdb/function/table/read_csv.hpp"
6
6
 
@@ -1,6 +1,6 @@
1
1
  #include "duckdb/execution/operator/csv_scanner/global_csv_state.hpp"
2
2
 
3
- #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
3
+ #include "duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp"
4
4
  #include "duckdb/execution/operator/csv_scanner/scanner_boundary.hpp"
5
5
  #include "duckdb/execution/operator/csv_scanner/skip_scanner.hpp"
6
6
  #include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"