duckdb 1.1.2-dev4.0 → 1.1.2-dev6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/third_party/icu/common/putil.cpp +0 -5
  3. package/src/duckdb/extension/icu/third_party/icu/common/rbbiscan.cpp +1 -1
  4. package/src/duckdb/extension/icu/third_party/icu/common/rbbitblb.cpp +1 -1
  5. package/src/duckdb/extension/icu/third_party/icu/common/ucurr.cpp +1 -1
  6. package/src/duckdb/extension/icu/third_party/icu/common/uresbund.cpp +1 -1
  7. package/src/duckdb/extension/icu/third_party/icu/common/uresimp.h +31 -31
  8. package/src/duckdb/extension/icu/third_party/icu/common/ustring.cpp +1 -1
  9. package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
  10. package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +12 -12
  11. package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
  12. package/src/duckdb/extension/icu/third_party/icu/i18n/listformatter.cpp +4 -4
  13. package/src/duckdb/extension/icu/third_party/icu/i18n/number_decimalquantity.h +1 -1
  14. package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
  15. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +28 -28
  16. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +7 -7
  17. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/ucol.h +1 -1
  18. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/ucoleitr.h +41 -41
  19. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/umsg.h +41 -41
  20. package/src/duckdb/extension/icu/third_party/icu/i18n/usrchimp.h +3 -3
  21. package/src/duckdb/extension/json/include/json_common.hpp +1 -1
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +13 -7
  23. package/src/duckdb/extension/parquet/column_writer.cpp +2 -1
  24. package/src/duckdb/extension/parquet/geo_parquet.cpp +24 -9
  25. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +3 -1
  26. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -0
  27. package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -1
  28. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +0 -4
  29. package/src/duckdb/extension/parquet/parquet_extension.cpp +20 -6
  30. package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -2
  31. package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -1
  32. package/src/duckdb/extension/parquet/serialize_parquet.cpp +0 -2
  33. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -1
  34. package/src/duckdb/src/catalog/default/default_functions.cpp +5 -5
  35. package/src/duckdb/src/common/allocator.cpp +3 -2
  36. package/src/duckdb/src/common/arrow/arrow_appender.cpp +1 -0
  37. package/src/duckdb/src/common/arrow/arrow_converter.cpp +11 -0
  38. package/src/duckdb/src/common/arrow/schema_metadata.cpp +6 -4
  39. package/src/duckdb/src/common/enum_util.cpp +33 -0
  40. package/src/duckdb/src/common/exception.cpp +3 -0
  41. package/src/duckdb/src/common/extra_type_info.cpp +1 -44
  42. package/src/duckdb/src/common/field_writer.cpp +97 -0
  43. package/src/duckdb/src/common/render_tree.cpp +7 -5
  44. package/src/duckdb/src/common/row_operations/row_match.cpp +359 -0
  45. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +27 -0
  46. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +36 -0
  47. package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
  48. package/src/duckdb/src/common/serializer.cpp +24 -0
  49. package/src/duckdb/src/common/sort/comparators.cpp +2 -2
  50. package/src/duckdb/src/common/types/bit.cpp +57 -34
  51. package/src/duckdb/src/common/types/data_chunk.cpp +32 -29
  52. package/src/duckdb/src/common/types/vector_cache.cpp +12 -6
  53. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +14 -0
  54. package/src/duckdb/src/core_functions/aggregate/distributive/bitstring_agg.cpp +20 -1
  55. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -2
  56. package/src/duckdb/src/core_functions/aggregate/holistic/approx_top_k.cpp +32 -7
  57. package/src/duckdb/src/core_functions/function_list.cpp +1 -2
  58. package/src/duckdb/src/core_functions/scalar/bit/bitstring.cpp +23 -5
  59. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +12 -6
  60. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +1 -1
  61. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +4 -3
  62. package/src/duckdb/src/execution/expression_executor/execute_case.cpp +4 -3
  63. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -1
  64. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +3 -2
  65. package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -1
  66. package/src/duckdb/src/execution/expression_executor/execute_function.cpp +2 -1
  67. package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +3 -2
  68. package/src/duckdb/src/execution/expression_executor/execute_reference.cpp +1 -1
  69. package/src/duckdb/src/execution/expression_executor.cpp +9 -3
  70. package/src/duckdb/src/execution/expression_executor_state.cpp +11 -9
  71. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +238 -0
  72. package/src/duckdb/src/execution/index/art/plan_art.cpp +94 -0
  73. package/src/duckdb/src/execution/index/index_type_set.cpp +4 -1
  74. package/src/duckdb/src/execution/join_hashtable.cpp +7 -8
  75. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +6 -4
  76. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +4 -4
  77. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +1 -1
  78. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +44 -5
  79. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +28 -24
  80. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +25 -26
  81. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +5 -3
  82. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +4 -4
  83. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +2 -2
  84. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
  85. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +1 -1
  86. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp +1 -1
  87. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +2 -2
  88. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +1 -1
  89. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +1 -1
  90. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +73 -27
  91. package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +1 -1
  92. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +695 -0
  93. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1487 -0
  94. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +72 -0
  95. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
  96. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +280 -0
  97. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +666 -0
  98. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +14 -4
  99. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +207 -0
  100. package/src/duckdb/src/execution/partitionable_hashtable.cpp +207 -0
  101. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +6 -1
  102. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -4
  103. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +14 -87
  104. package/src/duckdb/src/execution/physical_plan/plan_export.cpp +1 -1
  105. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +1 -1
  106. package/src/duckdb/src/execution/reservoir_sample.cpp +1 -1
  107. package/src/duckdb/src/execution/window_executor.cpp +3 -3
  108. package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
  109. package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -2
  110. package/src/duckdb/src/function/scalar/string/concat.cpp +118 -151
  111. package/src/duckdb/src/function/table/arrow.cpp +13 -0
  112. package/src/duckdb/src/function/table/arrow_conversion.cpp +12 -7
  113. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  114. package/src/duckdb/src/function/table/read_csv.cpp +2 -30
  115. package/src/duckdb/src/function/table/sniff_csv.cpp +2 -1
  116. package/src/duckdb/src/function/table/system/duckdb_secrets.cpp +15 -7
  117. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  118. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
  119. package/src/duckdb/src/include/duckdb/common/atomic.hpp +13 -1
  120. package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +3 -4
  121. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  122. package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +2 -0
  123. package/src/duckdb/src/include/duckdb/common/exception.hpp +10 -0
  124. package/src/duckdb/src/include/duckdb/common/extra_type_info/enum_type_info.hpp +53 -0
  125. package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +5 -5
  126. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -0
  127. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +36 -33
  128. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +10 -13
  129. package/src/duckdb/src/include/duckdb/common/types/uhugeint.hpp +1 -1
  130. package/src/duckdb/src/include/duckdb/common/types/vector_cache.hpp +7 -5
  131. package/src/duckdb/src/include/duckdb/common/windows_undefs.hpp +2 -1
  132. package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +2 -0
  133. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +1 -1
  134. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +0 -6
  135. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +1 -1
  136. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +3 -2
  137. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +3 -0
  138. package/src/duckdb/src/include/duckdb/execution/index/index_type.hpp +16 -1
  139. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +4 -4
  140. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +4 -2
  141. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +3 -2
  142. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +91 -36
  143. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp +36 -0
  144. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +1 -1
  145. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +0 -1
  146. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +2 -5
  147. package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
  148. package/src/duckdb/src/include/duckdb/main/database.hpp +5 -0
  149. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  150. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +20 -22
  151. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -9
  152. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +8 -1
  153. package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -0
  154. package/src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp +5 -5
  155. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +15 -5
  156. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -0
  157. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -0
  158. package/src/duckdb/src/include/duckdb/planner/operator/logical_export.hpp +10 -13
  159. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +1 -0
  160. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -2
  161. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +1 -1
  162. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +0 -2
  163. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +1 -0
  164. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +5 -1
  165. package/src/duckdb/src/include/duckdb.h +2 -2
  166. package/src/duckdb/src/main/appender.cpp +3 -0
  167. package/src/duckdb/src/main/capi/profiling_info-c.cpp +5 -2
  168. package/src/duckdb/src/main/client_context.cpp +8 -2
  169. package/src/duckdb/src/main/connection.cpp +1 -1
  170. package/src/duckdb/src/main/database.cpp +13 -0
  171. package/src/duckdb/src/main/extension/extension_helper.cpp +1 -1
  172. package/src/duckdb/src/main/extension/extension_install.cpp +9 -1
  173. package/src/duckdb/src/main/extension/extension_load.cpp +3 -2
  174. package/src/duckdb/src/main/extension_install_info.cpp +1 -1
  175. package/src/duckdb/src/main/profiling_info.cpp +78 -58
  176. package/src/duckdb/src/main/query_profiler.cpp +79 -89
  177. package/src/duckdb/src/main/relation/read_csv_relation.cpp +1 -1
  178. package/src/duckdb/src/main/secret/secret.cpp +2 -1
  179. package/src/duckdb/src/main/secret/secret_manager.cpp +14 -0
  180. package/src/duckdb/src/optimizer/cte_filter_pusher.cpp +4 -2
  181. package/src/duckdb/src/optimizer/deliminator.cpp +0 -7
  182. package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +7 -0
  183. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +4 -1
  184. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +21 -21
  185. package/src/duckdb/src/parallel/task_scheduler.cpp +9 -0
  186. package/src/duckdb/src/parser/parsed_data/exported_table_data.cpp +22 -0
  187. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +3 -0
  188. package/src/duckdb/src/parser/statement/insert_statement.cpp +7 -1
  189. package/src/duckdb/src/parser/transform/expression/transform_boolean_test.cpp +1 -1
  190. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +89 -87
  191. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -2
  192. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +4 -9
  193. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +4 -0
  194. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +2 -2
  195. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -1
  196. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +4 -3
  197. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +13 -3
  198. package/src/duckdb/src/planner/expression_binder.cpp +1 -1
  199. package/src/duckdb/src/planner/operator/logical_export.cpp +28 -0
  200. package/src/duckdb/src/planner/table_binding.cpp +1 -2
  201. package/src/duckdb/src/planner/table_filter.cpp +6 -2
  202. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +2 -1
  203. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
  204. package/src/duckdb/src/storage/compression/bitpacking.cpp +7 -3
  205. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -1
  206. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +2 -2
  207. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +16 -0
  208. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +29 -0
  209. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +15 -0
  210. package/src/duckdb/src/storage/single_file_block_manager.cpp +2 -1
  211. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +3 -5
  212. package/src/duckdb/src/storage/storage_info.cpp +4 -4
  213. package/src/duckdb/src/storage/table/row_group_collection.cpp +1 -1
  214. package/src/duckdb/src/storage/table/row_version_manager.cpp +5 -1
  215. package/src/duckdb/src/storage/temporary_file_manager.cpp +1 -1
  216. package/src/duckdb/src/transaction/duck_transaction.cpp +15 -14
  217. package/src/duckdb/third_party/brotli/common/brotli_platform.h +1 -1
  218. package/src/duckdb/third_party/brotli/dec/decode.cpp +1 -1
  219. package/src/duckdb/third_party/brotli/enc/memory.cpp +4 -4
  220. package/src/duckdb/third_party/fsst/libfsst.cpp +1 -1
  221. package/src/duckdb/third_party/hyperloglog/sds.cpp +1 -1
  222. package/src/duckdb/third_party/hyperloglog/sds.hpp +1 -1
  223. package/src/duckdb/third_party/libpg_query/include/common/keywords.hpp +1 -1
  224. package/src/duckdb/third_party/libpg_query/include/datatype/timestamp.hpp +1 -1
  225. package/src/duckdb/third_party/libpg_query/include/mb/pg_wchar.hpp +1 -1
  226. package/src/duckdb/third_party/libpg_query/include/nodes/bitmapset.hpp +1 -1
  227. package/src/duckdb/third_party/libpg_query/include/nodes/lockoptions.hpp +1 -1
  228. package/src/duckdb/third_party/libpg_query/include/nodes/makefuncs.hpp +1 -1
  229. package/src/duckdb/third_party/libpg_query/include/nodes/pg_list.hpp +1 -1
  230. package/src/duckdb/third_party/libpg_query/include/nodes/value.hpp +1 -1
  231. package/src/duckdb/third_party/libpg_query/include/parser/gramparse.hpp +1 -1
  232. package/src/duckdb/third_party/libpg_query/include/parser/parser.hpp +1 -1
  233. package/src/duckdb/third_party/libpg_query/include/parser/scanner.hpp +1 -1
  234. package/src/duckdb/third_party/libpg_query/include/parser/scansup.hpp +1 -1
  235. package/src/duckdb/third_party/libpg_query/include/pg_functions.hpp +1 -1
  236. package/src/duckdb/third_party/libpg_query/pg_functions.cpp +1 -1
  237. package/src/duckdb/third_party/libpg_query/src_backend_nodes_list.cpp +1 -1
  238. package/src/duckdb/third_party/libpg_query/src_backend_nodes_makefuncs.cpp +1 -1
  239. package/src/duckdb/third_party/libpg_query/src_backend_nodes_value.cpp +1 -1
  240. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +1964 -1964
  241. package/src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp +1 -1
  242. package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +1 -1
  243. package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +1 -1
  244. package/src/duckdb/third_party/lz4/lz4.cpp +1 -1
  245. package/src/duckdb/third_party/mbedtls/include/des_alt.h +1 -1
  246. package/src/duckdb/third_party/mbedtls/include/mbedtls/aes_alt.h +1 -1
  247. package/src/duckdb/third_party/mbedtls/include/mbedtls/aria_alt.h +1 -1
  248. package/src/duckdb/third_party/mbedtls/include/mbedtls/asn1write.h +1 -1
  249. package/src/duckdb/third_party/mbedtls/include/mbedtls/camellia_alt.h +1 -1
  250. package/src/duckdb/third_party/mbedtls/include/mbedtls/ccm_alt.h +1 -1
  251. package/src/duckdb/third_party/mbedtls/include/mbedtls/chacha20.h +1 -1
  252. package/src/duckdb/third_party/mbedtls/include/mbedtls/chachapoly.h +1 -1
  253. package/src/duckdb/third_party/mbedtls/include/mbedtls/cmac.h +1 -1
  254. package/src/duckdb/third_party/mbedtls/include/mbedtls/config_psa.h +1 -1
  255. package/src/duckdb/third_party/mbedtls/include/mbedtls/ecdsa.h +1 -1
  256. package/src/duckdb/third_party/mbedtls/include/mbedtls/ecp.h +1 -1
  257. package/src/duckdb/third_party/mbedtls/include/mbedtls/gcm_alt.h +1 -1
  258. package/src/duckdb/third_party/mbedtls/include/mbedtls/md5.h +1 -1
  259. package/src/duckdb/third_party/mbedtls/include/mbedtls/nist_kw.h +1 -1
  260. package/src/duckdb/third_party/mbedtls/include/mbedtls/pkcs12.h +1 -1
  261. package/src/duckdb/third_party/mbedtls/include/mbedtls/pkcs5.h +1 -1
  262. package/src/duckdb/third_party/mbedtls/include/mbedtls/psa_util.h +1 -1
  263. package/src/duckdb/third_party/mbedtls/include/mbedtls/ripemd160.h +1 -1
  264. package/src/duckdb/third_party/mbedtls/include/mbedtls/threading.h +1 -1
  265. package/src/duckdb/third_party/mbedtls/include/mbedtls/timing.h +1 -1
  266. package/src/duckdb/third_party/mbedtls/include/platform_alt.h +1 -1
  267. package/src/duckdb/third_party/mbedtls/include/psa/crypto.h +1 -1
  268. package/src/duckdb/third_party/mbedtls/include/rsa_alt.h +1 -1
  269. package/src/duckdb/third_party/mbedtls/include/sha1_alt.h +1 -1
  270. package/src/duckdb/third_party/mbedtls/include/sha256_alt.h +1 -1
  271. package/src/duckdb/third_party/mbedtls/include/sha512_alt.h +1 -1
  272. package/src/duckdb/third_party/mbedtls/include/ssl_misc.h +1 -1
  273. package/src/duckdb/third_party/mbedtls/library/aesni.h +1 -1
  274. package/src/duckdb/third_party/mbedtls/library/padlock.h +1 -1
  275. package/src/duckdb/third_party/miniz/miniz.cpp +1 -1
  276. package/src/duckdb/third_party/parquet/parquet_types.cpp +1 -1
  277. package/src/duckdb/third_party/parquet/windows_compatibility.h +1 -1
  278. package/src/duckdb/third_party/pcg/pcg_extras.hpp +1 -1
  279. package/src/duckdb/third_party/pcg/pcg_uint128.hpp +1 -1
  280. package/src/duckdb/third_party/skiplist/Node.h +4 -4
  281. package/src/duckdb/third_party/snappy/snappy.cc +1 -1
  282. package/src/duckdb/third_party/snappy/snappy_version.hpp +1 -1
  283. package/src/duckdb/third_party/thrift/thrift/thrift-config.h +1 -1
  284. package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1 -1
  285. package/src/duckdb/third_party/zstd/include/zstd_static.h +1 -1
  286. package/src/duckdb/ub_src_execution_index_art.cpp +2 -0
  287. package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
  288. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
@@ -0,0 +1,666 @@
1
+ #include "duckdb/execution/operator/persistent/parallel_csv_reader.hpp"
2
+
3
+ #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
4
+ #include "duckdb/common/file_system.hpp"
5
+ #include "duckdb/common/string_util.hpp"
6
+ #include "duckdb/common/to_string.hpp"
7
+ #include "duckdb/common/types/cast_helpers.hpp"
8
+ #include "duckdb/common/vector_operations/unary_executor.hpp"
9
+ #include "duckdb/common/vector_operations/vector_operations.hpp"
10
+ #include "duckdb/function/scalar/strftime_format.hpp"
11
+ #include "duckdb/main/database.hpp"
12
+ #include "duckdb/parser/column_definition.hpp"
13
+ #include "duckdb/storage/data_table.hpp"
14
+ #include "utf8proc_wrapper.hpp"
15
+ #include "utf8proc.hpp"
16
+ #include "duckdb/parser/keyword_helper.hpp"
17
+ #include "duckdb/function/table/read_csv.hpp"
18
+ #include "duckdb/execution/operator/persistent/csv_line_info.hpp"
19
+
20
+ #include <algorithm>
21
+ #include <cctype>
22
+ #include <cstring>
23
+ #include <fstream>
24
+
25
+ namespace duckdb {
26
+
27
+ ParallelCSVReader::ParallelCSVReader(ClientContext &context, BufferedCSVReaderOptions options_p,
28
+ unique_ptr<CSVBufferRead> buffer_p, idx_t first_pos_first_buffer_p,
29
+ const vector<LogicalType> &requested_types, idx_t file_idx_p)
30
+ : BaseCSVReader(context, std::move(options_p), requested_types), file_idx(file_idx_p),
31
+ first_pos_first_buffer(first_pos_first_buffer_p) {
32
+ Initialize(requested_types);
33
+ SetBufferRead(std::move(buffer_p));
34
+ if (options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1) {
35
+ throw InternalException("Parallel CSV reader cannot handle CSVs with multi-byte delimiters/escapes/quotes");
36
+ }
37
+ }
38
+
39
+ void ParallelCSVReader::Initialize(const vector<LogicalType> &requested_types) {
40
+ return_types = requested_types;
41
+ InitParseChunk(return_types.size());
42
+ }
43
+
44
+ bool ParallelCSVReader::NewLineDelimiter(bool carry, bool carry_followed_by_nl, bool first_char) {
45
+ // Set the delimiter if not set yet.
46
+ SetNewLineDelimiter(carry, carry_followed_by_nl);
47
+ D_ASSERT(options.new_line == NewLineIdentifier::SINGLE || options.new_line == NewLineIdentifier::CARRY_ON);
48
+ if (options.new_line == NewLineIdentifier::SINGLE) {
49
+ return (!carry) || (carry && !carry_followed_by_nl);
50
+ }
51
+ return (carry && carry_followed_by_nl) || (!carry && first_char);
52
+ }
53
+
54
+ void ParallelCSVReader::SkipEmptyLines() {
55
+ idx_t new_pos_buffer = position_buffer;
56
+ if (parse_chunk.data.size() == 1) {
57
+ // Empty lines are null data.
58
+ return;
59
+ }
60
+ for (; new_pos_buffer < end_buffer; new_pos_buffer++) {
61
+ if (StringUtil::CharacterIsNewline((*buffer)[new_pos_buffer])) {
62
+ bool carrier_return = (*buffer)[new_pos_buffer] == '\r';
63
+ new_pos_buffer++;
64
+ if (carrier_return && new_pos_buffer < buffer_size && (*buffer)[new_pos_buffer] == '\n') {
65
+ position_buffer++;
66
+ }
67
+ if (new_pos_buffer > end_buffer) {
68
+ return;
69
+ }
70
+ position_buffer = new_pos_buffer;
71
+ } else if ((*buffer)[new_pos_buffer] != ' ') {
72
+ return;
73
+ }
74
+ }
75
+ }
76
+
77
+ bool ParallelCSVReader::SetPosition() {
78
+ if (buffer->buffer->IsCSVFileFirstBuffer() && start_buffer == position_buffer &&
79
+ start_buffer == first_pos_first_buffer) {
80
+ start_buffer = buffer->buffer->GetStart();
81
+ position_buffer = start_buffer;
82
+ verification_positions.beginning_of_first_line = position_buffer;
83
+ verification_positions.end_of_last_line = position_buffer;
84
+ // First buffer doesn't need any setting
85
+
86
+ if (options.header) {
87
+ for (; position_buffer < end_buffer; position_buffer++) {
88
+ if (StringUtil::CharacterIsNewline((*buffer)[position_buffer])) {
89
+ bool carrier_return = (*buffer)[position_buffer] == '\r';
90
+ position_buffer++;
91
+ if (carrier_return && position_buffer < buffer_size && (*buffer)[position_buffer] == '\n') {
92
+ position_buffer++;
93
+ }
94
+ if (position_buffer > end_buffer) {
95
+ return false;
96
+ }
97
+ SkipEmptyLines();
98
+ if (verification_positions.beginning_of_first_line == 0) {
99
+ verification_positions.beginning_of_first_line = position_buffer;
100
+ }
101
+
102
+ verification_positions.end_of_last_line = position_buffer;
103
+ return true;
104
+ }
105
+ }
106
+ return false;
107
+ }
108
+ SkipEmptyLines();
109
+ if (verification_positions.beginning_of_first_line == 0) {
110
+ verification_positions.beginning_of_first_line = position_buffer;
111
+ }
112
+
113
+ verification_positions.end_of_last_line = position_buffer;
114
+ return true;
115
+ }
116
+
117
+ // We have to move position up to next new line
118
+ idx_t end_buffer_real = end_buffer;
119
+ // Check if we already start in a valid line
120
+ string error_message;
121
+ bool successfully_read_first_line = false;
122
+ while (!successfully_read_first_line) {
123
+ DataChunk first_line_chunk;
124
+ first_line_chunk.Initialize(allocator, return_types);
125
+ // Ensure that parse_chunk has no gunk when trying to figure new line
126
+ parse_chunk.Reset();
127
+ for (; position_buffer < end_buffer; position_buffer++) {
128
+ if (StringUtil::CharacterIsNewline((*buffer)[position_buffer])) {
129
+ bool carriage_return = (*buffer)[position_buffer] == '\r';
130
+ bool carriage_return_followed = false;
131
+ position_buffer++;
132
+ if (position_buffer < end_buffer) {
133
+ if (carriage_return && (*buffer)[position_buffer] == '\n') {
134
+ carriage_return_followed = true;
135
+ position_buffer++;
136
+ }
137
+ }
138
+ if (NewLineDelimiter(carriage_return, carriage_return_followed, position_buffer - 1 == start_buffer)) {
139
+ break;
140
+ }
141
+ }
142
+ }
143
+ SkipEmptyLines();
144
+
145
+ if (position_buffer > buffer_size) {
146
+ break;
147
+ }
148
+
149
+ if (position_buffer >= end_buffer && !StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1])) {
150
+ break;
151
+ }
152
+
153
+ if (position_buffer > end_buffer && options.new_line == NewLineIdentifier::CARRY_ON &&
154
+ (*buffer)[position_buffer - 1] == '\n') {
155
+ break;
156
+ }
157
+ idx_t position_set = position_buffer;
158
+ start_buffer = position_buffer;
159
+ // We check if we can add this line
160
+ // disable the projection pushdown while reading the first line
161
+ // otherwise the first line parsing can be influenced by which columns we are reading
162
+ auto column_ids = std::move(reader_data.column_ids);
163
+ auto column_mapping = std::move(reader_data.column_mapping);
164
+ InitializeProjection();
165
+ try {
166
+ successfully_read_first_line = TryParseSimpleCSV(first_line_chunk, error_message, true);
167
+ } catch (...) {
168
+ successfully_read_first_line = false;
169
+ }
170
+ // restore the projection pushdown
171
+ reader_data.column_ids = std::move(column_ids);
172
+ reader_data.column_mapping = std::move(column_mapping);
173
+ end_buffer = end_buffer_real;
174
+ start_buffer = position_set;
175
+ if (position_buffer >= end_buffer) {
176
+ if (successfully_read_first_line) {
177
+ position_buffer = position_set;
178
+ }
179
+ break;
180
+ }
181
+ position_buffer = position_set;
182
+ }
183
+ if (verification_positions.beginning_of_first_line == 0) {
184
+ verification_positions.beginning_of_first_line = position_buffer;
185
+ }
186
+ // Ensure that parse_chunk has no gunk when trying to figure new line
187
+ parse_chunk.Reset();
188
+
189
+ verification_positions.end_of_last_line = position_buffer;
190
+ finished = false;
191
+ return successfully_read_first_line;
192
+ }
193
+
194
+ void ParallelCSVReader::SetBufferRead(unique_ptr<CSVBufferRead> buffer_read_p) {
195
+ if (!buffer_read_p->buffer) {
196
+ throw InternalException("ParallelCSVReader::SetBufferRead - CSVBufferRead does not have a buffer to read");
197
+ }
198
+ position_buffer = buffer_read_p->buffer_start;
199
+ start_buffer = buffer_read_p->buffer_start;
200
+ end_buffer = buffer_read_p->buffer_end;
201
+ if (buffer_read_p->next_buffer) {
202
+ buffer_size = buffer_read_p->buffer->GetBufferSize() + buffer_read_p->next_buffer->GetBufferSize();
203
+ } else {
204
+ buffer_size = buffer_read_p->buffer->GetBufferSize();
205
+ }
206
+ buffer = std::move(buffer_read_p);
207
+
208
+ reached_remainder_state = false;
209
+ verification_positions.beginning_of_first_line = 0;
210
+ verification_positions.end_of_last_line = 0;
211
+ finished = false;
212
+ D_ASSERT(end_buffer <= buffer_size);
213
+ }
214
+
215
+ VerificationPositions ParallelCSVReader::GetVerificationPositions() {
216
+ verification_positions.beginning_of_first_line += buffer->buffer->GetCSVGlobalStart();
217
+ verification_positions.end_of_last_line += buffer->buffer->GetCSVGlobalStart();
218
+ return verification_positions;
219
+ }
220
+
221
+ // If BufferRemainder returns false, it means we are done scanning this buffer and should go to the end_state
222
+ bool ParallelCSVReader::BufferRemainder() {
223
+ if (position_buffer >= end_buffer && !reached_remainder_state) {
224
+ // First time we finish the buffer piece we should scan here, we set the variables
225
+ // to allow this piece to be scanned up to the end of the buffer or the next new line
226
+ reached_remainder_state = true;
227
+ // end_buffer is allowed to go to buffer size to finish its last line
228
+ end_buffer = buffer_size;
229
+ }
230
+ if (position_buffer >= end_buffer) {
231
+ // buffer ends, return false
232
+ return false;
233
+ }
234
+ // we can still scan stuff, return true
235
+ return true;
236
+ }
237
+
238
+ void ParallelCSVReader::VerifyLineLength(idx_t line_size) {
239
+ if (line_size > options.maximum_line_size) {
240
+ throw InvalidInputException("Error in file \"%s\" on line %s: Maximum line size of %llu bytes exceeded!",
241
+ options.file_path,
242
+ GetLineNumberStr(parse_chunk.size(), linenr_estimated, buffer->batch_index).c_str(),
243
+ options.maximum_line_size);
244
+ }
245
+ }
246
+
247
+ bool AllNewLine(string_t value, idx_t column_amount) {
248
+ auto value_str = value.GetString();
249
+ if (value_str.empty() && column_amount == 1) {
250
+ // This is a one column (empty)
251
+ return false;
252
+ }
253
+ for (idx_t i = 0; i < value.GetSize(); i++) {
254
+ if (!StringUtil::CharacterIsNewline(value_str[i])) {
255
+ return false;
256
+ }
257
+ }
258
+ return true;
259
+ }
260
+
261
// Parses rows out of the current buffer piece with a goto-based state machine. The states are
// value_start, normal, add_value, add_row, in_quotes, unquote, handle_escape and final_state.
//
// insert_chunk:  chunk handed to AddRow/Flush for the parsed rows.
// error_message: assigned on the error paths that return false.
// try_add_line:  when true, the SetPosition() call is skipped, a quote character inside an unquoted
//                value makes the function return false, an unterminated quote in the last buffer
//                returns false instead of throwing, and the function returns as soon as one row has
//                been handled (see add_row and final_state).
//
// Returns false on a parse error or, with try_add_line, when the row could not be added; returns true
// otherwise. `finished` is set to true when nothing is left to read for this buffer piece.
// Throws InvalidInputException for unterminated quotes in the last buffer of the file (only when
// try_add_line is false) and, through VerifyLineLength, for lines above options.maximum_line_size.
bool ParallelCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message, bool try_add_line) {
	// If line is not set, we have to figure it out, we assume whatever is in the first line
	if (options.new_line == NewLineIdentifier::NOT_SET) {
		idx_t cur_pos = position_buffer;
		// we can start in the middle of a new line, so move a bit forward.
		while (cur_pos < end_buffer) {
			if (StringUtil::CharacterIsNewline((*buffer)[cur_pos])) {
				cur_pos++;
			} else {
				break;
			}
		}
		// Look for the first newline after that and report whether it is '\r' and whether a '\n' follows.
		for (; cur_pos < end_buffer; cur_pos++) {
			if (StringUtil::CharacterIsNewline((*buffer)[cur_pos])) {
				bool carriage_return = (*buffer)[cur_pos] == '\r';
				bool carriage_return_followed = false;
				cur_pos++;
				if (cur_pos < end_buffer) {
					if (carriage_return && (*buffer)[cur_pos] == '\n') {
						carriage_return_followed = true;
						cur_pos++;
					}
				}
				SetNewLineDelimiter(carriage_return, carriage_return_followed);
				break;
			}
		}
	}
	// Early exit: the value start already sits at the very end of the readable data
	if (start_buffer == buffer_size) {
		// Nothing to read
		finished = true;
		return true;
	}
	D_ASSERT(end_buffer <= buffer_size);
	// Local state of the parsing algorithm
	bool finished_chunk = false; // result of AddRow in add_row; when true the parser leaves through final_state
	idx_t column = 0;            // passed to AddValue/AddRow; presumably advanced by them - TODO confirm
	idx_t offset = 0;            // passed to GetValue; set to 1 after a closing quote was consumed
	bool has_quotes = false;     // true once the current value entered the in_quotes state

	vector<idx_t> escape_positions; // positions (relative to start_buffer) of escapes inside the current value
	if ((start_buffer == buffer->buffer_start || start_buffer == buffer->buffer_end) && !try_add_line) {
		// First time reading this buffer piece
		if (!SetPosition()) {
			finished = true;
			return true;
		}
	}
	if (position_buffer == buffer_size) {
		// Nothing to read
		finished = true;
		return true;
	}
	// Keep track of line size
	idx_t line_start = position_buffer;
	// start parsing the first value
	goto value_start;

value_start : {
	/* state: value_start */
	if (!BufferRemainder()) {
		goto final_state;
	}
	offset = 0;

	// this state parses the first character of a value
	if ((*buffer)[position_buffer] == options.quote[0]) {
		// quote: actual value starts in the next position
		// move to in_quotes state
		start_buffer = position_buffer + 1;
		goto in_quotes;
	} else {
		// no quote, move to normal parsing state
		start_buffer = position_buffer;
		goto normal;
	}
};

normal : {
	/* state: normal parsing state */
	// this state parses the remainder of a non-quoted value until we reach a delimiter or newline
	for (; position_buffer < end_buffer; position_buffer++) {
		auto c = (*buffer)[position_buffer];
		if (c == options.delimiter[0]) {
			// delimiter: end the value and add it to the chunk
			goto add_value;
		} else if (c == options.quote[0] && try_add_line) {
			// a quote inside an unquoted value is rejected when only trying to add a single line
			return false;
		} else if (StringUtil::CharacterIsNewline(c)) {
			// newline: add row
			if (column > 0 || try_add_line || parse_chunk.data.size() == 1) {
				goto add_row;
			}
			// newline at the very start of a row (no column yet): move the value start past it
			if (column == 0 && position_buffer == start_buffer) {
				start_buffer++;
			}
		}
	}
	// the scan window is exhausted: BufferRemainder() either widens it (first time) or reports the end
	if (!BufferRemainder()) {
		goto final_state;
	} else {
		goto normal;
	}
};

add_value : {
	/* state: Add value to string vector */
	AddValue(buffer->GetValue(start_buffer, position_buffer, offset), column, escape_positions, has_quotes,
	         buffer->local_batch_index);
	// increase position by 1 and move start to the new position
	offset = 0;
	has_quotes = false;
	start_buffer = ++position_buffer;
	if (!BufferRemainder()) {
		goto final_state;
	}
	goto value_start;
};

add_row : {
	/* state: Add Row to Parse chunk */
	// check type of newline (\r or \n)
	bool carriage_return = (*buffer)[position_buffer] == '\r';

	// the last value of the row ends at the newline
	AddValue(buffer->GetValue(start_buffer, position_buffer, offset), column, escape_positions, has_quotes,
	         buffer->local_batch_index);
	if (try_add_line) {
		// single-line mode: the row only counts when it has exactly as many columns as insert_chunk
		bool success = column == insert_chunk.ColumnCount();
		if (success) {
			// linenr is saved and restored so that adding this line leaves the counter unchanged
			idx_t cur_linenr = linenr;
			AddRow(insert_chunk, column, error_message, buffer->local_batch_index);
			success = Flush(insert_chunk, buffer->local_batch_index, true);
			linenr = cur_linenr;
		}
		reached_remainder_state = false;
		parse_chunk.Reset();
		return success;
	} else {
		VerifyLineLength(position_buffer - line_start);
		line_start = position_buffer;
		finished_chunk = AddRow(insert_chunk, column, error_message, buffer->local_batch_index);
	}
	// increase position by 1 and move start to the new position
	offset = 0;
	has_quotes = false;
	position_buffer++;
	start_buffer = position_buffer;
	verification_positions.end_of_last_line = position_buffer;
	if (carriage_return) {
		// \r newline, go to special state that parses an optional \n afterwards
		// optionally skips a newline (\n) character, which allows \r\n to be interpreted as a single line
		if (!BufferRemainder()) {
			goto final_state;
		}
		if ((*buffer)[position_buffer] == '\n') {
			// found \r\n while options.new_line is SINGLE: report the mismatch
			if (options.new_line == NewLineIdentifier::SINGLE) {
				error_message = "Wrong NewLine Identifier. Expecting \\r\\n";
				return false;
			}
			// newline after carriage return: skip
			// increase position by 1 and move start to the new position
			start_buffer = ++position_buffer;

			SkipEmptyLines();
			verification_positions.end_of_last_line = position_buffer;
			start_buffer = position_buffer;
			if (reached_remainder_state) {
				goto final_state;
			}
		} else {
			// found a lone \r while options.new_line is CARRY_ON: report the mismatch
			if (options.new_line == NewLineIdentifier::CARRY_ON) {
				error_message = "Wrong NewLine Identifier. Expecting \\r or \\n";
				return false;
			}
		}
		if (!BufferRemainder()) {
			goto final_state;
		}
		if (reached_remainder_state || finished_chunk) {
			goto final_state;
		}
		goto value_start;
	} else {
		// the row ended with a newline character other than \r while options.new_line is CARRY_ON
		if (options.new_line == NewLineIdentifier::CARRY_ON) {
			error_message = "Wrong NewLine Identifier. Expecting \\r or \\n";
			return false;
		}
		if (reached_remainder_state) {
			goto final_state;
		}
		if (!BufferRemainder()) {
			goto final_state;
		}
		SkipEmptyLines();
		verification_positions.end_of_last_line = position_buffer;
		start_buffer = position_buffer;
		// \n newline, move to value start
		if (finished_chunk) {
			goto final_state;
		}
		goto value_start;
	}
}
in_quotes:
	/* state: in_quotes this state parses the remainder of a quoted value*/
	has_quotes = true;
	// step over the character that led into this state (opening quote, escaped quote or escaped character)
	position_buffer++;
	for (; position_buffer < end_buffer; position_buffer++) {
		auto c = (*buffer)[position_buffer];
		if (c == options.quote[0]) {
			// quote: move to unquoted state
			goto unquote;
		} else if (c == options.escape[0]) {
			// escape: store the escaped position and move to handle_escape state
			escape_positions.push_back(position_buffer - start_buffer);
			goto handle_escape;
		}
	}
	if (!BufferRemainder()) {
		if (buffer->buffer->IsCSVFileLastBuffer()) {
			if (try_add_line) {
				return false;
			}
			// still in quoted state at the end of the file or at the end of a buffer when running multithreaded, error:
			throw InvalidInputException("Error in file \"%s\" on line %s: unterminated quotes. (%s)", options.file_path,
			                            GetLineNumberStr(linenr, linenr_estimated, buffer->local_batch_index).c_str(),
			                            options.ToString());
		} else {
			goto final_state;
		}
	} else {
		// the window was widened: step back one position because in_quotes increments on entry
		position_buffer--;
		goto in_quotes;
	}

unquote : {
	/* state: unquote: this state handles the state directly after we unquote*/
	//
	// in this state we expect either another quote (entering the quoted state again, and escaping the quote)
	// or a delimiter/newline, ending the current value and moving on to the next value
	position_buffer++;
	if (!BufferRemainder()) {
		offset = 1;
		goto final_state;
	}
	auto c = (*buffer)[position_buffer];
	if (c == options.quote[0] && (options.escape.empty() || options.escape[0] == options.quote[0])) {
		// escaped quote, return to quoted state and store escape position
		escape_positions.push_back(position_buffer - start_buffer);
		goto in_quotes;
	} else if (c == options.delimiter[0]) {
		// delimiter, add value
		offset = 1;
		goto add_value;
	} else if (StringUtil::CharacterIsNewline(c)) {
		offset = 1;
		// FIXME: should this be an assertion?
		D_ASSERT(try_add_line || (!try_add_line && column == parse_chunk.ColumnCount() - 1));
		goto add_row;
	} else if (position_buffer >= end_buffer) {
		// reached end of buffer
		// NOTE(review): BufferRemainder() returned true above, which implies position_buffer < end_buffer,
		// so this branch looks unreachable - confirm before relying on it
		offset = 1;
		goto final_state;
	} else {
		error_message = StringUtil::Format(
		    "Error in file \"%s\" on line %s: quote should be followed by end of value, end of "
		    "row or another quote. (%s). ",
		    options.file_path, GetLineNumberStr(linenr, linenr_estimated, buffer->local_batch_index).c_str(),
		    options.ToString());
		return false;
	}
}
handle_escape : {
	/* state: handle_escape */
	// escape should be followed by a quote or another escape character
	position_buffer++;
	if (!BufferRemainder()) {
		goto final_state;
	}
	// NOTE(review): BufferRemainder() returned true above, so position_buffer < end_buffer, and end_buffer is
	// asserted to be <= buffer_size; this check therefore looks unreachable - confirm
	if (position_buffer >= buffer_size && buffer->buffer->IsCSVFileLastBuffer()) {
		error_message = StringUtil::Format(
		    "Error in file \"%s\" on line %s: neither QUOTE nor ESCAPE is proceeded by ESCAPE. (%s)", options.file_path,
		    GetLineNumberStr(linenr, linenr_estimated, buffer->local_batch_index).c_str(), options.ToString());
		return false;
	}
	if ((*buffer)[position_buffer] != options.quote[0] && (*buffer)[position_buffer] != options.escape[0]) {
		error_message = StringUtil::Format(
		    "Error in file \"%s\" on line %s: neither QUOTE nor ESCAPE is proceeded by ESCAPE. (%s)", options.file_path,
		    GetLineNumberStr(linenr, linenr_estimated, buffer->local_batch_index).c_str(), options.ToString());
		return false;
	}
	// escape was followed by quote or escape, go back to quoted state
	goto in_quotes;
}
final_state : {
	/* state: final_stage reached after we finished reading the end_buffer of the csv buffer */
	// reset end buffer
	end_buffer = buffer->buffer_end;
	if (position_buffer == end_buffer) {
		reached_remainder_state = false;
	}
	if (finished_chunk) {
		// AddRow reported a finished chunk: return it, marking the piece finished only when it is consumed
		if (position_buffer >= end_buffer) {
			if (position_buffer == end_buffer && StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1]) &&
			    position_buffer < buffer_size) {
				// last position is a new line, we still have to go through one more line of this buffer
				finished = false;
			} else {
				finished = true;
			}
		}
		buffer->lines_read += insert_chunk.size();
		return true;
	}
	// If this is the last buffer, we have to read the last value
	if (buffer->buffer->IsCSVFileLastBuffer() || (buffer->next_buffer && buffer->next_buffer->IsCSVFileLastBuffer())) {
		// NOTE(review): the last operand repeats `start_buffer != position_buffer`, so it does not change the result
		if (column > 0 || start_buffer != position_buffer || try_add_line ||
		    (insert_chunk.data.size() == 1 && start_buffer != position_buffer)) {
			// remaining values to be added to the chunk
			auto str_value = buffer->GetValue(start_buffer, position_buffer, offset);
			// a remainder that consists only of newlines is skipped, unless offset == 0
			if (!AllNewLine(str_value, insert_chunk.data.size()) || offset == 0) {
				AddValue(str_value, column, escape_positions, has_quotes, buffer->local_batch_index);
				if (try_add_line) {
					// single-line mode: same handling as in add_row, but compared against return_types.size()
					bool success = column == return_types.size();
					if (success) {
						auto cur_linenr = linenr;
						AddRow(insert_chunk, column, error_message, buffer->local_batch_index);
						success = Flush(insert_chunk, buffer->local_batch_index);
						linenr = cur_linenr;
					}
					parse_chunk.Reset();
					reached_remainder_state = false;
					return success;
				} else {
					VerifyLineLength(position_buffer - line_start);
					line_start = position_buffer;
					AddRow(insert_chunk, column, error_message, buffer->local_batch_index);
					verification_positions.end_of_last_line = position_buffer;
				}
			}
		}
	}
	// flush the parsed chunk and finalize parsing
	if (mode == ParserMode::PARSING) {
		Flush(insert_chunk, buffer->local_batch_index);
		buffer->lines_read += insert_chunk.size();
	}
	// the bytes since the end of the last complete line must not exceed options.buffer_size
	if (position_buffer - verification_positions.end_of_last_line > options.buffer_size) {
		error_message = "Line does not fit in one buffer. Increase the buffer size.";
		return false;
	}
	// temporarily widen the window to the full buffer size for SkipEmptyLines(), then restore it
	end_buffer = buffer_size;
	SkipEmptyLines();
	end_buffer = buffer->buffer_end;
	verification_positions.end_of_last_line = position_buffer;
	// NOTE(review): the nested condition below repeats the outer one
	if (position_buffer >= end_buffer) {
		if (position_buffer >= end_buffer) {
			if (position_buffer == end_buffer && StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1]) &&
			    position_buffer < buffer_size) {
				// last position is a new line, we still have to go through one more line of this buffer
				finished = false;
			} else {
				finished = true;
			}
		}
	}
	return true;
};
}
630
+
631
+ void ParallelCSVReader::ParseCSV(DataChunk &insert_chunk) {
632
+ string error_message;
633
+ if (!TryParseCSV(ParserMode::PARSING, insert_chunk, error_message)) {
634
+ throw InvalidInputException(error_message);
635
+ }
636
+ }
637
+
638
+ idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx, bool stop_at_first) {
639
+ while (true) {
640
+ if (buffer->line_info->CanItGetLine(file_idx, buffer_idx)) {
641
+ auto cur_start = verification_positions.beginning_of_first_line + buffer->buffer->GetCSVGlobalStart();
642
+ return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false, stop_at_first);
643
+ }
644
+ }
645
+ }
646
+
647
+ bool ParallelCSVReader::TryParseCSV(ParserMode mode) {
648
+ DataChunk dummy_chunk;
649
+ string error_message;
650
+ return TryParseCSV(mode, dummy_chunk, error_message);
651
+ }
652
+
653
+ void ParallelCSVReader::ParseCSV(ParserMode mode) {
654
+ DataChunk dummy_chunk;
655
+ string error_message;
656
+ if (!TryParseCSV(mode, dummy_chunk, error_message)) {
657
+ throw InvalidInputException(error_message);
658
+ }
659
+ }
660
+
661
+ bool ParallelCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_chunk, string &error_message) {
662
+ mode = parser_mode;
663
+ return TryParseSimpleCSV(insert_chunk, error_message);
664
+ }
665
+
666
+ } // namespace duckdb