duckdb 1.1.4-dev13.0 → 1.1.4-dev14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/core_functions/function_list.cpp +1 -0
  4. package/src/duckdb/extension/core_functions/include/core_functions/scalar/map_functions.hpp +9 -0
  5. package/src/duckdb/extension/core_functions/scalar/date/current.cpp +1 -0
  6. package/src/duckdb/extension/core_functions/scalar/generic/can_implicitly_cast.cpp +2 -2
  7. package/src/duckdb/extension/core_functions/scalar/generic/typeof.cpp +1 -1
  8. package/src/duckdb/extension/core_functions/scalar/list/flatten.cpp +91 -61
  9. package/src/duckdb/extension/core_functions/scalar/map/map_extract.cpp +89 -8
  10. package/src/duckdb/extension/icu/icu-current.cpp +63 -0
  11. package/src/duckdb/extension/icu/icu-makedate.cpp +43 -39
  12. package/src/duckdb/extension/icu/icu-timezone.cpp +63 -63
  13. package/src/duckdb/extension/icu/icu_extension.cpp +2 -0
  14. package/src/duckdb/extension/icu/include/icu-casts.hpp +39 -0
  15. package/src/duckdb/extension/icu/include/icu-current.hpp +17 -0
  16. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  17. package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -1
  18. package/src/duckdb/extension/parquet/column_writer.cpp +26 -18
  19. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +0 -6
  20. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +15 -1
  21. package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
  22. package/src/duckdb/extension/parquet/parquet_extension.cpp +67 -15
  23. package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -3
  24. package/src/duckdb/extension/parquet/parquet_writer.cpp +5 -6
  25. package/src/duckdb/src/catalog/catalog.cpp +21 -8
  26. package/src/duckdb/src/catalog/catalog_search_path.cpp +17 -1
  27. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  28. package/src/duckdb/src/catalog/default/default_functions.cpp +0 -3
  29. package/src/duckdb/src/catalog/dependency_list.cpp +7 -0
  30. package/src/duckdb/src/common/adbc/adbc.cpp +1 -56
  31. package/src/duckdb/src/common/arrow/arrow_converter.cpp +3 -2
  32. package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +58 -28
  33. package/src/duckdb/src/common/arrow/schema_metadata.cpp +1 -1
  34. package/src/duckdb/src/common/compressed_file_system.cpp +6 -2
  35. package/src/duckdb/src/common/enum_util.cpp +26 -22
  36. package/src/duckdb/src/common/error_data.cpp +3 -2
  37. package/src/duckdb/src/common/gzip_file_system.cpp +8 -8
  38. package/src/duckdb/src/common/local_file_system.cpp +2 -2
  39. package/src/duckdb/src/common/multi_file_reader.cpp +1 -1
  40. package/src/duckdb/src/common/random_engine.cpp +4 -1
  41. package/src/duckdb/src/common/serializer/memory_stream.cpp +23 -19
  42. package/src/duckdb/src/common/serializer/serializer.cpp +1 -1
  43. package/src/duckdb/src/common/types/bit.cpp +1 -1
  44. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +0 -5
  45. package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -1
  46. package/src/duckdb/src/common/types/data_chunk.cpp +2 -1
  47. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +0 -4
  48. package/src/duckdb/src/common/types.cpp +1 -1
  49. package/src/duckdb/src/execution/index/art/art.cpp +52 -42
  50. package/src/duckdb/src/execution/index/art/leaf.cpp +4 -9
  51. package/src/duckdb/src/execution/index/art/node.cpp +13 -13
  52. package/src/duckdb/src/execution/index/art/prefix.cpp +21 -16
  53. package/src/duckdb/src/execution/index/bound_index.cpp +6 -8
  54. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +39 -34
  55. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +2 -1
  56. package/src/duckdb/src/execution/index/unbound_index.cpp +10 -0
  57. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +62 -44
  58. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +26 -0
  59. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +69 -40
  60. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +3 -7
  61. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +11 -5
  62. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +4 -0
  63. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +8 -8
  64. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +36 -12
  65. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +12 -9
  66. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +0 -1
  67. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +29 -1
  68. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -10
  69. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +58 -35
  70. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +2 -1
  71. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +9 -4
  72. package/src/duckdb/src/execution/sample/reservoir_sample.cpp +7 -6
  73. package/src/duckdb/src/function/compression_config.cpp +4 -0
  74. package/src/duckdb/src/function/function_binder.cpp +1 -1
  75. package/src/duckdb/src/function/scalar/system/write_log.cpp +2 -2
  76. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +15 -2
  77. package/src/duckdb/src/function/table/arrow_conversion.cpp +10 -10
  78. package/src/duckdb/src/function/table/copy_csv.cpp +8 -5
  79. package/src/duckdb/src/function/table/read_csv.cpp +21 -4
  80. package/src/duckdb/src/function/table/sniff_csv.cpp +7 -0
  81. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +4 -0
  82. package/src/duckdb/src/function/table/system/duckdb_secret_types.cpp +71 -0
  83. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  84. package/src/duckdb/src/function/table/table_scan.cpp +120 -36
  85. package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -4
  86. package/src/duckdb/src/function/window/window_aggregate_function.cpp +6 -1
  87. package/src/duckdb/src/function/window/window_boundaries_state.cpp +135 -11
  88. package/src/duckdb/src/function/window/window_segment_tree.cpp +50 -22
  89. package/src/duckdb/src/function/window/window_token_tree.cpp +4 -3
  90. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +4 -0
  91. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +2 -0
  92. package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +1 -0
  93. package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +4 -2
  94. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -8
  95. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +0 -2
  96. package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +8 -3
  97. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +6 -1
  98. package/src/duckdb/src/include/duckdb/common/serializer/serialization_data.hpp +25 -0
  99. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +9 -3
  100. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
  101. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +11 -14
  102. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +5 -4
  103. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +21 -10
  104. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +6 -5
  105. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +37 -32
  106. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +36 -1
  107. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +3 -0
  108. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +2 -0
  109. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +5 -5
  110. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +5 -30
  111. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +7 -1
  112. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +3 -3
  113. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +1 -0
  114. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  115. package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +2 -2
  116. package/src/duckdb/src/include/duckdb/logging/logger.hpp +40 -119
  117. package/src/duckdb/src/include/duckdb/logging/logging.hpp +0 -2
  118. package/src/duckdb/src/include/duckdb/main/config.hpp +5 -0
  119. package/src/duckdb/src/include/duckdb/main/connection.hpp +0 -8
  120. package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +2 -1
  121. package/src/duckdb/src/include/duckdb/main/extension.hpp +1 -0
  122. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +11 -7
  123. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -0
  124. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +2 -0
  125. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +3 -0
  126. package/src/duckdb/src/include/duckdb/main/settings.hpp +10 -0
  127. package/src/duckdb/src/include/duckdb/parser/constraint.hpp +9 -0
  128. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +36 -9
  129. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +2 -1
  130. package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +8 -2
  131. package/src/duckdb/src/include/duckdb/planner/binder.hpp +4 -0
  132. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +9 -1
  133. package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +1 -0
  134. package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +0 -2
  135. package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +4 -4
  136. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +1 -1
  137. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +14 -10
  138. package/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +4 -0
  139. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +6 -1
  140. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +7 -2
  141. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +9 -0
  142. package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +2 -0
  143. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +4 -3
  144. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +2 -0
  145. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +6 -4
  146. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +1 -1
  147. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +2 -0
  148. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -0
  149. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +1 -1
  150. package/src/duckdb/src/logging/logger.cpp +8 -66
  151. package/src/duckdb/src/main/attached_database.cpp +3 -1
  152. package/src/duckdb/src/main/client_context.cpp +4 -2
  153. package/src/duckdb/src/main/config.cpp +20 -2
  154. package/src/duckdb/src/main/connection.cpp +2 -29
  155. package/src/duckdb/src/main/connection_manager.cpp +5 -3
  156. package/src/duckdb/src/main/database.cpp +2 -2
  157. package/src/duckdb/src/main/extension/extension_helper.cpp +4 -5
  158. package/src/duckdb/src/main/extension/extension_install.cpp +23 -10
  159. package/src/duckdb/src/main/extension/extension_load.cpp +6 -7
  160. package/src/duckdb/src/main/extension.cpp +27 -9
  161. package/src/duckdb/src/main/secret/secret_manager.cpp +11 -0
  162. package/src/duckdb/src/main/settings/custom_settings.cpp +44 -0
  163. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +6 -0
  164. package/src/duckdb/src/optimizer/filter_combiner.cpp +13 -3
  165. package/src/duckdb/src/optimizer/filter_pushdown.cpp +33 -6
  166. package/src/duckdb/src/optimizer/late_materialization.cpp +14 -3
  167. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +0 -3
  168. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +5 -1
  169. package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +6 -3
  170. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +49 -0
  171. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +1 -0
  172. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +50 -12
  173. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +7 -5
  174. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +1 -0
  175. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +2 -2
  176. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +12 -2
  177. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +0 -1
  178. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +55 -39
  179. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +2 -1
  180. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +15 -7
  181. package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +13 -8
  182. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +8 -3
  183. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +17 -1
  184. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +1 -0
  185. package/src/duckdb/src/planner/filter/conjunction_filter.cpp +1 -0
  186. package/src/duckdb/src/planner/filter/constant_filter.cpp +21 -0
  187. package/src/duckdb/src/planner/filter/in_filter.cpp +4 -7
  188. package/src/duckdb/src/planner/logical_operator.cpp +5 -3
  189. package/src/duckdb/src/planner/planner.cpp +1 -1
  190. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +2 -0
  191. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +3 -4
  192. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -5
  193. package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +4 -4
  194. package/src/duckdb/src/storage/compression/fsst.cpp +2 -2
  195. package/src/duckdb/src/storage/compression/roaring/common.cpp +10 -1
  196. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +11 -6
  197. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +4 -0
  198. package/src/duckdb/src/storage/compression/zstd.cpp +6 -0
  199. package/src/duckdb/src/storage/data_table.cpp +104 -109
  200. package/src/duckdb/src/storage/local_storage.cpp +8 -6
  201. package/src/duckdb/src/storage/magic_bytes.cpp +1 -1
  202. package/src/duckdb/src/storage/serialization/serialize_dependency.cpp +3 -3
  203. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +3 -3
  204. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +7 -5
  205. package/src/duckdb/src/storage/single_file_block_manager.cpp +95 -28
  206. package/src/duckdb/src/storage/storage_info.cpp +38 -0
  207. package/src/duckdb/src/storage/storage_manager.cpp +11 -0
  208. package/src/duckdb/src/storage/table/column_data.cpp +4 -0
  209. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
  210. package/src/duckdb/src/storage/table/row_group_collection.cpp +67 -68
  211. package/src/duckdb/src/storage/table/table_statistics.cpp +4 -4
  212. package/src/duckdb/src/storage/table_index_list.cpp +41 -15
  213. package/src/duckdb/src/storage/wal_replay.cpp +3 -1
  214. package/src/duckdb/src/storage/write_ahead_log.cpp +11 -4
  215. package/src/duckdb/src/transaction/meta_transaction.cpp +1 -1
  216. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
  217. package/src/duckdb/third_party/httplib/httplib.hpp +0 -1
  218. package/src/duckdb/third_party/re2/util/logging.h +10 -10
  219. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
@@ -126,6 +126,10 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m
126
126
  SkipBOM();
127
127
  }
128
128
  }
129
+ ignore_empty_values = state_machine.dialect_options.state_machine_options.delimiter.GetValue()[0] != ' ' &&
130
+ state_machine.dialect_options.state_machine_options.quote != ' ' &&
131
+ state_machine.dialect_options.state_machine_options.escape != ' ' &&
132
+ state_machine.dialect_options.state_machine_options.comment != ' ';
129
133
  }
130
134
 
131
135
  StringValueResult::~StringValueResult() {
@@ -148,7 +152,7 @@ inline bool IsValueNull(const char *null_str_ptr, const char *value_ptr, const i
148
152
  }
149
153
 
150
154
  bool StringValueResult::HandleTooManyColumnsError(const char *value_ptr, const idx_t size) {
151
- if (cur_col_id >= number_of_columns) {
155
+ if (cur_col_id >= number_of_columns && state_machine.state_machine_options.strict_mode.GetValue()) {
152
156
  bool error = true;
153
157
  if (cur_col_id == number_of_columns && ((quoted && state_machine.options.allow_quoted_nulls) || !quoted)) {
154
158
  // we make an exception if the first over-value is null
@@ -220,6 +224,9 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
220
224
  return;
221
225
  }
222
226
  if (cur_col_id >= number_of_columns) {
227
+ if (!state_machine.state_machine_options.strict_mode.GetValue()) {
228
+ return;
229
+ }
223
230
  bool error = true;
224
231
  if (cur_col_id == number_of_columns && ((quoted && state_machine.options.allow_quoted_nulls) || !quoted)) {
225
232
  // we make an exception if the first over-value is null
@@ -245,9 +252,9 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
245
252
  }
246
253
 
247
254
  if (((quoted && state_machine.options.allow_quoted_nulls) || !quoted)) {
248
- // Check for the occurrence of escaped null string like \N only if RFC 4180 conformance is disabled
255
+ // Check for the occurrence of escaped null string like \N only if strict_mode is disabled
249
256
  const bool check_unquoted_escaped_null =
250
- state_machine.state_machine_options.rfc_4180.GetValue() == false && escaped && !quoted && size == 1;
257
+ state_machine.state_machine_options.strict_mode.GetValue() == false && escaped && !quoted && size == 1;
251
258
  for (idx_t i = 0; i < null_str_count; i++) {
252
259
  bool is_null = false;
253
260
  if (null_str_size[i] == 2 && null_str_ptr[i][0] == state_machine.state_machine_options.escape.GetValue()) {
@@ -485,19 +492,30 @@ void StringValueResult::Reset() {
485
492
  cur_buffer = buffer_handles[iterator.GetBufferIdx()];
486
493
  }
487
494
  buffer_handles.clear();
495
+ idx_t actual_size = 0;
488
496
  if (cur_buffer) {
489
497
  buffer_handles[cur_buffer->buffer_idx] = cur_buffer;
498
+ actual_size = cur_buffer->actual_size;
490
499
  }
491
500
  current_errors.Reset();
492
501
  borked_rows.clear();
502
+ current_line_position.begin = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, actual_size};
503
+ current_line_position.end = current_line_position.begin;
493
504
  }
494
505
 
495
506
  void StringValueResult::AddQuotedValue(StringValueResult &result, const idx_t buffer_pos) {
496
507
  if (!result.unquoted) {
497
508
  result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id, result.last_position);
498
509
  }
499
- AddPossiblyEscapedValue(result, buffer_pos, result.buffer_ptr + result.quoted_position + 1,
500
- buffer_pos - result.quoted_position - 2, buffer_pos < result.last_position.buffer_pos + 2);
510
+ // remove potential empty values
511
+ idx_t length = buffer_pos - result.quoted_position - 1;
512
+ while (length > 0 && result.ignore_empty_values &&
513
+ result.buffer_ptr[result.quoted_position + 1 + length - 1] == ' ') {
514
+ length--;
515
+ }
516
+ length--;
517
+ AddPossiblyEscapedValue(result, buffer_pos, result.buffer_ptr + result.quoted_position + 1, length,
518
+ buffer_pos < result.last_position.buffer_pos + 2);
501
519
  result.quoted = false;
502
520
  }
503
521
 
@@ -511,6 +529,10 @@ void StringValueResult::AddPossiblyEscapedValue(StringValueResult &result, const
511
529
  return;
512
530
  }
513
531
  }
532
+ if (result.cur_col_id >= result.number_of_columns &&
533
+ !result.state_machine.state_machine_options.strict_mode.GetValue()) {
534
+ return;
535
+ }
514
536
  if (!result.HandleTooManyColumnsError(value_ptr, length)) {
515
537
  // If it's an escaped value we have to remove all the escapes, this is not really great
516
538
  // If we are going to escape, this vector must be a varchar vector
@@ -520,7 +542,6 @@ void StringValueResult::AddPossiblyEscapedValue(StringValueResult &result, const
520
542
  // We have to write the cast error message.
521
543
  std::ostringstream error;
522
544
  // Casting Error Message
523
-
524
545
  error << "Could not convert string \"" << std::string(value_ptr, length) << "\" to \'"
525
546
  << LogicalTypeIdToString(result.parse_types[result.chunk_col_id].type_id) << "\'";
526
547
  auto error_string = error.str();
@@ -533,6 +554,7 @@ void StringValueResult::AddPossiblyEscapedValue(StringValueResult &result, const
533
554
  auto value = StringValueScanner::RemoveEscape(
534
555
  value_ptr, length, result.state_machine.dialect_options.state_machine_options.escape.GetValue(),
535
556
  result.state_machine.dialect_options.state_machine_options.quote.GetValue(),
557
+ result.state_machine.dialect_options.state_machine_options.strict_mode.GetValue(),
536
558
  result.parse_chunk.data[result.chunk_col_id]);
537
559
  result.AddValueToVector(value.GetData(), value.GetSize());
538
560
  }
@@ -806,7 +828,7 @@ bool StringValueResult::AddRowInternal() {
806
828
  quoted_new_line = false;
807
829
  // We need to check if we are getting the correct number of columns here.
808
830
  // If columns are correct, we add it, and that's it.
809
- if (cur_col_id != number_of_columns) {
831
+ if (cur_col_id < number_of_columns) {
810
832
  // We have too few columns:
811
833
  if (null_padding) {
812
834
  while (cur_col_id < number_of_columns) {
@@ -1231,7 +1253,8 @@ void StringValueScanner::ProcessExtraRow() {
1231
1253
  }
1232
1254
  }
1233
1255
 
1234
- string_t StringValueScanner::RemoveEscape(const char *str_ptr, idx_t end, char escape, char quote, Vector &vector) {
1256
+ string_t StringValueScanner::RemoveEscape(const char *str_ptr, idx_t end, char escape, char quote, bool strict_mode,
1257
+ Vector &vector) {
1235
1258
  // Figure out the exact size
1236
1259
  idx_t str_pos = 0;
1237
1260
  bool just_escaped = false;
@@ -1239,7 +1262,7 @@ string_t StringValueScanner::RemoveEscape(const char *str_ptr, idx_t end, char e
1239
1262
  if (str_ptr[cur_pos] == escape && !just_escaped) {
1240
1263
  just_escaped = true;
1241
1264
  } else if (str_ptr[cur_pos] == quote) {
1242
- if (just_escaped) {
1265
+ if (just_escaped || !strict_mode) {
1243
1266
  str_pos++;
1244
1267
  }
1245
1268
  just_escaped = false;
@@ -1259,7 +1282,7 @@ string_t StringValueScanner::RemoveEscape(const char *str_ptr, idx_t end, char e
1259
1282
  if (c == escape && !just_escaped) {
1260
1283
  just_escaped = true;
1261
1284
  } else if (str_ptr[cur_pos] == quote) {
1262
- if (just_escaped) {
1285
+ if (just_escaped || !strict_mode) {
1263
1286
  removed_escapes_ptr[str_pos++] = c;
1264
1287
  }
1265
1288
  just_escaped = false;
@@ -1289,10 +1312,8 @@ void StringValueScanner::ProcessOverBufferValue() {
1289
1312
  }
1290
1313
  if (states.NewRow() || states.NewValue()) {
1291
1314
  break;
1292
- } else {
1293
- if (!result.comment) {
1294
- over_buffer_string += previous_buffer[i];
1295
- }
1315
+ } else if (!result.comment) {
1316
+ over_buffer_string += previous_buffer[i];
1296
1317
  }
1297
1318
  if (states.IsQuoted()) {
1298
1319
  result.SetQuoted(result, j);
@@ -1323,16 +1344,13 @@ void StringValueScanner::ProcessOverBufferValue() {
1323
1344
  if (states.EmptyLine()) {
1324
1345
  if (state_machine->dialect_options.num_cols == 1) {
1325
1346
  break;
1326
- } else {
1327
- continue;
1328
1347
  }
1348
+ continue;
1329
1349
  }
1330
1350
  if (states.NewRow() || states.NewValue()) {
1331
1351
  break;
1332
- } else {
1333
- if (!result.comment && !states.IsComment()) {
1334
- over_buffer_string += buffer_handle_ptr[iterator.pos.buffer_pos];
1335
- }
1352
+ } else if (!result.comment && !states.IsComment()) {
1353
+ over_buffer_string += buffer_handle_ptr[iterator.pos.buffer_pos];
1336
1354
  }
1337
1355
  if (states.IsQuoted()) {
1338
1356
  result.SetQuoted(result, j);
@@ -1357,26 +1375,34 @@ void StringValueScanner::ProcessOverBufferValue() {
1357
1375
  }
1358
1376
  if (!skip_value) {
1359
1377
  string_t value;
1360
- if (result.quoted) {
1361
- value = string_t(over_buffer_string.c_str() + result.quoted_position,
1362
- UnsafeNumericCast<uint32_t>(over_buffer_string.size() - 1 - result.quoted_position));
1378
+ if (result.quoted && !result.comment) {
1379
+ idx_t length = over_buffer_string.size() - 1 - result.quoted_position;
1380
+ while (length > 0 && result.ignore_empty_values &&
1381
+ over_buffer_string.c_str()[result.quoted_position + length] == ' ') {
1382
+ length--;
1383
+ }
1384
+ value = string_t(over_buffer_string.c_str() + result.quoted_position, UnsafeNumericCast<uint32_t>(length));
1363
1385
  if (result.escaped) {
1364
1386
  if (!result.HandleTooManyColumnsError(over_buffer_string.c_str(), over_buffer_string.size())) {
1365
1387
  const auto str_ptr = over_buffer_string.c_str() + result.quoted_position;
1366
- value = RemoveEscape(str_ptr, over_buffer_string.size() - 2,
1367
- state_machine->dialect_options.state_machine_options.escape.GetValue(),
1368
- state_machine->dialect_options.state_machine_options.quote.GetValue(),
1369
- result.parse_chunk.data[result.chunk_col_id]);
1388
+ value =
1389
+ RemoveEscape(str_ptr, over_buffer_string.size() - 2,
1390
+ state_machine->dialect_options.state_machine_options.escape.GetValue(),
1391
+ state_machine->dialect_options.state_machine_options.quote.GetValue(),
1392
+ result.state_machine.dialect_options.state_machine_options.strict_mode.GetValue(),
1393
+ result.parse_chunk.data[result.chunk_col_id]);
1370
1394
  }
1371
1395
  }
1372
1396
  } else {
1373
1397
  value = string_t(over_buffer_string.c_str(), UnsafeNumericCast<uint32_t>(over_buffer_string.size()));
1374
1398
  if (result.escaped) {
1375
1399
  if (!result.HandleTooManyColumnsError(over_buffer_string.c_str(), over_buffer_string.size())) {
1376
- value = RemoveEscape(over_buffer_string.c_str(), over_buffer_string.size(),
1377
- state_machine->dialect_options.state_machine_options.escape.GetValue(),
1378
- state_machine->dialect_options.state_machine_options.quote.GetValue(),
1379
- result.parse_chunk.data[result.chunk_col_id]);
1400
+ value =
1401
+ RemoveEscape(over_buffer_string.c_str(), over_buffer_string.size(),
1402
+ state_machine->dialect_options.state_machine_options.escape.GetValue(),
1403
+ state_machine->dialect_options.state_machine_options.quote.GetValue(),
1404
+ result.state_machine.dialect_options.state_machine_options.strict_mode.GetValue(),
1405
+ result.parse_chunk.data[result.chunk_col_id]);
1380
1406
  }
1381
1407
  }
1382
1408
  }
@@ -1436,7 +1462,7 @@ bool StringValueScanner::MoveToNextBuffer() {
1436
1462
  // This means we reached the end of the file, we must add a last line if there is any to be added
1437
1463
  if (states.EmptyLine() || states.NewRow() || result.added_last_line || states.IsCurrentNewRow() ||
1438
1464
  states.IsNotSet()) {
1439
- if (result.cur_col_id == result.number_of_columns) {
1465
+ if (result.cur_col_id == result.number_of_columns && !result.IsStateCurrent(CSVState::INVALID)) {
1440
1466
  result.number_of_rows++;
1441
1467
  }
1442
1468
  result.cur_col_id = 0;
@@ -1453,7 +1479,7 @@ bool StringValueScanner::MoveToNextBuffer() {
1453
1479
  }
1454
1480
  lines_read++;
1455
1481
  } else if (states.IsQuotedCurrent() &&
1456
- state_machine->dialect_options.state_machine_options.rfc_4180.GetValue()) {
1482
+ state_machine->dialect_options.state_machine_options.strict_mode.GetValue()) {
1457
1483
  // Unterminated quote
1458
1484
  LinePosition current_line_start = {iterator.pos.buffer_idx, iterator.pos.buffer_pos,
1459
1485
  result.buffer_size};
@@ -1465,7 +1491,7 @@ bool StringValueScanner::MoveToNextBuffer() {
1465
1491
  result.UnsetComment(result, iterator.pos.buffer_pos);
1466
1492
  } else {
1467
1493
  if (result.quoted && states.IsDelimiterBytes() &&
1468
- state_machine->dialect_options.state_machine_options.rfc_4180.GetValue()) {
1494
+ state_machine->dialect_options.state_machine_options.strict_mode.GetValue()) {
1469
1495
  result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id,
1470
1496
  result.last_position);
1471
1497
  }
@@ -1519,8 +1545,8 @@ bool StringValueScanner::FirstValueEndsOnQuote(CSVIterator iterator) const {
1519
1545
  const idx_t to_pos = iterator.GetEndPos();
1520
1546
  while (iterator.pos.buffer_pos < to_pos) {
1521
1547
  state_machine->Transition(current_state, buffer_handle_ptr[iterator.pos.buffer_pos++]);
1522
- if ((current_state.IsState(CSVState::DELIMITER) || current_state.IsState(CSVState::CARRIAGE_RETURN) ||
1523
- current_state.IsState(CSVState::RECORD_SEPARATOR))) {
1548
+ if (current_state.IsState(CSVState::DELIMITER) || current_state.IsState(CSVState::CARRIAGE_RETURN) ||
1549
+ current_state.IsState(CSVState::RECORD_SEPARATOR)) {
1524
1550
  return buffer_handle_ptr[iterator.pos.buffer_pos - 2] ==
1525
1551
  state_machine->dialect_options.state_machine_options.quote.GetValue();
1526
1552
  }
@@ -1675,9 +1701,9 @@ void StringValueScanner::SetStart() {
1675
1701
  // We need to initialize our strict state machine
1676
1702
  auto &state_machine_cache = CSVStateMachineCache::Get(buffer_manager->context);
1677
1703
  auto state_options = state_machine->state_machine_options;
1678
- // To set the state machine to be strict we ensure that rfc_4180 is set to true
1679
- if (!state_options.rfc_4180.IsSetByUser()) {
1680
- state_options.rfc_4180 = true;
1704
+ // To set the state machine to be strict we ensure that strict_mode is set to true
1705
+ if (!state_options.strict_mode.IsSetByUser()) {
1706
+ state_options.strict_mode = true;
1681
1707
  }
1682
1708
  state_machine_strict =
1683
1709
  make_shared_ptr<CSVStateMachine>(state_machine_cache.Get(state_options), state_machine->options);
@@ -1699,6 +1725,9 @@ void StringValueScanner::SetStart() {
1699
1725
  if (!best_row.is_valid && !quoted_row.is_valid && best_row.start_pos < quoted_row.start_pos) {
1700
1726
  best_row = quoted_row;
1701
1727
  }
1728
+ if (quoted_row.is_valid && quoted_row.start_pos < best_row.start_pos) {
1729
+ best_row = quoted_row;
1730
+ }
1702
1731
  }
1703
1732
  // 3. We are in an escaped value
1704
1733
  if (!best_row.is_valid && state_machine->dialect_options.state_machine_options.escape.GetValue() != '\0' &&
@@ -1794,7 +1823,7 @@ void StringValueScanner::FinalizeChunkProcess() {
1794
1823
  }
1795
1824
  }
1796
1825
  if (states.IsQuotedCurrent() && !found_error &&
1797
- state_machine->dialect_options.state_machine_options.rfc_4180.GetValue()) {
1826
+ state_machine->dialect_options.state_machine_options.strict_mode.GetValue()) {
1798
1827
  // If we finish the execution of a buffer, and we end in a quoted state, it means we have unterminated
1799
1828
  // quotes
1800
1829
  result.current_errors.Insert(type, result.cur_col_id, result.chunk_col_id, result.last_position);
@@ -156,11 +156,6 @@ void CSVSniffer::GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountSc
156
156
  } else {
157
157
  new_line_id = DetectNewLineDelimiter(*buffer_manager);
158
158
  }
159
- // We only sniff RFC 4180 rules, unless manually set by user.
160
- bool rfc_4180 = true;
161
- if (options.dialect_options.state_machine_options.rfc_4180.IsSetByUser()) {
162
- rfc_4180 = options.dialect_options.state_machine_options.rfc_4180.GetValue();
163
- }
164
159
  CSVIterator first_iterator;
165
160
  bool iterator_set = false;
166
161
  for (const auto quote_rule : dialect_candidates.quote_rule_candidates) {
@@ -172,8 +167,9 @@ void CSVSniffer::GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountSc
172
167
  for (const auto &escape : escape_candidates) {
173
168
  for (const auto &comment : dialect_candidates.comment_candidates) {
174
169
  D_ASSERT(buffer_manager);
175
- CSVStateMachineOptions state_machine_options(delimiter, quote, escape, comment, new_line_id,
176
- rfc_4180);
170
+ CSVStateMachineOptions state_machine_options(
171
+ delimiter, quote, escape, comment, new_line_id,
172
+ options.dialect_options.state_machine_options.strict_mode.GetValue());
177
173
  auto sniffing_state_machine =
178
174
  make_shared_ptr<CSVStateMachine>(options, state_machine_options, state_machine_cache);
179
175
  if (options.dialect_options.skip_rows.IsSetByUser()) {
@@ -117,9 +117,7 @@ static void ReplaceNames(vector<string> &detected_names, CSVStateMachine &state_
117
117
  detected_names.push_back(GenerateColumnName(options.name_list.size(), col++));
118
118
  best_sql_types_candidates_per_column_idx[i] = {LogicalType::VARCHAR};
119
119
  }
120
-
121
120
  dialect_options.num_cols = options.name_list.size();
122
-
123
121
  } else {
124
122
  // we throw an error
125
123
  const auto error = CSVError::HeaderSniffingError(
@@ -128,8 +126,16 @@ static void ReplaceNames(vector<string> &detected_names, CSVStateMachine &state_
128
126
  error_handler.Error(error);
129
127
  }
130
128
  }
131
- for (idx_t i = 0; i < options.name_list.size(); i++) {
132
- detected_names[i] = options.name_list[i];
129
+ if (options.name_list.size() > detected_names.size()) {
130
+ // we throw an error
131
+ const auto error =
132
+ CSVError::HeaderSniffingError(options, best_header_row, options.name_list.size(),
133
+ state_machine.dialect_options.state_machine_options.delimiter.GetValue());
134
+ error_handler.Error(error);
135
+ } else {
136
+ for (idx_t i = 0; i < options.name_list.size(); i++) {
137
+ detected_names[i] = options.name_list[i];
138
+ }
133
139
  }
134
140
  }
135
141
  }
@@ -335,7 +341,7 @@ void CSVSniffer::DetectHeader() {
335
341
  auto &sniffer_state_machine = best_candidate->GetStateMachine();
336
342
  names = DetectHeaderInternal(buffer_manager->context, best_header_row, sniffer_state_machine, set_columns,
337
343
  best_sql_types_candidates_per_column_idx, options, *error_handler);
338
- if (single_row_file && sniffer_state_machine.dialect_options.header.GetValue()) {
344
+ if (EmptyOrOnlyHeader()) {
339
345
  // This file only contains a header, lets default to the lowest type of all.
340
346
  detected_types.clear();
341
347
  for (idx_t i = 0; i < names.size(); i++) {
@@ -99,6 +99,10 @@ idx_t CSVSniffer::LinesSniffed() const {
99
99
  return lines_sniffed;
100
100
  }
101
101
 
102
+ bool CSVSniffer::EmptyOrOnlyHeader() const {
103
+ return (single_row_file && best_candidate->state_machine->dialect_options.header.GetValue()) || lines_sniffed == 0;
104
+ }
105
+
102
106
  bool CSVSniffer::CanYouCastIt(ClientContext &context, const string_t value, const LogicalType &type,
103
107
  const DialectOptions &dialect_options, const bool is_null, const char decimal_separator) {
104
108
  if (is_null) {
@@ -31,7 +31,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
31
31
  InitializeTransitionArray(transition_array, cur_state, CSVState::QUOTED);
32
32
  break;
33
33
  case CSVState::UNQUOTED:
34
- if (state_machine_options.rfc_4180.GetValue()) {
34
+ if (state_machine_options.strict_mode.GetValue()) {
35
35
  // If we have an unquoted state, following rfc 4180, our base state is invalid
36
36
  InitializeTransitionArray(transition_array, cur_state, CSVState::INVALID);
37
37
  } else {
@@ -58,7 +58,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
58
58
 
59
59
  const bool multi_byte_delimiter = delimiter_value.size() != 1;
60
60
 
61
- const bool enable_unquoted_escape = state_machine_options.rfc_4180.GetValue() == false &&
61
+ const bool enable_unquoted_escape = state_machine_options.strict_mode.GetValue() == false &&
62
62
  state_machine_options.quote != state_machine_options.escape &&
63
63
  state_machine_options.escape != '\0';
64
64
  // Now set values depending on configuration
@@ -75,7 +75,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
75
75
  transition_array[static_cast<uint8_t>('\r')][state] = CSVState::CARRIAGE_RETURN;
76
76
  if (state == static_cast<uint8_t>(CSVState::STANDARD_NEWLINE)) {
77
77
  transition_array[static_cast<uint8_t>('\n')][state] = CSVState::STANDARD;
78
- } else if (!state_machine_options.rfc_4180.GetValue()) {
78
+ } else if (!state_machine_options.strict_mode.GetValue()) {
79
79
  transition_array[static_cast<uint8_t>('\n')][state] = CSVState::RECORD_SEPARATOR;
80
80
  } else {
81
81
  transition_array[static_cast<uint8_t>('\n')][state] = CSVState::INVALID;
@@ -227,7 +227,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
227
227
  if (state_machine_options.quote == state_machine_options.escape) {
228
228
  transition_array[quote][static_cast<uint8_t>(CSVState::UNQUOTED)] = CSVState::QUOTED;
229
229
  }
230
- if (state_machine_options.rfc_4180 == false) {
230
+ if (state_machine_options.strict_mode == false) {
231
231
  if (escape == '\0') {
232
232
  // If escape is defined, it limits a bit how relaxed quotes can be in a reliable way.
233
233
  transition_array[quote][static_cast<uint8_t>(CSVState::UNQUOTED)] = CSVState::MAYBE_QUOTED;
@@ -413,10 +413,10 @@ CSVStateMachineCache::CSVStateMachineCache() {
413
413
  const auto &escape_candidates = default_escape[static_cast<uint8_t>(quote_rule)];
414
414
  for (const auto &escape : escape_candidates) {
415
415
  for (const auto &comment : default_comment) {
416
- for (const bool rfc_4180 : {true, false}) {
417
- Insert({delimiter, quote, escape, comment, NewLineIdentifier::SINGLE_N, rfc_4180});
418
- Insert({delimiter, quote, escape, comment, NewLineIdentifier::SINGLE_R, rfc_4180});
419
- Insert({delimiter, quote, escape, comment, NewLineIdentifier::CARRY_ON, rfc_4180});
416
+ for (const bool strict_mode : {true, false}) {
417
+ Insert({delimiter, quote, escape, comment, NewLineIdentifier::SINGLE_N, strict_mode});
418
+ Insert({delimiter, quote, escape, comment, NewLineIdentifier::SINGLE_R, strict_mode});
419
+ Insert({delimiter, quote, escape, comment, NewLineIdentifier::CARRY_ON, strict_mode});
420
420
  }
421
421
  }
422
422
  }
@@ -194,11 +194,18 @@ void CSVErrorHandler::FillRejectsTable(InternalAppender &errors_appender, const
194
194
  errors_appender.Append(Value());
195
195
  break;
196
196
  case CSVErrorType::TOO_FEW_COLUMNS:
197
- D_ASSERT(bind_data.return_names.size() > col_idx + 1);
198
- errors_appender.Append(string_t(bind_data.return_names[col_idx + 1]));
197
+ if (col_idx + 1 < bind_data.return_names.size()) {
198
+ errors_appender.Append(string_t(bind_data.return_names[col_idx + 1]));
199
+ } else {
200
+ errors_appender.Append(Value());
201
+ }
199
202
  break;
200
203
  default:
201
- errors_appender.Append(string_t(bind_data.return_names[col_idx]));
204
+ if (col_idx < bind_data.return_names.size()) {
205
+ errors_appender.Append(string_t(bind_data.return_names[col_idx]));
206
+ } else {
207
+ errors_appender.Append(Value());
208
+ }
202
209
  }
203
210
  // 8. Error Type
204
211
  errors_appender.Append(string_t(CSVErrorTypeToEnum(error.type)));
@@ -321,11 +328,13 @@ CSVError CSVError::InvalidState(const CSVReaderOptions &options, idx_t current_c
321
328
  std::ostringstream error;
322
329
  error << "The CSV Parser state machine reached an invalid state.\nThis can happen when is not possible to parse "
323
330
  "your CSV File with the given options, or the CSV File is not RFC 4180 compliant ";
324
-
325
331
  std::ostringstream how_to_fix_it;
326
- how_to_fix_it << "Possible fixes:" << '\n';
327
- how_to_fix_it << "* Enable scanning files that are not RFC 4180 compliant (rfc_4180=false)." << '\n';
328
-
332
+ if (options.dialect_options.state_machine_options.strict_mode.GetValue()) {
333
+ how_to_fix_it << "Possible fixes:" << '\n';
334
+ how_to_fix_it << "* Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not "
335
+ "comply with the CSV standard."
336
+ << '\n';
337
+ }
329
338
  return CSVError(error.str(), INVALID_STATE, current_column, csv_row, error_info, row_byte_position, byte_position,
330
339
  options, how_to_fix_it.str(), current_path);
331
340
  }
@@ -356,6 +365,11 @@ CSVError CSVError::HeaderSniffingError(const CSVReaderOptions &options, const ve
356
365
 
357
366
  // 3. Suggest how to fix it!
358
367
  error << "Possible fixes:" << '\n';
368
+ if (options.dialect_options.state_machine_options.strict_mode.GetValue()) {
369
+ error << "* Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not comply with "
370
+ "the CSV standard."
371
+ << '\n';
372
+ }
359
373
  // header
360
374
  if (!options.dialect_options.header.IsSetByUser()) {
361
375
  error << "* Set header (header = true) if your CSV has a header, or (header = false) if it doesn't" << '\n';
@@ -395,6 +409,11 @@ CSVError CSVError::SniffingError(const CSVReaderOptions &options, const string &
395
409
  // 3. Suggest how to fix it!
396
410
  error << "Possible fixes:" << '\n';
397
411
  // 3.1 Inform the reader of the dialect
412
+ if (options.dialect_options.state_machine_options.strict_mode.GetValue()) {
413
+ error << "* Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not comply with "
414
+ "the CSV standard."
415
+ << '\n';
416
+ }
398
417
  // delimiter
399
418
  if (!options.dialect_options.state_machine_options.delimiter.IsSetByUser()) {
400
419
  error << "* Set delimiter (e.g., delim=\',\')" << '\n';
@@ -440,11 +459,6 @@ CSVError CSVError::SniffingError(const CSVReaderOptions &options, const string &
440
459
  error << "* Be sure that the maximum line size is set to an appropriate value, otherwise set it (e.g., "
441
460
  "max_line_size=10000000)"
442
461
  << "\n";
443
-
444
- if (options.dialect_options.state_machine_options.rfc_4180.GetValue() != false ||
445
- !options.dialect_options.state_machine_options.rfc_4180.IsSetByUser()) {
446
- error << "* Enable scanning files that are not RFC 4180 compliant (rfc_4180=false). " << '\n';
447
- }
448
462
  return CSVError(error.str(), SNIFFING, {});
449
463
  }
450
464
 
@@ -466,6 +480,11 @@ CSVError CSVError::UnterminatedQuotesError(const CSVReaderOptions &options, idx_
466
480
  error << "Value with unterminated quote found." << '\n';
467
481
  std::ostringstream how_to_fix_it;
468
482
  how_to_fix_it << "Possible fixes:" << '\n';
483
+ if (options.dialect_options.state_machine_options.strict_mode.GetValue()) {
484
+ how_to_fix_it << "* Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not "
485
+ "comply with the CSV standard."
486
+ << '\n';
487
+ }
469
488
  how_to_fix_it << "* Enable ignore errors (ignore_errors=true) to skip this row" << '\n';
470
489
  how_to_fix_it << "* Set quote to empty or to a different value (e.g., quote=\'\')" << '\n';
471
490
  return CSVError(error.str(), UNTERMINATED_QUOTES, current_column, csv_row, error_info, row_byte_position,
@@ -479,6 +498,11 @@ CSVError CSVError::IncorrectColumnAmountError(const CSVReaderOptions &options, i
479
498
  // We don't have a fix for this
480
499
  std::ostringstream how_to_fix_it;
481
500
  how_to_fix_it << "Possible fixes:" << '\n';
501
+ if (options.dialect_options.state_machine_options.strict_mode.GetValue()) {
502
+ how_to_fix_it << "* Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not "
503
+ "comply with the CSV standard."
504
+ << '\n';
505
+ }
482
506
  if (!options.null_padding) {
483
507
  how_to_fix_it << "* Enable null padding (null_padding=true) to replace missing values with NULL" << '\n';
484
508
  }
@@ -189,11 +189,11 @@ void CSVReaderOptions::SetNewline(const string &input) {
189
189
  }
190
190
 
191
191
  bool CSVReaderOptions::GetRFC4180() const {
192
- return this->dialect_options.state_machine_options.rfc_4180.GetValue();
192
+ return this->dialect_options.state_machine_options.strict_mode.GetValue();
193
193
  }
194
194
 
195
195
  void CSVReaderOptions::SetRFC4180(bool input) {
196
- this->dialect_options.state_machine_options.rfc_4180.Set(input);
196
+ this->dialect_options.state_machine_options.strict_mode.Set(input);
197
197
  }
198
198
 
199
199
  bool CSVReaderOptions::IgnoreErrors() const {
@@ -413,7 +413,7 @@ bool CSVReaderOptions::SetBaseOption(const string &loption, const Value &value,
413
413
 
414
414
  } else if (loption == "compression") {
415
415
  SetCompression(ParseString(value, loption));
416
- } else if (loption == "rfc_4180") {
416
+ } else if (loption == "strict_mode") {
417
417
  SetRFC4180(ParseBoolean(value, loption));
418
418
  } else {
419
419
  // unrecognized option in base CSV
@@ -440,7 +440,7 @@ string CSVReaderOptions::ToString(const string &current_file_path) const {
440
440
  auto &escape = dialect_options.state_machine_options.escape;
441
441
  auto &comment = dialect_options.state_machine_options.comment;
442
442
  auto &new_line = dialect_options.state_machine_options.new_line;
443
- auto &rfc_4180 = dialect_options.state_machine_options.rfc_4180;
443
+ auto &strict_mode = dialect_options.state_machine_options.strict_mode;
444
444
  auto &skip_rows = dialect_options.skip_rows;
445
445
 
446
446
  auto &header = dialect_options.header;
@@ -460,8 +460,8 @@ string CSVReaderOptions::ToString(const string &current_file_path) const {
460
460
  error += FormatOptionLine("skip_rows", skip_rows);
461
461
  // comment
462
462
  error += FormatOptionLine("comment", comment);
463
- // rfc_4180
464
- error += FormatOptionLine("rfc_4180", rfc_4180);
463
+ // strict_mode
464
+ error += FormatOptionLine("strict_mode", strict_mode);
465
465
  // date format
466
466
  error += FormatOptionLine("date_format", dialect_options.date_format.at(LogicalType::DATE));
467
467
  // timestamp format
@@ -638,6 +638,9 @@ void CSVReaderOptions::FromNamedParameters(const named_parameter_map_t &in, Clie
638
638
  }
639
639
  auto &children = ListValue::GetChildren(kv.second);
640
640
  for (auto &child : children) {
641
+ if (child.IsNull()) {
642
+ throw BinderException("read_csv %s parameter cannot have a NULL value", kv.first);
643
+ }
641
644
  name_list.push_back(StringValue::Get(child));
642
645
  }
643
646
  for (auto &name : name_list) {
@@ -716,7 +719,7 @@ void CSVReaderOptions::ToNamedParameters(named_parameter_map_t &named_params) co
716
719
  auto &quote = dialect_options.state_machine_options.quote;
717
720
  auto &escape = dialect_options.state_machine_options.escape;
718
721
  auto &comment = dialect_options.state_machine_options.comment;
719
- auto &rfc_4180 = dialect_options.state_machine_options.rfc_4180;
722
+ auto &strict_mode = dialect_options.state_machine_options.strict_mode;
720
723
  auto &header = dialect_options.header;
721
724
  if (delimiter.IsSetByUser()) {
722
725
  named_params["delim"] = Value(GetDelimiter());
@@ -736,8 +739,8 @@ void CSVReaderOptions::ToNamedParameters(named_parameter_map_t &named_params) co
736
739
  if (header.IsSetByUser()) {
737
740
  named_params["header"] = Value(GetHeader());
738
741
  }
739
- if (rfc_4180.IsSetByUser()) {
740
- named_params["rfc_4180"] = Value(GetRFC4180());
742
+ if (strict_mode.IsSetByUser()) {
743
+ named_params["strict_mode"] = Value(GetRFC4180());
741
744
  }
742
745
  named_params["max_line_size"] = Value::BIGINT(NumericCast<int64_t>(maximum_line_size.GetValue()));
743
746
  if (dialect_options.skip_rows.IsSetByUser()) {
@@ -638,7 +638,6 @@ void JoinFilterPushdownInfo::PushInFilter(const JoinFilterPushdownFilter &info,
638
638
 
639
639
  // generate the OR filter
640
640
  auto in_filter = make_uniq<InFilter>(std::move(in_list));
641
- in_filter->origin_is_hash_join = true;
642
641
 
643
642
  // we push the OR filter as an OptionalFilter so that we can use it for zonemap pruning only
644
643
  // the IN-list is expensive to execute otherwise
@@ -1,6 +1,8 @@
1
1
  #include "duckdb/execution/operator/persistent/physical_copy_database.hpp"
2
+
2
3
  #include "duckdb/catalog/catalog.hpp"
3
4
  #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
5
+ #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
4
6
  #include "duckdb/planner/binder.hpp"
5
7
  #include "duckdb/planner/parsed_data/bound_create_table_info.hpp"
6
8
  #include "duckdb/parser/parsed_data/create_schema_info.hpp"
@@ -9,6 +11,8 @@
9
11
  #include "duckdb/parser/parsed_data/create_type_info.hpp"
10
12
  #include "duckdb/parser/parsed_data/create_view_info.hpp"
11
13
  #include "duckdb/parser/parsed_data/create_index_info.hpp"
14
+ #include "duckdb/execution/index/unbound_index.hpp"
15
+ #include "duckdb/storage/data_table.hpp"
12
16
 
13
17
  namespace duckdb {
14
18
 
@@ -52,7 +56,7 @@ SourceResultType PhysicalCopyDatabase::GetData(ExecutionContext &context, DataCh
52
56
  break;
53
57
  }
54
58
  case CatalogType::INDEX_ENTRY: {
55
- catalog.CreateIndex(context.client, create_info->Cast<CreateIndexInfo>());
59
+ // Skip for now.
56
60
  break;
57
61
  }
58
62
  default:
@@ -60,6 +64,30 @@ SourceResultType PhysicalCopyDatabase::GetData(ExecutionContext &context, DataCh
60
64
  CatalogTypeToString(create_info->type));
61
65
  }
62
66
  }
67
+
68
+ // Create the indexes after table creation.
69
+ for (auto &create_info : info->entries) {
70
+ if (!create_info || create_info->type != CatalogType::INDEX_ENTRY) {
71
+ continue;
72
+ }
73
+ catalog.CreateIndex(context.client, create_info->Cast<CreateIndexInfo>());
74
+
75
+ auto &create_index_info = create_info->Cast<CreateIndexInfo>();
76
+ auto &catalog_table = catalog.GetEntry(context.client, CatalogType::TABLE_ENTRY, create_index_info.schema,
77
+ create_index_info.table);
78
+ auto &table_entry = catalog_table.Cast<TableCatalogEntry>();
79
+ auto &data_table = table_entry.GetStorage();
80
+
81
+ IndexStorageInfo storage_info(create_index_info.index_name);
82
+ storage_info.options.emplace("v1_0_0_storage", false);
83
+ auto unbound_index = make_uniq<UnboundIndex>(create_index_info.Copy(), storage_info,
84
+ data_table.GetTableIOManager(), catalog.GetAttached());
85
+
86
+ data_table.AddIndex(std::move(unbound_index));
87
+ auto &data_table_info = *data_table.GetDataTableInfo();
88
+ data_table_info.GetIndexes().InitializeIndexes(context.client, data_table_info);
89
+ }
90
+
63
91
  return SourceResultType::FINISHED;
64
92
  }
65
93