duckdb 1.3.1-dev6.0 → 1.3.2-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/core_functions/aggregate/distributive/arg_min_max.cpp +27 -39
  3. package/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp +2 -3
  4. package/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_sort_tree.hpp +1 -1
  5. package/src/duckdb/extension/core_functions/lambda_functions.cpp +16 -14
  6. package/src/duckdb/extension/core_functions/scalar/list/list_filter.cpp +3 -2
  7. package/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp +46 -10
  8. package/src/duckdb/extension/core_functions/scalar/list/list_transform.cpp +3 -2
  9. package/src/duckdb/extension/core_functions/scalar/random/random.cpp +3 -1
  10. package/src/duckdb/extension/icu/icu-datefunc.cpp +5 -3
  11. package/src/duckdb/extension/icu/icu-strptime.cpp +6 -1
  12. package/src/duckdb/extension/icu/icu-timezone.cpp +4 -0
  13. package/src/duckdb/extension/icu/icu_extension.cpp +7 -2
  14. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +1 -1
  15. package/src/duckdb/extension/icu/include/icu-helpers.hpp +1 -1
  16. package/src/duckdb/extension/icu/third_party/icu/common/uloc.cpp +5 -5
  17. package/src/duckdb/extension/json/include/json_common.hpp +19 -0
  18. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -4
  19. package/src/duckdb/extension/json/include/json_functions.hpp +4 -4
  20. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +38 -17
  21. package/src/duckdb/extension/json/json_functions/json_table_in_out.cpp +11 -7
  22. package/src/duckdb/extension/json/json_functions.cpp +4 -4
  23. package/src/duckdb/extension/json/json_reader.cpp +1 -1
  24. package/src/duckdb/extension/parquet/column_reader.cpp +7 -1
  25. package/src/duckdb/extension/parquet/include/parquet_bss_decoder.hpp +2 -2
  26. package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +2 -2
  27. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +2 -1
  28. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +1 -1
  29. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +3 -0
  30. package/src/duckdb/extension/parquet/include/writer/parquet_write_operators.hpp +3 -1
  31. package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +1 -1
  32. package/src/duckdb/extension/parquet/parquet_crypto.cpp +9 -5
  33. package/src/duckdb/extension/parquet/parquet_extension.cpp +26 -0
  34. package/src/duckdb/extension/parquet/parquet_float16.cpp +4 -2
  35. package/src/duckdb/extension/parquet/parquet_metadata.cpp +3 -3
  36. package/src/duckdb/extension/parquet/parquet_multi_file_info.cpp +12 -0
  37. package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -4
  38. package/src/duckdb/extension/parquet/parquet_statistics.cpp +13 -3
  39. package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -1
  40. package/src/duckdb/extension/parquet/reader/decimal_column_reader.cpp +1 -1
  41. package/src/duckdb/extension/parquet/reader/string_column_reader.cpp +1 -1
  42. package/src/duckdb/extension/parquet/reader/struct_column_reader.cpp +13 -4
  43. package/src/duckdb/extension/parquet/serialize_parquet.cpp +2 -0
  44. package/src/duckdb/src/catalog/catalog.cpp +10 -4
  45. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +4 -10
  46. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +1 -2
  47. package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +2 -2
  49. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +1 -1
  50. package/src/duckdb/src/catalog/catalog_search_path.cpp +7 -1
  51. package/src/duckdb/src/catalog/catalog_set.cpp +21 -1
  52. package/src/duckdb/src/common/adbc/adbc.cpp +1 -1
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +17 -5
  54. package/src/duckdb/src/common/arrow/arrow_converter.cpp +23 -15
  55. package/src/duckdb/src/common/box_renderer.cpp +1 -2
  56. package/src/duckdb/src/common/enum_util.cpp +4 -3
  57. package/src/duckdb/src/common/local_file_system.cpp +13 -12
  58. package/src/duckdb/src/common/multi_file/multi_file_column_mapper.cpp +35 -12
  59. package/src/duckdb/src/common/multi_file/multi_file_reader.cpp +13 -3
  60. package/src/duckdb/src/common/string_util.cpp +7 -5
  61. package/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp +4 -4
  62. package/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp +4 -4
  63. package/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp +4 -4
  64. package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +4 -4
  65. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +1 -1
  66. package/src/duckdb/src/common/types/uuid.cpp +5 -1
  67. package/src/duckdb/src/common/types.cpp +28 -0
  68. package/src/duckdb/src/common/virtual_file_system.cpp +5 -0
  69. package/src/duckdb/src/execution/column_binding_resolver.cpp +49 -30
  70. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +4 -0
  71. package/src/duckdb/src/execution/join_hashtable.cpp +10 -7
  72. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
  73. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +1 -1
  74. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +2 -1
  75. package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +1 -4
  76. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +53 -1
  77. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +58 -59
  78. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +10 -5
  79. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +4 -0
  80. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +18 -8
  81. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +1 -1
  82. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +1 -0
  83. package/src/duckdb/src/execution/physical_plan_generator.cpp +5 -5
  84. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +2 -1
  85. package/src/duckdb/src/function/function.cpp +4 -0
  86. package/src/duckdb/src/function/scalar/operator/arithmetic.cpp +6 -0
  87. package/src/duckdb/src/function/scalar/struct/remap_struct.cpp +10 -1
  88. package/src/duckdb/src/function/table/copy_csv.cpp +1 -0
  89. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  90. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +1 -0
  91. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
  92. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
  93. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +2 -0
  94. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +2 -2
  95. package/src/duckdb/src/include/duckdb/common/helper.hpp +9 -9
  96. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  97. package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_column_mapper.hpp +3 -5
  98. package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_reader.hpp +7 -0
  99. package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp +3 -0
  100. package/src/duckdb/src/include/duckdb/common/shadow_forbidden_functions.hpp +40 -0
  101. package/src/duckdb/src/include/duckdb/common/string.hpp +25 -2
  102. package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +20 -24
  103. package/src/duckdb/src/include/duckdb/common/types/uhugeint.hpp +20 -24
  104. package/src/duckdb/src/include/duckdb/common/types.hpp +3 -0
  105. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +34 -8
  106. package/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp +1 -0
  107. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +3 -2
  108. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +3 -0
  109. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
  110. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +15 -3
  111. package/src/duckdb/src/include/duckdb/function/cast/vector_cast_helpers.hpp +2 -2
  112. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +7 -3
  113. package/src/duckdb/src/include/duckdb/function/function.hpp +1 -0
  114. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +2 -1
  115. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +20 -12
  116. package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +4 -3
  117. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +3 -1
  118. package/src/duckdb/src/include/duckdb/logging/log_type.hpp +17 -0
  119. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -0
  120. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +22 -6
  121. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  122. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +4 -1
  123. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +27 -13
  124. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +1 -0
  125. package/src/duckdb/src/include/duckdb/main/settings.hpp +11 -0
  126. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +7 -1
  127. package/src/duckdb/src/include/duckdb/original/std/locale.hpp +10 -0
  128. package/src/duckdb/src/include/duckdb/original/std/memory.hpp +12 -0
  129. package/src/duckdb/src/include/duckdb/original/std/sstream.hpp +11 -0
  130. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +5 -3
  131. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +4 -2
  132. package/src/duckdb/src/logging/log_manager.cpp +1 -0
  133. package/src/duckdb/src/logging/log_types.cpp +40 -0
  134. package/src/duckdb/src/main/attached_database.cpp +4 -0
  135. package/src/duckdb/src/main/client_context.cpp +1 -0
  136. package/src/duckdb/src/main/config.cpp +1 -0
  137. package/src/duckdb/src/main/database.cpp +1 -0
  138. package/src/duckdb/src/main/database_manager.cpp +19 -2
  139. package/src/duckdb/src/main/extension/extension_helper.cpp +4 -3
  140. package/src/duckdb/src/main/query_profiler.cpp +2 -2
  141. package/src/duckdb/src/main/query_result.cpp +1 -1
  142. package/src/duckdb/src/main/secret/secret_manager.cpp +2 -0
  143. package/src/duckdb/src/main/settings/autogenerated_settings.cpp +7 -0
  144. package/src/duckdb/src/main/settings/custom_settings.cpp +106 -34
  145. package/src/duckdb/src/optimizer/optimizer.cpp +1 -1
  146. package/src/duckdb/src/optimizer/topn_optimizer.cpp +18 -8
  147. package/src/duckdb/src/parallel/executor.cpp +5 -0
  148. package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +1 -1
  149. package/src/duckdb/src/parser/transform/expression/transform_interval.cpp +5 -1
  150. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +21 -24
  151. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -8
  152. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +3 -2
  153. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +0 -4
  154. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +3 -0
  155. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +3 -0
  156. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +3 -0
  157. package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +1 -1
  158. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +0 -1
  159. package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +1 -1
  160. package/src/duckdb/src/planner/expression_binder.cpp +4 -2
  161. package/src/duckdb/src/planner/logical_operator.cpp +2 -1
  162. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +4 -1
  163. package/src/duckdb/src/storage/buffer/block_handle.cpp +8 -0
  164. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +44 -18
  165. package/src/duckdb/src/storage/caching_file_system.cpp +7 -7
  166. package/src/duckdb/src/storage/standard_buffer_manager.cpp +4 -3
  167. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  168. package/src/duckdb/src/storage/wal_replay.cpp +9 -4
  169. package/src/duckdb/third_party/fmt/include/fmt/format.h +8 -1
  170. package/src/duckdb/third_party/fsst/libfsst.cpp +4 -3
  171. package/src/duckdb/third_party/httplib/httplib.hpp +25 -22
  172. package/src/duckdb/third_party/hyperloglog/sds.cpp +7 -3
  173. package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +8 -1
  174. package/src/duckdb/third_party/re2/re2/filtered_re2.h +8 -2
  175. package/src/duckdb/third_party/re2/re2/pod_array.h +7 -1
  176. package/src/duckdb/third_party/re2/re2/re2.cc +6 -2
  177. package/src/duckdb/third_party/re2/re2/set.cc +1 -1
  178. package/src/duckdb/third_party/re2/re2/set.h +7 -1
  179. package/src/duckdb/ub_src_logging.cpp +4 -4
@@ -173,13 +173,13 @@ void CSVSniffer::GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountSc
173
173
  }
174
174
 
175
175
  // Returns true if a comment is acceptable
176
- bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, bool comment_set_by_user) {
177
- if (comment_set_by_user) {
176
+ bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, const CSVReaderOptions &options) {
177
+ if (options.dialect_options.state_machine_options.comment.IsSetByUser()) {
178
178
  return true;
179
179
  }
180
180
  // For a comment to be acceptable, we want 3/5th's the majority of unmatched in the columns
181
181
  constexpr double min_majority = 0.6;
182
- // detected comments, are all lines that started with a comment character.
182
+ // detected comments are all lines that started with a comment character.
183
183
  double detected_comments = 0;
184
184
  // If at least one comment is a full line comment
185
185
  bool has_full_line_comment = false;
@@ -192,7 +192,9 @@ bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, bool
192
192
  has_full_line_comment = true;
193
193
  valid_comments++;
194
194
  }
195
- if (result.column_counts[i].number_of_columns == num_cols && result.column_counts[i].is_mid_comment) {
195
+ if ((result.column_counts[i].number_of_columns == num_cols ||
196
+ (result.column_counts[i].number_of_columns <= num_cols && options.null_padding)) &&
197
+ result.column_counts[i].is_mid_comment) {
196
198
  valid_comments++;
197
199
  }
198
200
  }
@@ -212,13 +214,13 @@ bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, bool
212
214
  return valid_comments / detected_comments >= min_majority;
213
215
  }
214
216
 
215
- void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner, idx_t &rows_read,
216
- idx_t &best_consistent_rows, idx_t &prev_padding_count,
217
- idx_t &min_ignored_rows) {
217
+ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner, CandidateStats &stats,
218
+ vector<unique_ptr<ColumnCountScanner>> &successful_candidates) {
218
219
  // The sniffed_column_counts variable keeps track of the number of columns found for each row
219
220
  auto &sniffed_column_counts = scanner->ParseChunk();
220
221
  idx_t dirty_notes = 0;
221
222
  idx_t dirty_notes_minus_comments = 0;
223
+ idx_t empty_lines = 0;
222
224
  if (sniffed_column_counts.error) {
223
225
  if (!scanner->error_handler->HasError(MAXIMUM_LINE_SIZE)) {
224
226
  all_fail_max_line_size = false;
@@ -232,7 +234,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
232
234
  idx_t consistent_rows = 0;
233
235
  idx_t num_cols = sniffed_column_counts.result_position == 0 ? 1 : sniffed_column_counts[0].number_of_columns;
234
236
  const bool ignore_errors = options.ignore_errors.GetValue();
235
- // If we are ignoring errors and not null_padding , we pick the most frequent number of columns as the right one
237
+ // If we are ignoring errors and not null_padding, we pick the most frequent number of columns as the right one
236
238
  const bool use_most_frequent_columns = ignore_errors && !options.null_padding;
237
239
  if (use_most_frequent_columns) {
238
240
  num_cols = sniffed_column_counts.GetMostFrequentColumnCount();
@@ -242,8 +244,8 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
242
244
  idx_t ignored_rows = 0;
243
245
  const bool allow_padding = options.null_padding;
244
246
  bool first_valid = false;
245
- if (sniffed_column_counts.result_position > rows_read) {
246
- rows_read = sniffed_column_counts.result_position;
247
+ if (sniffed_column_counts.result_position > stats.rows_read) {
248
+ stats.rows_read = sniffed_column_counts.result_position;
247
249
  }
248
250
  if (set_columns.IsCandidateUnacceptable(num_cols, options.null_padding, ignore_errors,
249
251
  sniffed_column_counts[0].last_value_always_empty)) {
@@ -279,9 +281,10 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
279
281
  sniffed_column_counts.state_machine.dialect_options.rows_until_header = row;
280
282
  }
281
283
  padding_count = 0;
282
- // we use the maximum amount of num_cols that we find
284
+ // we use the maximum number of num_cols that we find
283
285
  num_cols = sniffed_column_counts[row].number_of_columns;
284
- dirty_notes = row;
286
+ dirty_notes = row + sniffed_column_counts[row].empty_lines;
287
+ empty_lines = sniffed_column_counts[row].empty_lines;
285
288
  dirty_notes_minus_comments = dirty_notes - comment_rows;
286
289
  header_idx = row;
287
290
  consistent_rows = 1;
@@ -289,7 +292,8 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
289
292
  if (!first_valid) {
290
293
  first_valid = true;
291
294
  sniffed_column_counts.state_machine.dialect_options.rows_until_header = row;
292
- dirty_notes = row;
295
+ dirty_notes = row + sniffed_column_counts[row].empty_lines;
296
+ empty_lines = sniffed_column_counts[row].empty_lines;
293
297
  dirty_notes_minus_comments = dirty_notes - comment_rows;
294
298
  num_cols = sniffed_column_counts[row].number_of_columns;
295
299
  }
@@ -311,24 +315,26 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
311
315
  consistent_rows += padding_count;
312
316
 
313
317
  // Whether there are more values (rows) available that are consistent, exceeding the current best.
314
- const bool more_values = consistent_rows > best_consistent_rows && num_cols >= max_columns_found;
318
+ const bool more_values = consistent_rows > stats.best_consistent_rows && num_cols >= max_columns_found;
315
319
 
316
- const bool more_columns = consistent_rows == best_consistent_rows && num_cols > max_columns_found;
320
+ const bool more_columns = consistent_rows == stats.best_consistent_rows && num_cols > max_columns_found;
317
321
 
318
322
  // If additional padding is required when compared to the previous padding count.
319
- const bool require_more_padding = padding_count > prev_padding_count;
323
+ const bool require_more_padding = padding_count > stats.prev_padding_count;
320
324
 
321
325
  // If less padding is now required when compared to the previous padding count.
322
- const bool require_less_padding = padding_count < prev_padding_count;
326
+ const bool require_less_padding = padding_count < stats.prev_padding_count;
323
327
 
324
328
  // If there was only a single column before, and the new number of columns exceeds that.
325
- const bool single_column_before = max_columns_found < 2 && num_cols > max_columns_found * candidates.size();
329
+ const bool single_column_before =
330
+ max_columns_found < 2 && num_cols > max_columns_found * successful_candidates.size();
326
331
 
327
332
  // If the number of rows is consistent with the calculated value after accounting for skipped rows and the
328
333
  // start row.
329
- const bool rows_consistent =
330
- consistent_rows + (dirty_notes_minus_comments - options.dialect_options.skip_rows.GetValue()) + comment_rows ==
331
- sniffed_column_counts.result_position - options.dialect_options.skip_rows.GetValue();
334
+ const bool rows_consistent = consistent_rows +
335
+ (dirty_notes_minus_comments - options.dialect_options.skip_rows.GetValue()) +
336
+ comment_rows - empty_lines ==
337
+ sniffed_column_counts.result_position - options.dialect_options.skip_rows.GetValue();
332
338
  // If there are more than one consistent row.
333
339
  const bool more_than_one_row = consistent_rows > 1;
334
340
 
@@ -336,14 +342,14 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
336
342
  const bool more_than_one_column = num_cols > 1;
337
343
 
338
344
  // If the start position is valid.
339
- const bool start_good = !candidates.empty() &&
340
- dirty_notes <= candidates.front()->GetStateMachine().dialect_options.skip_rows.GetValue();
345
+ const bool start_good =
346
+ !successful_candidates.empty() &&
347
+ dirty_notes <= successful_candidates.front()->GetStateMachine().dialect_options.skip_rows.GetValue();
341
348
 
342
349
  // If padding happened but it is not allowed.
343
350
  const bool invalid_padding = !allow_padding && padding_count > 0;
344
351
 
345
- const bool comments_are_acceptable = AreCommentsAcceptable(
346
- sniffed_column_counts, num_cols, options.dialect_options.state_machine_options.comment.IsSetByUser());
352
+ const bool comments_are_acceptable = AreCommentsAcceptable(sniffed_column_counts, num_cols, options);
347
353
 
348
354
  const bool quoted =
349
355
  scanner->ever_quoted &&
@@ -360,44 +366,44 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
360
366
  // If rows are consistent and no invalid padding happens, this is the best suitable candidate if one of the
361
367
  // following is valid:
362
368
  // - There's a single column before.
363
- // - There are more values and no additional padding is required.
369
+ // - There are more values, and no additional padding is required.
364
370
  // - There's more than one column and less padding is required.
365
371
  if (columns_match_set && (rows_consistent || (set_columns.IsSet() && ignore_errors)) &&
366
372
  (single_column_before || ((more_values || more_columns) && !require_more_padding) ||
367
- (more_than_one_column && require_less_padding) || quoted) &&
373
+ (more_than_one_column && require_less_padding) || (quoted && comment_rows == 0)) &&
368
374
  !invalid_padding && comments_are_acceptable) {
369
- if (!candidates.empty() && set_columns.IsSet() && max_columns_found == set_columns.Size() &&
370
- consistent_rows <= best_consistent_rows) {
375
+ if (!successful_candidates.empty() && set_columns.IsSet() && max_columns_found == set_columns.Size() &&
376
+ consistent_rows <= stats.best_consistent_rows) {
371
377
  // We have a candidate that fits our requirements better
372
- if (candidates.front()->ever_quoted || !scanner->ever_quoted) {
378
+ if (successful_candidates.front()->ever_quoted || !scanner->ever_quoted) {
373
379
  return;
374
380
  }
375
381
  }
376
382
  auto &sniffing_state_machine = scanner->GetStateMachine();
377
383
 
378
- if (!candidates.empty() && candidates.front()->ever_quoted) {
384
+ if (!successful_candidates.empty() && successful_candidates.front()->ever_quoted) {
379
385
  // Give preference to quoted boys.
380
386
  if (!scanner->ever_quoted) {
381
387
  return;
382
388
  } else {
383
389
  // Give preference to one that got escaped
384
- if (!scanner->ever_escaped && candidates.front()->ever_escaped &&
390
+ if (!scanner->ever_escaped && successful_candidates.front()->ever_escaped &&
385
391
  sniffing_state_machine.dialect_options.state_machine_options.strict_mode.GetValue()) {
386
392
  return;
387
393
  }
388
- if (best_consistent_rows == consistent_rows && num_cols >= max_columns_found) {
394
+ if (stats.best_consistent_rows == consistent_rows && num_cols >= max_columns_found) {
389
395
  // If both have not been escaped, this might get solved later on.
390
396
  sniffing_state_machine.dialect_options.num_cols = num_cols;
391
- candidates.emplace_back(std::move(scanner));
397
+ successful_candidates.emplace_back(std::move(scanner));
392
398
  max_columns_found = num_cols;
393
399
  return;
394
400
  }
395
401
  }
396
402
  }
397
- if (max_columns_found == num_cols && (ignored_rows > min_ignored_rows)) {
403
+ if (max_columns_found == num_cols && (ignored_rows > stats.min_ignored_rows)) {
398
404
  return;
399
405
  }
400
- if (max_columns_found > 1 && num_cols > max_columns_found && consistent_rows < best_consistent_rows / 2 &&
406
+ if (max_columns_found > 1 && num_cols > max_columns_found && consistent_rows < stats.best_consistent_rows / 2 &&
401
407
  (options.null_padding || ignore_errors)) {
402
408
  // When null_padding is true, we only give preference to a max number of columns if null padding is at least
403
409
  // 50% as consistent as the best case scenario
@@ -406,39 +412,40 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
406
412
  if (quoted && num_cols < max_columns_found) {
407
413
  if (scanner->ever_escaped &&
408
414
  sniffing_state_machine.dialect_options.state_machine_options.strict_mode.GetValue()) {
409
- for (auto &candidate : candidates) {
415
+ for (auto &candidate : successful_candidates) {
410
416
  if (candidate->ever_quoted && candidate->ever_escaped) {
411
417
  return;
412
418
  }
413
419
  }
414
420
 
415
421
  } else {
416
- for (auto &candidate : candidates) {
422
+ for (auto &candidate : successful_candidates) {
417
423
  if (candidate->ever_quoted) {
418
424
  return;
419
425
  }
420
426
  }
421
427
  }
422
428
  }
423
- best_consistent_rows = consistent_rows;
429
+ stats.best_consistent_rows = consistent_rows;
424
430
  max_columns_found = num_cols;
425
- prev_padding_count = padding_count;
426
- min_ignored_rows = ignored_rows;
431
+ stats.prev_padding_count = padding_count;
432
+ stats.min_ignored_rows = ignored_rows;
427
433
 
428
434
  if (options.dialect_options.skip_rows.IsSetByUser()) {
429
- // If skip rows is set by user, and we found dirty notes, we only accept it if either null_padding or
435
+ // If skip rows are set by the user, and we found dirty notes, we only accept it if either null_padding or
430
436
  // ignore_errors is set we have comments
431
- if (dirty_notes != 0 && !options.null_padding && !options.ignore_errors.GetValue() && comment_rows == 0) {
437
+ if (dirty_notes - empty_lines != 0 && !options.null_padding && !options.ignore_errors.GetValue() &&
438
+ comment_rows == 0) {
432
439
  return;
433
440
  }
434
441
  sniffing_state_machine.dialect_options.skip_rows = options.dialect_options.skip_rows.GetValue();
435
442
  } else if (!options.null_padding) {
436
443
  sniffing_state_machine.dialect_options.skip_rows = dirty_notes_minus_comments;
437
444
  }
438
- candidates.clear();
445
+ successful_candidates.clear();
439
446
  sniffing_state_machine.dialect_options.num_cols = num_cols;
440
447
  lines_sniffed = sniffed_column_counts.result_position;
441
- candidates.emplace_back(std::move(scanner));
448
+ successful_candidates.emplace_back(std::move(scanner));
442
449
  return;
443
450
  }
444
451
  // If there's more than one row and column, the start is good, rows are consistent,
@@ -449,7 +456,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
449
456
  auto &sniffing_state_machine = scanner->GetStateMachine();
450
457
 
451
458
  if (options.dialect_options.skip_rows.IsSetByUser()) {
452
- // If skip rows is set by user, and we found dirty notes, we only accept it if either null_padding or
459
+ // If skip rows are set by the user, and we found dirty notes, we only accept it if either null_padding or
453
460
  // ignore_errors is set
454
461
  if (dirty_notes != 0 && !options.null_padding && !options.ignore_errors.GetValue()) {
455
462
  return;
@@ -460,7 +467,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
460
467
  }
461
468
  sniffing_state_machine.dialect_options.num_cols = num_cols;
462
469
  lines_sniffed = sniffed_column_counts.result_position;
463
- candidates.emplace_back(std::move(scanner));
470
+ successful_candidates.emplace_back(std::move(scanner));
464
471
  }
465
472
  }
466
473
 
@@ -481,8 +488,8 @@ bool CSVSniffer::RefineCandidateNextChunk(ColumnCountScanner &candidate) const {
481
488
  }
482
489
 
483
490
  void CSVSniffer::RefineCandidates() {
484
- // It's very frequent that more than one dialect can parse a csv file, hence here we run one state machine
485
- // fully on the whole sample dataset, when/if it fails we go to the next one.
491
+ // It's very frequent that more than one dialect can parse a csv file; hence here we run one state machine
492
+ // fully on the whole sample dataset, when/if it fails, we go to the next one.
486
493
  if (candidates.empty()) {
487
494
  // No candidates to refine
488
495
  return;
@@ -587,22 +594,14 @@ NewLineIdentifier CSVSniffer::DetectNewLineDelimiter(CSVBufferManager &buffer_ma
587
594
  void CSVSniffer::DetectDialect() {
588
595
  // Variables for Dialect Detection
589
596
  DialectCandidates dialect_candidates(options.dialect_options.state_machine_options);
590
- // Number of rows read
591
- idx_t rows_read = 0;
592
- // Best Number of consistent rows (i.e., presenting all columns)
593
- idx_t best_consistent_rows = 0;
594
- // If padding was necessary (i.e., rows are missing some columns, how many)
595
- idx_t prev_padding_count = 0;
596
- // Min number of ignores rows
597
- idx_t best_ignored_rows = 0;
597
+ CandidateStats stats;
598
598
  // Vector of CSV State Machines
599
599
  vector<unique_ptr<ColumnCountScanner>> csv_state_machines;
600
600
  // Step 1: Generate state machines
601
601
  GenerateStateMachineSearchSpace(csv_state_machines, dialect_candidates);
602
602
  // Step 2: Analyze all candidates on the first chunk
603
603
  for (auto &state_machine : csv_state_machines) {
604
- AnalyzeDialectCandidate(std::move(state_machine), rows_read, best_consistent_rows, prev_padding_count,
605
- best_ignored_rows);
604
+ AnalyzeDialectCandidate(std::move(state_machine), stats, candidates);
606
605
  }
607
606
  // Step 3: Loop over candidates and find if they can still produce good results for the remaining chunks
608
607
  RefineCandidates();
@@ -17,14 +17,11 @@
17
17
  #include "duckdb/parallel/thread_context.hpp"
18
18
  #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
19
19
  #include "duckdb/planner/expression/bound_reference_expression.hpp"
20
- #include "duckdb/planner/filter/conjunction_filter.hpp"
21
20
  #include "duckdb/planner/filter/constant_filter.hpp"
22
21
  #include "duckdb/planner/filter/in_filter.hpp"
23
- #include "duckdb/planner/filter/null_filter.hpp"
24
22
  #include "duckdb/planner/filter/optional_filter.hpp"
25
23
  #include "duckdb/planner/table_filter.hpp"
26
24
  #include "duckdb/storage/buffer_manager.hpp"
27
- #include "duckdb/storage/storage_manager.hpp"
28
25
  #include "duckdb/storage/temporary_memory_manager.hpp"
29
26
 
30
27
  namespace duckdb {
@@ -249,7 +246,7 @@ public:
249
246
  };
250
247
 
251
248
  unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &context) const {
252
- auto result = make_uniq<JoinHashTable>(context, conditions, payload_columns.col_types, join_type,
249
+ auto result = make_uniq<JoinHashTable>(context, *this, conditions, payload_columns.col_types, join_type,
253
250
  rhs_output_columns.col_idxs);
254
251
  if (!delim_types.empty() && join_type == JoinType::MARK) {
255
252
  // correlated MARK join
@@ -779,7 +776,9 @@ unique_ptr<DataChunk> JoinFilterPushdownInfo::Finalize(ClientContext &context, o
779
776
  continue;
780
777
  }
781
778
  // if the HT is small we can generate a complete "OR" filter
782
- if (ht && ht->Count() > 1 && ht->Count() <= dynamic_or_filter_threshold) {
779
+ // but only if the join condition is equality.
780
+ if (ht && ht->Count() > 1 && ht->Count() <= dynamic_or_filter_threshold &&
781
+ cmp == ExpressionType::COMPARE_EQUAL) {
783
782
  PushInFilter(info, *ht, op, filter_idx, filter_col_idx);
784
783
  }
785
784
 
@@ -852,6 +851,8 @@ SinkFinalizeType PhysicalHashJoin::Finalize(Pipeline &pipeline, Event &event, Cl
852
851
  sink.external = false;
853
852
  }
854
853
  }
854
+ DUCKDB_LOG(context, PhysicalOperatorLogType, *this, "PhysicalHashJoin", "Finalize",
855
+ {{"external", to_string(sink.external)}});
855
856
  if (sink.external) {
856
857
  // External Hash Join
857
858
  sink.perfect_join_executor.reset();
@@ -862,8 +863,12 @@ SinkFinalizeType PhysicalHashJoin::Finalize(Pipeline &pipeline, Event &event, Cl
862
863
  if (!very_very_skewed &&
863
864
  (max_partition_ht_size + sink.probe_side_requirement) > sink.temporary_memory_state->GetReservation()) {
864
865
  // We have to repartition
866
+ const auto radix_bits_before = ht.GetRadixBits();
865
867
  ht.SetRepartitionRadixBits(sink.temporary_memory_state->GetReservation(), sink.max_partition_size,
866
868
  sink.max_partition_count);
869
+ DUCKDB_LOG(context, PhysicalOperatorLogType, *this, "PhysicalHashJoin", "Repartition",
870
+ {{"partitions_before", to_string(RadixPartitioning::NumberOfPartitions(radix_bits_before))},
871
+ {"partitions_after", to_string(RadixPartitioning::NumberOfPartitions(ht.GetRadixBits()))}});
867
872
  auto new_event = make_shared_ptr<HashJoinRepartitionEvent>(pipeline, *this, sink, sink.local_hash_tables);
868
873
  event.InsertEvent(std::move(new_event));
869
874
  } else {
@@ -112,12 +112,16 @@ public:
112
112
  }
113
113
  // initialize writing to the file
114
114
  global_state = op.function.copy_to_initialize_global(context, *op.bind_data, op.file_path);
115
+ if (op.function.initialize_operator) {
116
+ op.function.initialize_operator(*global_state, op);
117
+ }
115
118
  if (op.return_type == CopyFunctionReturnType::WRITTEN_FILE_STATISTICS) {
116
119
  written_file_info = make_uniq<CopyToFileInfo>(op.file_path);
117
120
  written_file_info->file_stats = make_uniq<CopyFunctionFileStatistics>();
118
121
  op.function.copy_to_get_written_statistics(context, *op.bind_data, *global_state,
119
122
  *written_file_info->file_stats);
120
123
  }
124
+ initialized = true;
121
125
  }
122
126
 
123
127
  void AddBatchData(idx_t batch_index, unique_ptr<PreparedBatchData> new_batch, idx_t memory_usage) {
@@ -52,6 +52,7 @@ public:
52
52
  file_write_lock_if_rotating(make_uniq<StorageLock>()) {
53
53
  max_open_files = ClientConfig::GetConfig(context).partitioned_write_max_open_files;
54
54
  }
55
+
55
56
  StorageLock lock;
56
57
  atomic<bool> initialized;
57
58
  atomic<idx_t> rows_copied;
@@ -78,6 +79,9 @@ public:
78
79
  }
79
80
  // initialize writing to the file
80
81
  global_state = op.function.copy_to_initialize_global(context, *op.bind_data, op.file_path);
82
+ if (op.function.initialize_operator) {
83
+ op.function.initialize_operator(*global_state, op);
84
+ }
81
85
  auto written_file_info = AddFile(*write_lock, op.file_path, op.return_type);
82
86
  if (written_file_info) {
83
87
  op.function.copy_to_get_written_statistics(context, *op.bind_data, *global_state,
@@ -217,6 +221,9 @@ public:
217
221
  written_file_info->partition_keys = Value::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR,
218
222
  std::move(partition_keys), std::move(partition_values));
219
223
  }
224
+ if (op.function.initialize_operator) {
225
+ op.function.initialize_operator(*info->global_state, op);
226
+ }
220
227
  auto &result = *info;
221
228
  info->active_writes = 1;
222
229
  // store in active write map
@@ -353,6 +360,9 @@ unique_ptr<GlobalFunctionData> PhysicalCopyToFile::CreateFileState(ClientContext
353
360
  if (written_file_info) {
354
361
  function.copy_to_get_written_statistics(context, *bind_data, *result, *written_file_info->file_stats);
355
362
  }
363
+ if (function.initialize_operator) {
364
+ function.initialize_operator(*result, *this);
365
+ }
356
366
  return result;
357
367
  }
358
368
 
@@ -408,12 +418,9 @@ void CheckDirectory(FileSystem &fs, const string &file_path, CopyOverwriteMode o
408
418
  unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext &context) const {
409
419
  if (partition_output || per_thread_output || rotate) {
410
420
  auto &fs = FileSystem::GetFileSystem(context);
411
- if (fs.FileExists(file_path)) {
412
- // the target file exists AND is a file (not a directory)
413
- if (fs.IsRemoteFile(file_path)) {
414
- // for remote files we cannot do anything - as we cannot delete the file
415
- throw IOException("Cannot write to \"%s\" - it exists and is a file, not a directory!", file_path);
416
- } else {
421
+ if (!fs.IsRemoteFile(file_path)) {
422
+ if (fs.FileExists(file_path)) {
423
+ // the target file exists AND is a file (not a directory)
417
424
  // for local files we can remove the file if OVERWRITE_OR_IGNORE is enabled
418
425
  if (overwrite_mode == CopyOverwriteMode::COPY_OVERWRITE) {
419
426
  fs.RemoveFile(file_path);
@@ -432,7 +439,7 @@ unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext
432
439
  }
433
440
 
434
441
  auto state = make_uniq<CopyToFunctionGlobalState>(context);
435
- if (!per_thread_output && rotate) {
442
+ if (!per_thread_output && rotate && write_empty_file) {
436
443
  auto global_lock = state->lock.GetExclusiveLock();
437
444
  state->global_state = CreateFileState(context, *state, *global_lock);
438
445
  }
@@ -490,6 +497,9 @@ void PhysicalCopyToFile::WriteRotateInternal(ExecutionContext &context, GlobalSi
490
497
  while (true) {
491
498
  // Grab global lock and dereference the current file state (and corresponding lock)
492
499
  auto global_guard = g.lock.GetExclusiveLock();
500
+ if (!g.global_state) {
501
+ g.global_state = CreateFileState(context.client, *sink_state, *global_guard);
502
+ }
493
503
  auto &file_state = *g.global_state;
494
504
  auto &file_lock = *g.file_write_lock_if_rotating;
495
505
  if (rotate && function.rotate_next_file(file_state, *bind_data, file_size_bytes)) {
@@ -523,7 +533,7 @@ SinkResultType PhysicalCopyToFile::Sink(ExecutionContext &context, DataChunk &ch
523
533
  auto &g = input.global_state.Cast<CopyToFunctionGlobalState>();
524
534
  auto &l = input.local_state.Cast<CopyToFunctionLocalState>();
525
535
 
526
- if (!write_empty_file) {
536
+ if (!write_empty_file && !rotate) {
527
537
  // if we are only writing the file when there are rows to write we need to initialize here
528
538
  g.Initialize(context.client, *this);
529
539
  }
@@ -19,7 +19,7 @@ namespace duckdb {
19
19
 
20
20
  void ReorderTableEntries(catalog_entry_vector_t &tables);
21
21
 
22
- using std::stringstream;
22
+ using duckdb::stringstream;
23
23
 
24
24
  PhysicalExport::PhysicalExport(vector<LogicalType> types, CopyFunction function, unique_ptr<CopyInfo> info,
25
25
  idx_t estimated_cardinality, unique_ptr<BoundExportData> exported_tables)
@@ -72,6 +72,7 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c
72
72
  if (!options.default_table.name.empty()) {
73
73
  attached_db->GetCatalog().SetDefaultTable(options.default_table.schema, options.default_table.name);
74
74
  }
75
+ attached_db->FinalizeLoad(context.client);
75
76
  return SourceResultType::FINISHED;
76
77
  }
77
78
 
@@ -28,17 +28,17 @@ unique_ptr<PhysicalPlan> PhysicalPlanGenerator::Plan(unique_ptr<LogicalOperator>
28
28
  PhysicalOperator &PhysicalPlanGenerator::ResolveAndPlan(unique_ptr<LogicalOperator> op) {
29
29
  auto &profiler = QueryProfiler::Get(context);
30
30
 
31
+ // Resolve the types of each operator.
32
+ profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_RESOLVE_TYPES);
33
+ op->ResolveOperatorTypes();
34
+ profiler.EndPhase();
35
+
31
36
  // Resolve the column references.
32
37
  profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_COLUMN_BINDING);
33
38
  ColumnBindingResolver resolver;
34
39
  resolver.VisitOperator(*op);
35
40
  profiler.EndPhase();
36
41
 
37
- // Resolve the types of each operator.
38
- profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_RESOLVE_TYPES);
39
- op->ResolveOperatorTypes();
40
- profiler.EndPhase();
41
-
42
42
  // Create the main physical plan.
43
43
  profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_CREATE_PLAN);
44
44
  physical_plan = PlanInternal(*op);
@@ -124,7 +124,8 @@ static string_t HandleString(Vector &vec, const char *buf, idx_t start, idx_t en
124
124
  bool escaped = false;
125
125
 
126
126
  bool quoted = false;
127
- char quote_char;
127
+ // Satisfy GCC warning about uninitialized variable
128
+ char quote_char = '\0';
128
129
  stack<char> scopes;
129
130
  for (idx_t i = 0; i < length; i++) {
130
131
  auto current_char = buf[start + i];
@@ -36,6 +36,10 @@ bool TableFunctionData::Equals(const FunctionData &other) const {
36
36
  return false;
37
37
  }
38
38
 
39
+ bool FunctionData::SupportStatementCache() const {
40
+ return true;
41
+ }
42
+
39
43
  Function::Function(string name_p) : name(std::move(name_p)) {
40
44
  }
41
45
  Function::~Function() {
@@ -882,9 +882,15 @@ ScalarFunctionSet OperatorMultiplyFun::GetFunctions() {
882
882
  multiply.AddFunction(
883
883
  ScalarFunction({LogicalType::INTERVAL, LogicalType::DOUBLE}, LogicalType::INTERVAL,
884
884
  ScalarFunction::BinaryFunction<interval_t, double, interval_t, MultiplyOperator>));
885
+ multiply.AddFunction(
886
+ ScalarFunction({LogicalType::DOUBLE, LogicalType::INTERVAL}, LogicalType::INTERVAL,
887
+ ScalarFunction::BinaryFunction<double, interval_t, interval_t, MultiplyOperator>));
885
888
  multiply.AddFunction(
886
889
  ScalarFunction({LogicalType::BIGINT, LogicalType::INTERVAL}, LogicalType::INTERVAL,
887
890
  ScalarFunction::BinaryFunction<int64_t, interval_t, interval_t, MultiplyOperator>));
891
+ multiply.AddFunction(
892
+ ScalarFunction({LogicalType::INTERVAL, LogicalType::BIGINT}, LogicalType::INTERVAL,
893
+ ScalarFunction::BinaryFunction<interval_t, int64_t, interval_t, MultiplyOperator>));
888
894
  for (auto &func : multiply.functions) {
889
895
  ScalarFunction::SetReturnsError(func);
890
896
  }
@@ -84,6 +84,7 @@ static void RemapMap(Vector &input, Vector &default_vector, Vector &result, idx_
84
84
  auto &result_key_vector = MapVector::GetKeys(result);
85
85
  auto &result_value_vector = MapVector::GetValues(result);
86
86
  auto list_size = ListVector::GetListSize(input);
87
+ ListVector::Reserve(result, list_size);
87
88
  ListVector::SetListSize(result, list_size);
88
89
 
89
90
  bool has_top_level_null = false;
@@ -136,6 +137,7 @@ static void RemapList(Vector &input, Vector &default_vector, Vector &result, idx
136
137
  auto &input_vector = ListVector::GetEntry(input);
137
138
  auto &result_vector = ListVector::GetEntry(result);
138
139
  auto list_size = ListVector::GetListSize(input);
140
+ ListVector::Reserve(result, list_size);
139
141
  ListVector::SetListSize(result, list_size);
140
142
 
141
143
  bool has_top_level_null = false;
@@ -401,6 +403,9 @@ struct RemapEntry {
401
403
  auto &child_types = StructType::GetChildTypes(default_type);
402
404
  for (idx_t child_idx = 0; child_idx < child_types.size(); child_idx++) {
403
405
  auto &child_default = child_types[child_idx];
406
+ if (!result_entry->second.child_remaps || !entry->second.child_map) {
407
+ throw BinderException("No child remaps found");
408
+ }
404
409
  HandleDefault(child_idx, child_default.first, child_default.second, *entry->second.child_map,
405
410
  *result_entry->second.child_remaps);
406
411
  }
@@ -542,6 +547,10 @@ static unique_ptr<FunctionData> RemapStructBind(ClientContext &context, ScalarFu
542
547
  if (arg->return_type.id() == LogicalTypeId::UNKNOWN) {
543
548
  throw ParameterNotResolvedException();
544
549
  }
550
+ if (arg->return_type.id() == LogicalTypeId::SQLNULL && arg_idx == 2) {
551
+ // remap target can be NULL
552
+ continue;
553
+ }
545
554
  if (!arg->return_type.IsNested()) {
546
555
  throw BinderException("Struct remap can only remap nested types, not '%s'", arg->return_type.ToString());
547
556
  } else if (arg->return_type.id() == LogicalTypeId::STRUCT && StructType::IsUnnamed(arg->return_type)) {
@@ -571,11 +580,11 @@ static unique_ptr<FunctionData> RemapStructBind(ClientContext &context, ScalarFu
571
580
  auto target_map = RemapIndex::GetMap(to_type);
572
581
 
573
582
  Value remap_val = ExpressionExecutor::EvaluateScalar(context, *arguments[2]);
574
- auto &remap_types = StructType::GetChildTypes(arguments[2]->return_type);
575
583
 
576
584
  // (recursively) generate the remap entries
577
585
  case_insensitive_map_t<RemapEntry> remap_map;
578
586
  if (!remap_val.IsNull()) {
587
+ auto &remap_types = StructType::GetChildTypes(arguments[2]->return_type);
579
588
  auto &remap_values = StructValue::GetChildren(remap_val);
580
589
  for (idx_t remap_idx = 0; remap_idx < remap_values.size(); remap_idx++) {
581
590
  auto &remap_val = remap_values[remap_idx];
@@ -221,6 +221,7 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyFunctio
221
221
  memset(bind_data->requires_quotes.get(), 0, sizeof(bool) * 256);
222
222
  bind_data->requires_quotes['\n'] = true;
223
223
  bind_data->requires_quotes['\r'] = true;
224
+ bind_data->requires_quotes['#'] = true;
224
225
  bind_data->requires_quotes[NumericCast<idx_t>(
225
226
  bind_data->options.dialect_options.state_machine_options.delimiter.GetValue()[0])] = true;
226
227
  bind_data->requires_quotes[NumericCast<idx_t>(
@@ -1,5 +1,5 @@
1
1
  #ifndef DUCKDB_PATCH_VERSION
2
- #define DUCKDB_PATCH_VERSION "0"
2
+ #define DUCKDB_PATCH_VERSION "1"
3
3
  #endif
4
4
  #ifndef DUCKDB_MINOR_VERSION
5
5
  #define DUCKDB_MINOR_VERSION 3
@@ -8,10 +8,10 @@
8
8
  #define DUCKDB_MAJOR_VERSION 1
9
9
  #endif
10
10
  #ifndef DUCKDB_VERSION
11
- #define DUCKDB_VERSION "v1.3.0"
11
+ #define DUCKDB_VERSION "v1.3.1"
12
12
  #endif
13
13
  #ifndef DUCKDB_SOURCE_ID
14
- #define DUCKDB_SOURCE_ID "71c5c07cdd"
14
+ #define DUCKDB_SOURCE_ID "2063dda3e6"
15
15
  #endif
16
16
  #include "duckdb/function/table/system_functions.hpp"
17
17
  #include "duckdb/main/database.hpp"
@@ -109,6 +109,7 @@ public:
109
109
  }
110
110
  virtual void Initialize(bool load_builtin) = 0;
111
111
  virtual void Initialize(optional_ptr<ClientContext> context, bool load_builtin);
112
+ virtual void FinalizeLoad(optional_ptr<ClientContext> context);
112
113
 
113
114
  bool IsSystemCatalog() const;
114
115
  bool IsTemporaryCatalog() const;
@@ -66,7 +66,7 @@ private:
66
66
  unique_ptr<CatalogEntry> ChangeColumnType(ClientContext &context, ChangeColumnTypeInfo &info);
67
67
  unique_ptr<CatalogEntry> SetNotNull(ClientContext &context, SetNotNullInfo &info);
68
68
  unique_ptr<CatalogEntry> DropNotNull(ClientContext &context, DropNotNullInfo &info);
69
- unique_ptr<CatalogEntry> AddForeignKeyConstraint(optional_ptr<ClientContext> context, AlterForeignKeyInfo &info);
69
+ unique_ptr<CatalogEntry> AddForeignKeyConstraint(AlterForeignKeyInfo &info);
70
70
  unique_ptr<CatalogEntry> DropForeignKeyConstraint(ClientContext &context, AlterForeignKeyInfo &info);
71
71
  unique_ptr<CatalogEntry> SetColumnComment(ClientContext &context, SetColumnCommentInfo &info);
72
72
  unique_ptr<CatalogEntry> AddConstraint(ClientContext &context, AddConstraintInfo &info);
@@ -35,7 +35,7 @@ private:
35
35
  static string WriteOptionallyQuoted(const string &input);
36
36
  };
37
37
 
38
- enum class CatalogSetPathType { SET_SCHEMA, SET_SCHEMAS };
38
+ enum class CatalogSetPathType { SET_SCHEMA, SET_SCHEMAS, SET_DIRECTLY };
39
39
 
40
40
  //! The schema search path, in order by which entries are searched if no schema entry is provided
41
41
  class CatalogSearchPath {