duckdb 0.7.2-dev3546.0 → 0.7.2-dev3666.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/package.json +1 -1
  2. package/src/database.cpp +1 -0
  3. package/src/duckdb/extension/json/buffered_json_reader.cpp +56 -17
  4. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +56 -31
  5. package/src/duckdb/extension/json/include/json_common.hpp +5 -4
  6. package/src/duckdb/extension/json/include/json_executors.hpp +13 -18
  7. package/src/duckdb/extension/json/include/json_functions.hpp +3 -0
  8. package/src/duckdb/extension/json/include/json_scan.hpp +106 -153
  9. package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
  10. package/src/duckdb/extension/json/json_common.cpp +1 -1
  11. package/src/duckdb/extension/json/json_functions/copy_json.cpp +94 -38
  12. package/src/duckdb/extension/json/json_functions/json_contains.cpp +7 -8
  13. package/src/duckdb/extension/json/json_functions/json_create.cpp +7 -7
  14. package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +4 -4
  15. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +4 -4
  16. package/src/duckdb/extension/json/json_functions/json_structure.cpp +7 -5
  17. package/src/duckdb/extension/json/json_functions/json_transform.cpp +10 -8
  18. package/src/duckdb/extension/json/json_functions/json_valid.cpp +1 -1
  19. package/src/duckdb/extension/json/json_functions/read_json.cpp +166 -169
  20. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +37 -16
  21. package/src/duckdb/extension/json/json_functions.cpp +11 -4
  22. package/src/duckdb/extension/json/json_scan.cpp +593 -374
  23. package/src/duckdb/extension/parquet/parquet-extension.cpp +5 -0
  24. package/src/duckdb/src/common/file_system.cpp +26 -6
  25. package/src/duckdb/src/common/local_file_system.cpp +0 -13
  26. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +12 -6
  27. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +10 -0
  28. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -3
  29. package/src/duckdb/src/function/table/copy_csv.cpp +66 -12
  30. package/src/duckdb/src/function/table/read_csv.cpp +13 -3
  31. package/src/duckdb/src/function/table/version/pragma_version.cpp +8 -2
  32. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/column_dependency_manager.hpp +1 -1
  33. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
  34. package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +1 -1
  35. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -3
  36. package/src/duckdb/src/include/duckdb/common/file_system.hpp +5 -0
  37. package/src/duckdb/src/include/duckdb/common/http_state.hpp +2 -1
  38. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +6 -6
  39. package/src/duckdb/src/include/duckdb/common/limits.hpp +46 -46
  40. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +8 -8
  41. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +6 -6
  42. package/src/duckdb/src/include/duckdb/common/operator/convert_to_string.hpp +1 -1
  43. package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +2 -4
  44. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
  45. package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +1 -1
  46. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +1 -1
  47. package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
  48. package/src/duckdb/src/include/duckdb/common/string_util.hpp +7 -7
  49. package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +10 -10
  50. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -12
  51. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_iterators.hpp +2 -2
  52. package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -1
  53. package/src/duckdb/src/include/duckdb/common/types.hpp +2 -2
  54. package/src/duckdb/src/include/duckdb/common/winapi.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
  56. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -3
  57. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +10 -14
  58. package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
  59. package/src/duckdb/src/include/duckdb/function/udf_function.hpp +56 -50
  60. package/src/duckdb/src/include/duckdb/main/appender.hpp +2 -2
  61. package/src/duckdb/src/include/duckdb/main/client_context.hpp +2 -2
  62. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -1
  63. package/src/duckdb/src/include/duckdb/main/connection.hpp +8 -9
  64. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  65. package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -3
  66. package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -7
  67. package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/parser/column_list.hpp +7 -7
  69. package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +2 -2
  70. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +1 -1
  72. package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +2 -2
  73. package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +2 -2
  74. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +9 -4
  75. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +1 -1
  76. package/src/duckdb/src/storage/single_file_block_manager.cpp +0 -4
  77. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5735 -5773
@@ -724,6 +724,11 @@ unique_ptr<TableRef> ParquetScanReplacement(ClientContext &context, const string
724
724
  vector<unique_ptr<ParsedExpression>> children;
725
725
  children.push_back(make_uniq<ConstantExpression>(Value(table_name)));
726
726
  table_function->function = make_uniq<FunctionExpression>("parquet_scan", std::move(children));
727
+
728
+ if (!FileSystem::HasGlob(table_name)) {
729
+ table_function->alias = FileSystem::ExtractBaseName(table_name);
730
+ }
731
+
727
732
  return std::move(table_function);
728
733
  }
729
734
 
@@ -305,6 +305,20 @@ void FileSystem::FileSync(FileHandle &handle) {
305
305
  throw NotImplementedException("%s: FileSync is not implemented!", GetName());
306
306
  }
307
307
 
308
+ bool FileSystem::HasGlob(const string &str) {
309
+ for (idx_t i = 0; i < str.size(); i++) {
310
+ switch (str[i]) {
311
+ case '*':
312
+ case '?':
313
+ case '[':
314
+ return true;
315
+ default:
316
+ break;
317
+ }
318
+ }
319
+ return false;
320
+ }
321
+
308
322
  //! Base-class Glob: performs no matching and always throws a NotImplementedException whose message
  //! carries the file system name returned by GetName(). Neither `path` nor `opener` is used here.
  vector<string> FileSystem::Glob(const string &path, FileOpener *opener) {
309
323
  throw NotImplementedException("%s: Glob is not implemented!", GetName());
310
324
  }
@@ -333,12 +347,8 @@ vector<string> FileSystem::GlobFiles(const string &pattern, ClientContext &conte
333
347
  auto result = Glob(pattern);
334
348
  if (result.empty()) {
335
349
  string required_extension;
336
- const string prefixes[] = {"http://", "https://", "s3://"};
337
- for (auto &prefix : prefixes) {
338
- if (StringUtil::StartsWith(pattern, prefix)) {
339
- required_extension = "httpfs";
340
- break;
341
- }
350
+ if (FileSystem::IsRemoteFile(pattern)) {
351
+ required_extension = "httpfs";
342
352
  }
343
353
  if (!required_extension.empty() && !context.db->ExtensionIsLoaded(required_extension)) {
344
354
  // an extension is required to read this file but it is not loaded - try to load it
@@ -455,4 +465,14 @@ FileType FileHandle::GetType() {
455
465
  return file_system.GetFileType(*this);
456
466
  }
457
467
 
468
+ bool FileSystem::IsRemoteFile(const string &path) {
469
+ const string prefixes[] = {"http://", "https://", "s3://"};
470
+ for (auto &prefix : prefixes) {
471
+ if (StringUtil::StartsWith(path, prefix)) {
472
+ return true;
473
+ }
474
+ }
475
+ return false;
476
+ }
477
+
458
478
  } // namespace duckdb
@@ -819,19 +819,6 @@ idx_t LocalFileSystem::SeekPosition(FileHandle &handle) {
819
819
  return GetFilePointer(handle);
820
820
  }
821
821
 
822
- static bool HasGlob(const string &str) {
823
- for (idx_t i = 0; i < str.size(); i++) {
824
- switch (str[i]) {
825
- case '*':
826
- case '?':
827
- case '[':
828
- return true;
829
- default:
830
- break;
831
- }
832
- }
833
- return false;
834
- }
835
822
  static bool IsCrawl(const string &glob) {
836
823
  // glob must match exactly
837
824
  return glob == "**";
@@ -1172,6 +1172,16 @@ void BufferedCSVReader::SkipEmptyLines() {
1172
1172
  }
1173
1173
  }
1174
1174
 
1175
+ void UpdateMaxLineLength(ClientContext &context, idx_t line_length) {
1176
+ if (!context.client_data->debug_set_max_line_length) {
1177
+ return;
1178
+ }
1179
+ if (line_length < context.client_data->debug_max_line_length) {
1180
+ return;
1181
+ }
1182
+ context.client_data->debug_max_line_length = line_length;
1183
+ }
1184
+
1175
1185
  bool BufferedCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message) {
1176
1186
  // used for parsing algorithm
1177
1187
  bool finished_chunk = false;
@@ -1239,9 +1249,7 @@ add_row : {
1239
1249
  return false;
1240
1250
  }
1241
1251
  finished_chunk = AddRow(insert_chunk, column, error_message);
1242
- if (context.client_data->max_line_length < position - line_start) {
1243
- context.client_data->max_line_length = position - line_start;
1244
- }
1252
+ UpdateMaxLineLength(context, position - line_start);
1245
1253
  if (!error_message.empty()) {
1246
1254
  return false;
1247
1255
  }
@@ -1379,9 +1387,7 @@ final_state:
1379
1387
  AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
1380
1388
  finished_chunk = AddRow(insert_chunk, column, error_message);
1381
1389
  SkipEmptyLines();
1382
- if (context.client_data->max_line_length < position - line_start) {
1383
- context.client_data->max_line_length = position - line_start;
1384
- }
1390
+ UpdateMaxLineLength(context, position - line_start);
1385
1391
  if (!error_message.empty()) {
1386
1392
  return false;
1387
1393
  }
@@ -183,6 +183,12 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
183
183
  }
184
184
 
185
185
  void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value &value) {
186
+ if (loption == "new_line") {
187
+ // Steal this from SetBaseOption so we can write different newlines (e.g., format JSON ARRAY)
188
+ write_newline = ParseString(value, loption);
189
+ return;
190
+ }
191
+
186
192
  if (SetBaseOption(loption, value)) {
187
193
  return;
188
194
  }
@@ -199,6 +205,10 @@ void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value
199
205
  }
200
206
  SetDateFormat(LogicalTypeId::TIMESTAMP, format, false);
201
207
  SetDateFormat(LogicalTypeId::TIMESTAMP_TZ, format, false);
208
+ } else if (loption == "prefix") {
209
+ prefix = ParseString(value, loption);
210
+ } else if (loption == "suffix") {
211
+ suffix = ParseString(value, loption);
202
212
  } else {
203
213
  throw BinderException("Unrecognized option CSV writer \"%s\"", loption);
204
214
  }
@@ -50,15 +50,17 @@ string PragmaShowTables(ClientContext &context, const FunctionParameters &parame
50
50
  //! Returns the SQL text behind the expanded SHOW TABLES pragma. The query joins duckdb_tables to
  //! duckdb_columns on table_oid and produces one row per (database, schema, table) with the column
  //! names and column types collected in column_index order, plus the table's temporary flag.
  //! Neither `context` nor `parameters` is used by this function; the returned string is constant.
  string PragmaShowTablesExpanded(ClientContext &context, const FunctionParameters &parameters) {
51
51
  return R"(
52
52
  SELECT
53
+ t.database_name AS database,
54
+ t.schema_name AS schema,
53
55
  t.table_name,
54
56
  LIST(c.column_name order by c.column_index) AS column_names,
55
57
  LIST(c.data_type order by c.column_index) AS column_types,
56
- FIRST(t.temporary) AS temporary
58
+ FIRST(t.temporary) AS temporary,
57
59
  FROM duckdb_tables t
58
60
  JOIN duckdb_columns c
59
61
  USING (table_oid)
60
- GROUP BY t.table_name
61
- ORDER BY t.table_name;
62
+ GROUP BY t.database_name, t.schema_name, t.table_name
63
+ ORDER BY t.database_name, t.schema_name, t.table_name;
62
64
  )";
63
65
  }
64
66
 
@@ -1,15 +1,16 @@
1
1
  #include "duckdb/common/bind_helpers.hpp"
2
2
  #include "duckdb/common/file_system.hpp"
3
+ #include "duckdb/common/multi_file_reader.hpp"
3
4
  #include "duckdb/common/serializer/buffered_serializer.hpp"
4
5
  #include "duckdb/common/string_util.hpp"
6
+ #include "duckdb/common/types/column/column_data_collection.hpp"
5
7
  #include "duckdb/common/types/string_type.hpp"
6
8
  #include "duckdb/common/vector_operations/vector_operations.hpp"
7
9
  #include "duckdb/function/copy_function.hpp"
8
10
  #include "duckdb/function/scalar/string_functions.hpp"
9
11
  #include "duckdb/function/table/read_csv.hpp"
10
12
  #include "duckdb/parser/parsed_data/copy_info.hpp"
11
- #include "duckdb/common/multi_file_reader.hpp"
12
- #include "duckdb/common/types/column/column_data_collection.hpp"
13
+
13
14
  #include <limits>
14
15
 
15
16
  namespace duckdb {
@@ -57,6 +58,15 @@ void BaseCSVData::Finalize() {
57
58
  SubstringDetection(options.escape, options.null_str, "ESCAPE", "NULL");
58
59
  }
59
60
  }
61
+
62
+ if (!options.prefix.empty() || !options.suffix.empty()) {
63
+ if (options.prefix.empty() || options.suffix.empty()) {
64
+ throw BinderException("COPY ... (FORMAT CSV) must have both PREFIX and SUFFIX, or none at all");
65
+ }
66
+ if (options.header) {
67
+ throw BinderException("COPY ... (FORMAT CSV)'s HEADER cannot be combined with PREFIX/SUFFIX");
68
+ }
69
+ }
60
70
  }
61
71
 
62
72
  static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &info, vector<string> &names,
@@ -85,6 +95,9 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &i
85
95
  bind_data->requires_quotes[bind_data->options.delimiter[0]] = true;
86
96
  bind_data->requires_quotes[bind_data->options.quote[0]] = true;
87
97
  }
98
+ if (!bind_data->options.write_newline.empty()) {
99
+ bind_data->newline = bind_data->options.write_newline;
100
+ }
88
101
  return std::move(bind_data);
89
102
  }
90
103
 
@@ -251,24 +264,41 @@ struct LocalWriteCSVData : public LocalFunctionData {
251
264
  BufferedSerializer serializer;
252
265
  //! A chunk with VARCHAR columns to cast intermediates into
253
266
  DataChunk cast_chunk;
267
+ //! If we've written any rows yet, allows us to prevent a trailing comma when writing JSON ARRAY
268
+ bool written_anything = false;
254
269
  };
255
270
 
256
271
  struct GlobalWriteCSVData : public GlobalFunctionData {
257
- GlobalWriteCSVData(FileSystem &fs, const string &file_path, FileCompressionType compression) : fs(fs) {
272
+ GlobalWriteCSVData(FileSystem &fs, const string &file_path, FileCompressionType compression)
273
+ : fs(fs), written_anything(false) {
258
274
  handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW,
259
275
  FileLockType::WRITE_LOCK, compression);
260
276
  }
261
277
 
278
+ //! Write generic data, e.g., CSV header
262
279
  void WriteData(const_data_ptr_t data, idx_t size) {
263
280
  lock_guard<mutex> flock(lock);
264
281
  handle->Write((void *)data, size);
265
282
  }
266
283
 
284
+ //! Write rows
285
+ void WriteRows(const_data_ptr_t data, idx_t size, const string &newline) {
286
+ lock_guard<mutex> flock(lock);
287
+ if (written_anything) {
288
+ handle->Write((void *)newline.c_str(), newline.length());
289
+ } else {
290
+ written_anything = true;
291
+ }
292
+ handle->Write((void *)data, size);
293
+ }
294
+
267
295
  FileSystem &fs;
268
296
  //! The mutex for writing to the physical file
269
297
  mutex lock;
270
298
  //! The file handle to write to
271
299
  unique_ptr<FileHandle> handle;
300
+ //! If we've written any rows yet, allows us to prevent a trailing comma when writing JSON ARRAY
301
+ bool written_anything;
272
302
  };
273
303
 
274
304
  static unique_ptr<LocalFunctionData> WriteCSVInitializeLocal(ExecutionContext &context, FunctionData &bind_data) {
@@ -290,6 +320,10 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
290
320
  auto global_data =
291
321
  make_uniq<GlobalWriteCSVData>(FileSystem::GetFileSystem(context), file_path, options.compression);
292
322
 
323
+ if (!options.prefix.empty()) {
324
+ global_data->WriteData((const_data_ptr_t)options.prefix.c_str(), options.prefix.size());
325
+ }
326
+
293
327
  if (options.header) {
294
328
  BufferedSerializer serializer;
295
329
  // write the header line to the file
@@ -304,11 +338,12 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
304
338
 
305
339
  global_data->WriteData(serializer.blob.data.get(), serializer.blob.size);
306
340
  }
341
+
307
342
  return std::move(global_data);
308
343
  }
309
344
 
310
345
  static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_data, DataChunk &cast_chunk,
311
- BufferedSerializer &writer, DataChunk &input) {
346
+ BufferedSerializer &writer, DataChunk &input, bool &written_anything) {
312
347
  auto &csv_data = bind_data.Cast<WriteCSVData>();
313
348
  auto &options = csv_data.options;
314
349
 
@@ -338,6 +373,11 @@ static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_dat
338
373
  cast_chunk.Flatten();
339
374
  // now loop over the vectors and output the values
340
375
  for (idx_t row_idx = 0; row_idx < cast_chunk.size(); row_idx++) {
376
+ if (row_idx == 0 && !written_anything) {
377
+ written_anything = true;
378
+ } else {
379
+ writer.WriteBufferData(csv_data.newline);
380
+ }
341
381
  // write values
342
382
  for (idx_t col_idx = 0; col_idx < cast_chunk.ColumnCount(); col_idx++) {
343
383
  if (col_idx != 0) {
@@ -357,7 +397,6 @@ static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_dat
357
397
  WriteQuotedString(writer, csv_data, str_data[row_idx].GetData(), str_data[row_idx].GetSize(),
358
398
  csv_data.options.force_quote[col_idx]);
359
399
  }
360
- writer.WriteBufferData(csv_data.newline);
361
400
  }
362
401
  }
363
402
 
@@ -368,13 +407,15 @@ static void WriteCSVSink(ExecutionContext &context, FunctionData &bind_data, Glo
368
407
  auto &global_state = gstate.Cast<GlobalWriteCSVData>();
369
408
 
370
409
  // write data into the local buffer
371
- WriteCSVChunkInternal(context.client, bind_data, local_data.cast_chunk, local_data.serializer, input);
410
+ WriteCSVChunkInternal(context.client, bind_data, local_data.cast_chunk, local_data.serializer, input,
411
+ local_data.written_anything);
372
412
 
373
413
  // check if we should flush what we have currently written
374
414
  auto &writer = local_data.serializer;
375
415
  if (writer.blob.size >= csv_data.flush_size) {
376
- global_state.WriteData(writer.blob.data.get(), writer.blob.size);
416
+ global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
377
417
  writer.Reset();
418
+ local_data.written_anything = false;
378
419
  }
379
420
  }
380
421
 
@@ -385,10 +426,11 @@ static void WriteCSVCombine(ExecutionContext &context, FunctionData &bind_data,
385
426
  LocalFunctionData &lstate) {
386
427
  auto &local_data = lstate.Cast<LocalWriteCSVData>();
387
428
  auto &global_state = gstate.Cast<GlobalWriteCSVData>();
429
+ auto &csv_data = bind_data.Cast<WriteCSVData>();
388
430
  auto &writer = local_data.serializer;
389
431
  // flush the local writer
390
- if (writer.blob.size > 0) {
391
- global_state.WriteData(writer.blob.data.get(), writer.blob.size);
432
+ if (local_data.written_anything) {
433
+ global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
392
434
  writer.Reset();
393
435
  }
394
436
  }
@@ -398,6 +440,16 @@ static void WriteCSVCombine(ExecutionContext &context, FunctionData &bind_data,
398
440
  //===--------------------------------------------------------------------===//
399
441
  void WriteCSVFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) {
400
442
  auto &global_state = gstate.Cast<GlobalWriteCSVData>();
443
+ auto &csv_data = bind_data.Cast<WriteCSVData>();
444
+ auto &options = csv_data.options;
445
+
446
+ BufferedSerializer serializer;
447
+ if (!options.suffix.empty()) {
448
+ serializer.WriteBufferData(options.suffix);
449
+ } else if (global_state.written_anything) {
450
+ serializer.WriteBufferData(csv_data.newline);
451
+ }
452
+ global_state.WriteData(serializer.blob.data.get(), serializer.blob.size);
401
453
 
402
454
  global_state.handle->Close();
403
455
  global_state.handle.reset();
@@ -434,10 +486,11 @@ unique_ptr<PreparedBatchData> WriteCSVPrepareBatch(ClientContext &context, Funct
434
486
  DataChunk cast_chunk;
435
487
  cast_chunk.Initialize(Allocator::Get(context), types);
436
488
 
437
- auto batch = make_uniq<WriteCSVBatchData>();
438
489
  // write CSV chunks to the batch data
490
+ bool written_anything = false;
491
+ auto batch = make_uniq<WriteCSVBatchData>();
439
492
  for (auto &chunk : collection->Chunks()) {
440
- WriteCSVChunkInternal(context, bind_data, cast_chunk, batch->serializer, chunk);
493
+ WriteCSVChunkInternal(context, bind_data, cast_chunk, batch->serializer, chunk, written_anything);
441
494
  }
442
495
  return std::move(batch);
443
496
  }
@@ -449,8 +502,9 @@ void WriteCSVFlushBatch(ClientContext &context, FunctionData &bind_data, GlobalF
449
502
  PreparedBatchData &batch) {
450
503
  auto &csv_batch = batch.Cast<WriteCSVBatchData>();
451
504
  auto &global_state = gstate.Cast<GlobalWriteCSVData>();
505
+ auto &csv_data = bind_data.Cast<WriteCSVData>();
452
506
  auto &writer = csv_batch.serializer;
453
- global_state.WriteData(writer.blob.data.get(), writer.blob.size);
507
+ global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
454
508
  writer.Reset();
455
509
  }
456
510
 
@@ -33,8 +33,12 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
33
33
  bool complex_options = options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1;
34
34
  bool not_supported_options = options.null_padding;
35
35
 
36
- if (!options.run_parallel || null_or_empty || not_supported_options || complex_options ||
37
- options.new_line == NewLineIdentifier::MIX) {
36
+ auto number_of_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
37
+ if (options.parallel_mode != ParallelMode::PARALLEL && int64_t(files.size() * 2) >= number_of_threads) {
38
+ single_threaded = true;
39
+ }
40
+ if (options.parallel_mode == ParallelMode::SINGLE_THREADED || null_or_empty || not_supported_options ||
41
+ complex_options || options.new_line == NewLineIdentifier::MIX) {
38
42
  // not supported for parallel CSV reading
39
43
  single_threaded = true;
40
44
  }
@@ -173,7 +177,8 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
173
177
  } else if (loption == "normalize_names") {
174
178
  options.normalize_names = BooleanValue::Get(kv.second);
175
179
  } else if (loption == "parallel") {
176
- options.run_parallel = BooleanValue::Get(kv.second);
180
+ options.parallel_mode =
181
+ BooleanValue::Get(kv.second) ? ParallelMode::PARALLEL : ParallelMode::SINGLE_THREADED;
177
182
  } else {
178
183
  options.SetReadOption(loption, kv.second, names);
179
184
  }
@@ -1190,6 +1195,11 @@ unique_ptr<TableRef> ReadCSVReplacement(ClientContext &context, const string &ta
1190
1195
  vector<unique_ptr<ParsedExpression>> children;
1191
1196
  children.push_back(make_uniq<ConstantExpression>(Value(table_name)));
1192
1197
  table_function->function = make_uniq<FunctionExpression>("read_csv_auto", std::move(children));
1198
+
1199
+ if (!FileSystem::HasGlob(table_name)) {
1200
+ table_function->alias = FileSystem::ExtractBaseName(table_name);
1201
+ }
1202
+
1193
1203
  return std::move(table_function);
1194
1204
  }
1195
1205
 
@@ -1,14 +1,16 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev3546"
2
+ #define DUCKDB_VERSION "0.7.2-dev3666"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "bd8adef6d1"
5
+ #define DUCKDB_SOURCE_ID "eae707d54c"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
9
9
 
10
10
  #include <cstdint>
11
11
 
12
//! Turns the *expansion* of a macro argument into a string literal.
//! A single-level `#x` stringifies the argument exactly as spelled, without expanding it, so
//! DUCKDB_STRINGIFY(DUCKDB_OVERRIDE_PLATFORM_POSTFIX) would produce the macro's name instead of its value.
//! Routing through DUCKDB_STRINGIFY_IMPL forces the argument to be expanded before `#` is applied.
//! Arguments that are not macros are stringified unchanged, exactly as before.
#define DUCKDB_STRINGIFY_IMPL(x) #x
#define DUCKDB_STRINGIFY(x)      DUCKDB_STRINGIFY_IMPL(x)
13
+
12
14
  namespace duckdb {
13
15
 
14
16
  struct PragmaVersionData : public GlobalTableFunctionState {
@@ -89,6 +91,10 @@ string DuckDB::Platform() {
89
91
  #endif
90
92
  #ifdef __MINGW32__
91
93
  postfix = "_mingw";
94
+ #endif
95
+ // this is used for the windows R builds which use a separate build environment
96
+ #ifdef DUCKDB_OVERRIDE_PLATFORM_POSTFIX
97
+ postfix = DUCKDB_STRINGIFY(DUCKDB_OVERRIDE_PLATFORM_POSTFIX);
92
98
  #endif
93
99
  return os + "_" + arch + postfix;
94
100
  }
@@ -23,7 +23,7 @@ class ColumnDependencyManager {
23
23
  public:
24
24
  DUCKDB_API ColumnDependencyManager();
25
25
  DUCKDB_API ~ColumnDependencyManager();
26
- DUCKDB_API ColumnDependencyManager(ColumnDependencyManager &&other) = default;
26
+ ColumnDependencyManager(ColumnDependencyManager &&other) = default;
27
27
  ColumnDependencyManager(const ColumnDependencyManager &other) = delete;
28
28
 
29
29
  public:
@@ -46,7 +46,7 @@ public:
46
46
  DUCKDB_API void Reset();
47
47
 
48
48
  DUCKDB_API const vector<CatalogSearchEntry> &Get();
49
- DUCKDB_API const vector<CatalogSearchEntry> &GetSetPaths() {
49
+ const vector<CatalogSearchEntry> &GetSetPaths() {
50
50
  return set_paths;
51
51
  }
52
52
  DUCKDB_API const CatalogSearchEntry &GetDefault();
@@ -23,7 +23,7 @@ struct SimilarCatalogEntry {
23
23
  //! The schema of the entry.
24
24
  optional_ptr<SchemaCatalogEntry> schema;
25
25
 
26
- DUCKDB_API bool Found() const {
26
+ bool Found() const {
27
27
  return !name.empty();
28
28
  }
29
29
 
@@ -121,7 +121,7 @@ public:
121
121
  DUCKDB_API static bool UncaughtException();
122
122
 
123
123
  DUCKDB_API static string GetStackTrace(int max_depth = 120);
124
- DUCKDB_API static string FormatStackTrace(string message = "") {
124
+ static string FormatStackTrace(string message = "") {
125
125
  return (message + "\n" + GetStackTrace());
126
126
  }
127
127
 
@@ -269,7 +269,7 @@ public:
269
269
  class IOException : public Exception {
270
270
  public:
271
271
  DUCKDB_API explicit IOException(const string &msg);
272
- DUCKDB_API explicit IOException(ExceptionType exception_type, const string &msg) : Exception(exception_type, msg) {
272
+ explicit IOException(ExceptionType exception_type, const string &msg) : Exception(exception_type, msg) {
273
273
  }
274
274
 
275
275
  template <typename... Args>
@@ -371,7 +371,7 @@ public:
371
371
 
372
372
  class FatalException : public Exception {
373
373
  public:
374
- DUCKDB_API explicit FatalException(const string &msg) : FatalException(ExceptionType::FATAL, msg) {
374
+ explicit FatalException(const string &msg) : FatalException(ExceptionType::FATAL, msg) {
375
375
  }
376
376
  template <typename... Args>
377
377
  explicit FatalException(const string &msg, Args... params) : FatalException(ConstructMessage(msg, params...)) {
@@ -187,6 +187,8 @@ public:
187
187
  //! Extract the name of a file (e.g if the input is lib/example.dll the name is 'example.dll')
188
188
  DUCKDB_API static string ExtractName(const string &path);
189
189
 
190
+ //! Whether there is a glob in the string
191
+ DUCKDB_API static bool HasGlob(const string &str);
190
192
  //! Runs a glob on the file system, returning a list of matching files
191
193
  DUCKDB_API virtual vector<string> Glob(const string &path, FileOpener *opener = nullptr);
192
194
  DUCKDB_API vector<string> GlobFiles(const string &path, ClientContext &context,
@@ -224,6 +226,9 @@ public:
224
226
 
225
227
  //! Return the name of the filesystem. Used for forming diagnosis messages.
226
228
  DUCKDB_API virtual std::string GetName() const = 0;
229
+
230
+ //! Whether or not a file is remote or local, based only on file path
231
+ DUCKDB_API static bool IsRemoteFile(const string &path);
227
232
  };
228
233
 
229
234
  } // namespace duckdb
@@ -12,6 +12,7 @@
12
12
  #include "duckdb/main/client_context.hpp"
13
13
  #include "duckdb/main/client_data.hpp"
14
14
  #include "duckdb/common/atomic.hpp"
15
+ #include "duckdb/common/optional_ptr.hpp"
15
16
 
16
17
  namespace duckdb {
17
18
 
@@ -48,7 +49,7 @@ public:
48
49
  }
49
50
 
50
51
  //! helper function to get the HTTP state
51
- static HTTPState *TryGetState(FileOpener *opener) {
52
+ static optional_ptr<HTTPState> TryGetState(FileOpener *opener) {
52
53
  auto client_context = FileOpener::TryGetClientContext(opener);
53
54
  if (client_context) {
54
55
  return client_context->client_data->http_state.get();
@@ -12,14 +12,14 @@ public:
12
12
  int64_t upper;
13
13
 
14
14
  public:
15
- DUCKDB_API hugeint_t() = default;
15
+ hugeint_t() = default;
16
16
  DUCKDB_API hugeint_t(int64_t value); // NOLINT: Allow implicit conversion from `int64_t`
17
- DUCKDB_API constexpr hugeint_t(int64_t upper, uint64_t lower) : lower(lower), upper(upper) {
17
+ constexpr hugeint_t(int64_t upper, uint64_t lower) : lower(lower), upper(upper) {
18
18
  }
19
- DUCKDB_API constexpr hugeint_t(const hugeint_t &rhs) = default;
20
- DUCKDB_API constexpr hugeint_t(hugeint_t &&rhs) = default;
21
- DUCKDB_API hugeint_t &operator=(const hugeint_t &rhs) = default;
22
- DUCKDB_API hugeint_t &operator=(hugeint_t &&rhs) = default;
19
+ constexpr hugeint_t(const hugeint_t &rhs) = default;
20
+ constexpr hugeint_t(hugeint_t &&rhs) = default;
21
+ hugeint_t &operator=(const hugeint_t &rhs) = default;
22
+ hugeint_t &operator=(hugeint_t &&rhs) = default;
23
23
 
24
24
  DUCKDB_API string ToString() const;
25
25