duckdb 0.7.2-dev3546.0 → 0.7.2-dev3710.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/package.json +1 -1
  2. package/src/database.cpp +1 -0
  3. package/src/duckdb/extension/json/buffered_json_reader.cpp +56 -17
  4. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +56 -31
  5. package/src/duckdb/extension/json/include/json_common.hpp +5 -4
  6. package/src/duckdb/extension/json/include/json_executors.hpp +13 -18
  7. package/src/duckdb/extension/json/include/json_functions.hpp +3 -0
  8. package/src/duckdb/extension/json/include/json_scan.hpp +106 -153
  9. package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
  10. package/src/duckdb/extension/json/json_common.cpp +1 -1
  11. package/src/duckdb/extension/json/json_functions/copy_json.cpp +94 -38
  12. package/src/duckdb/extension/json/json_functions/json_contains.cpp +7 -8
  13. package/src/duckdb/extension/json/json_functions/json_create.cpp +7 -7
  14. package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +4 -4
  15. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +4 -4
  16. package/src/duckdb/extension/json/json_functions/json_structure.cpp +7 -5
  17. package/src/duckdb/extension/json/json_functions/json_transform.cpp +11 -9
  18. package/src/duckdb/extension/json/json_functions/json_valid.cpp +1 -1
  19. package/src/duckdb/extension/json/json_functions/read_json.cpp +166 -169
  20. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +37 -16
  21. package/src/duckdb/extension/json/json_functions.cpp +11 -4
  22. package/src/duckdb/extension/json/json_scan.cpp +593 -374
  23. package/src/duckdb/extension/parquet/parquet-extension.cpp +5 -0
  24. package/src/duckdb/src/common/exception.cpp +17 -0
  25. package/src/duckdb/src/common/exception_format_value.cpp +14 -0
  26. package/src/duckdb/src/common/file_system.cpp +78 -36
  27. package/src/duckdb/src/common/local_file_system.cpp +5 -16
  28. package/src/duckdb/src/common/types.cpp +1 -1
  29. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +1 -1
  30. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +12 -6
  31. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +10 -0
  32. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +2 -2
  33. package/src/duckdb/src/function/pragma/pragma_queries.cpp +6 -4
  34. package/src/duckdb/src/function/table/copy_csv.cpp +66 -12
  35. package/src/duckdb/src/function/table/read_csv.cpp +16 -3
  36. package/src/duckdb/src/function/table/version/pragma_version.cpp +8 -2
  37. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/column_dependency_manager.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
  39. package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +1 -1
  40. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -3
  41. package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +26 -0
  42. package/src/duckdb/src/include/duckdb/common/file_system.hpp +11 -0
  43. package/src/duckdb/src/include/duckdb/common/http_state.hpp +2 -1
  44. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +6 -6
  45. package/src/duckdb/src/include/duckdb/common/limits.hpp +46 -46
  46. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +8 -8
  47. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +6 -6
  48. package/src/duckdb/src/include/duckdb/common/operator/convert_to_string.hpp +1 -1
  49. package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +2 -4
  50. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +1 -1
  53. package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
  54. package/src/duckdb/src/include/duckdb/common/string_util.hpp +25 -7
  55. package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +10 -10
  56. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -12
  57. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_iterators.hpp +2 -2
  58. package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -1
  59. package/src/duckdb/src/include/duckdb/common/types.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/common/winapi.hpp +1 -1
  61. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -3
  63. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +10 -14
  64. package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
  65. package/src/duckdb/src/include/duckdb/function/udf_function.hpp +56 -50
  66. package/src/duckdb/src/include/duckdb/main/appender.hpp +2 -2
  67. package/src/duckdb/src/include/duckdb/main/client_context.hpp +2 -2
  68. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -1
  69. package/src/duckdb/src/include/duckdb/main/connection.hpp +8 -9
  70. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  71. package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -3
  72. package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -7
  73. package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +1 -1
  74. package/src/duckdb/src/include/duckdb/parser/column_list.hpp +7 -7
  75. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +2 -2
  77. package/src/duckdb/src/include/duckdb/parser/keyword_helper.hpp +5 -0
  78. package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +2 -2
  79. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -1
  80. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +1 -1
  81. package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +2 -2
  82. package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +2 -2
  83. package/src/duckdb/src/main/db_instance_cache.cpp +5 -3
  84. package/src/duckdb/src/main/extension/extension_install.cpp +22 -18
  85. package/src/duckdb/src/parser/expression/collate_expression.cpp +1 -1
  86. package/src/duckdb/src/parser/keyword_helper.cpp +11 -1
  87. package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
  88. package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -2
  89. package/src/duckdb/src/parser/tableref.cpp +1 -1
  90. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +9 -4
  91. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +1 -1
  92. package/src/duckdb/src/storage/single_file_block_manager.cpp +0 -4
  93. package/src/duckdb/src/storage/storage_manager.cpp +3 -0
  94. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5735 -5773
@@ -724,6 +724,11 @@ unique_ptr<TableRef> ParquetScanReplacement(ClientContext &context, const string
724
724
  vector<unique_ptr<ParsedExpression>> children;
725
725
  children.push_back(make_uniq<ConstantExpression>(Value(table_name)));
726
726
  table_function->function = make_uniq<FunctionExpression>("parquet_scan", std::move(children));
727
+
728
+ if (!FileSystem::HasGlob(table_name)) {
729
+ table_function->alias = FileSystem::ExtractBaseName(table_name);
730
+ }
731
+
727
732
  return std::move(table_function);
728
733
  }
729
734
 
@@ -59,6 +59,23 @@ string Exception::GetStackTrace(int max_depth) {
59
59
  }
60
60
 
61
61
  string Exception::ConstructMessageRecursive(const string &msg, std::vector<ExceptionFormatValue> &values) {
62
+ #ifdef DEBUG
63
+ // Verify that we have the required amount of values for the message
64
+ idx_t parameter_count = 0;
65
+ for (idx_t i = 0; i < msg.size(); i++) {
66
+ if (msg[i] != '%') {
67
+ continue;
68
+ }
69
+ if (i < msg.size() && msg[i + 1] == '%') {
70
+ i++;
71
+ continue;
72
+ }
73
+ parameter_count++;
74
+ }
75
+ if (parameter_count != values.size()) {
76
+ throw InternalException("Expected %d parameters, received %d", parameter_count, values.size());
77
+ }
78
+ #endif
62
79
  return ExceptionFormatValue::Format(msg, values);
63
80
  }
64
81
 
@@ -3,6 +3,7 @@
3
3
  #include "fmt/format.h"
4
4
  #include "fmt/printf.h"
5
5
  #include "duckdb/common/types/hugeint.hpp"
6
+ #include "duckdb/parser/keyword_helper.hpp"
6
7
 
7
8
  namespace duckdb {
8
9
 
@@ -40,6 +41,19 @@ template <>
40
41
  ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(string value) {
41
42
  return ExceptionFormatValue(std::move(value));
42
43
  }
44
+
45
+ template <>
46
+ ExceptionFormatValue
47
+ ExceptionFormatValue::CreateFormatValue(SQLString value) { // NOLINT: templating requires us to copy value here
48
+ return KeywordHelper::WriteQuoted(value.raw_string, '\'');
49
+ }
50
+
51
+ template <>
52
+ ExceptionFormatValue
53
+ ExceptionFormatValue::CreateFormatValue(SQLIdentifier value) { // NOLINT: templating requires us to copy value here
54
+ return KeywordHelper::WriteOptionallyQuoted(value.raw_string, '"');
55
+ }
56
+
43
57
  template <>
44
58
  ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(const char *value) {
45
59
  return ExceptionFormatValue(string(value));
@@ -11,6 +11,7 @@
11
11
  #include "duckdb/main/client_data.hpp"
12
12
  #include "duckdb/main/database.hpp"
13
13
  #include "duckdb/main/extension_helper.hpp"
14
+ #include "duckdb/common/windows_util.hpp"
14
15
 
15
16
  #include <cstdint>
16
17
  #include <cstdio>
@@ -53,6 +54,14 @@ bool PathMatched(const string &path, const string &sub_path) {
53
54
 
54
55
  #ifndef _WIN32
55
56
 
57
+ string FileSystem::GetEnvVariable(const string &name) {
58
+ const char *env = getenv(name.c_str());
59
+ if (!env) {
60
+ return string();
61
+ }
62
+ return env;
63
+ }
64
+
56
65
  bool FileSystem::IsPathAbsolute(const string &path) {
57
66
  auto path_separator = FileSystem::PathSeparator();
58
67
  return PathMatched(path, path_separator);
@@ -85,36 +94,59 @@ string FileSystem::GetWorkingDirectory() {
85
94
  }
86
95
  return string(buffer.get());
87
96
  }
97
+
98
+ string FileSystem::NormalizeAbsolutePath(const string &path) {
99
+ D_ASSERT(IsPathAbsolute(path));
100
+ return path;
101
+ }
102
+
88
103
  #else
89
104
 
90
- bool FileSystem::IsPathAbsolute(const string &path) {
91
- // 1) A single backslash
92
- auto sub_path = FileSystem::PathSeparator();
93
- if (PathMatched(path, sub_path)) {
94
- return true;
105
+ string FileSystem::GetEnvVariable(const string &env) {
106
+ // first convert the environment variable name to the correct encoding
107
+ auto env_w = WindowsUtil::UTF8ToUnicode(env.c_str());
108
+ // use _wgetenv to get the value
109
+ auto res_w = _wgetenv(env_w.c_str());
110
+ if (!res_w) {
111
+ // no environment variable of this name found
112
+ return string();
95
113
  }
96
- // 2) check if starts with a double-backslash (i.e., \\)
97
- sub_path += FileSystem::PathSeparator();
98
- if (PathMatched(path, sub_path)) {
114
+ return WindowsUtil::UnicodeToUTF8(res_w);
115
+ }
116
+
117
+ bool FileSystem::IsPathAbsolute(const string &path) {
118
+ // 1) A single backslash or forward-slash
119
+ if (PathMatched(path, "\\") || PathMatched(path, "/")) {
99
120
  return true;
100
121
  }
101
- // 3) A disk designator with a backslash (e.g., C:\)
122
+ // 2) A disk designator with a backslash (e.g., C:\ or C:/)
102
123
  auto path_aux = path;
103
124
  path_aux.erase(0, 1);
104
- sub_path = ":" + FileSystem::PathSeparator();
105
- if (PathMatched(path_aux, sub_path)) {
125
+ if (PathMatched(path_aux, ":\\") || PathMatched(path_aux, ":/")) {
106
126
  return true;
107
127
  }
108
128
  return false;
109
129
  }
110
130
 
131
+ string FileSystem::NormalizeAbsolutePath(const string &path) {
132
+ D_ASSERT(IsPathAbsolute(path));
133
+ auto result = StringUtil::Lower(FileSystem::ConvertSeparators(path));
134
+ if (PathMatched(result, "\\")) {
135
+ // Path starts with a single backslash or forward slash
136
+ // prepend drive letter
137
+ return GetWorkingDirectory().substr(0, 2) + result;
138
+ }
139
+ return result;
140
+ }
141
+
111
142
  string FileSystem::PathSeparator() {
112
143
  return "\\";
113
144
  }
114
145
 
115
146
  void FileSystem::SetWorkingDirectory(const string &path) {
116
- if (!SetCurrentDirectory(path.c_str())) {
117
- throw IOException("Could not change working directory!");
147
+ auto unicode_path = WindowsUtil::UTF8ToUnicode(path.c_str());
148
+ if (!SetCurrentDirectoryW(unicode_path.c_str())) {
149
+ throw IOException("Could not change working directory to \"%s\"", path);
118
150
  }
119
151
  }
120
152
 
@@ -134,16 +166,16 @@ idx_t FileSystem::GetAvailableMemory() {
134
166
  }
135
167
 
136
168
  string FileSystem::GetWorkingDirectory() {
137
- idx_t count = GetCurrentDirectory(0, nullptr);
169
+ idx_t count = GetCurrentDirectoryW(0, nullptr);
138
170
  if (count == 0) {
139
171
  throw IOException("Could not get working directory!");
140
172
  }
141
- auto buffer = make_unsafe_array<char>(count);
142
- idx_t ret = GetCurrentDirectory(count, buffer.get());
173
+ auto buffer = make_unsafe_array<wchar_t>(count);
174
+ idx_t ret = GetCurrentDirectoryW(count, buffer.get());
143
175
  if (count != ret + 1) {
144
176
  throw IOException("Could not get working directory!");
145
177
  }
146
- return string(buffer.get(), ret);
178
+ return WindowsUtil::UnicodeToUTF8(buffer.get());
147
179
  }
148
180
 
149
181
  #endif
@@ -161,13 +193,7 @@ string FileSystem::ConvertSeparators(const string &path) {
161
193
  return path;
162
194
  }
163
195
  // on windows-based systems we accept both
164
- string result = path;
165
- for (idx_t i = 0; i < result.size(); i++) {
166
- if (result[i] == '/') {
167
- result[i] = separator;
168
- }
169
- }
170
- return result;
196
+ return StringUtil::Replace(path, "/", separator_str);
171
197
  }
172
198
 
173
199
  string FileSystem::ExtractName(const string &path) {
@@ -202,14 +228,10 @@ string FileSystem::GetHomeDirectory(optional_ptr<FileOpener> opener) {
202
228
  }
203
229
  // fallback to the default home directories for the specified system
204
230
  #ifdef DUCKDB_WINDOWS
205
- const char *homedir = getenv("USERPROFILE");
231
+ return FileSystem::GetEnvVariable("USERPROFILE");
206
232
  #else
207
- const char *homedir = getenv("HOME");
233
+ return FileSystem::GetEnvVariable("HOME");
208
234
  #endif
209
- if (homedir) {
210
- return homedir;
211
- }
212
- return string();
213
235
  }
214
236
 
215
237
  string FileSystem::GetHomeDirectory() {
@@ -305,6 +327,20 @@ void FileSystem::FileSync(FileHandle &handle) {
305
327
  throw NotImplementedException("%s: FileSync is not implemented!", GetName());
306
328
  }
307
329
 
330
+ bool FileSystem::HasGlob(const string &str) {
331
+ for (idx_t i = 0; i < str.size(); i++) {
332
+ switch (str[i]) {
333
+ case '*':
334
+ case '?':
335
+ case '[':
336
+ return true;
337
+ default:
338
+ break;
339
+ }
340
+ }
341
+ return false;
342
+ }
343
+
308
344
  vector<string> FileSystem::Glob(const string &path, FileOpener *opener) {
309
345
  throw NotImplementedException("%s: Glob is not implemented!", GetName());
310
346
  }
@@ -333,12 +369,8 @@ vector<string> FileSystem::GlobFiles(const string &pattern, ClientContext &conte
333
369
  auto result = Glob(pattern);
334
370
  if (result.empty()) {
335
371
  string required_extension;
336
- const string prefixes[] = {"http://", "https://", "s3://"};
337
- for (auto &prefix : prefixes) {
338
- if (StringUtil::StartsWith(pattern, prefix)) {
339
- required_extension = "httpfs";
340
- break;
341
- }
372
+ if (FileSystem::IsRemoteFile(pattern)) {
373
+ required_extension = "httpfs";
342
374
  }
343
375
  if (!required_extension.empty() && !context.db->ExtensionIsLoaded(required_extension)) {
344
376
  // an extension is required to read this file but it is not loaded - try to load it
@@ -455,4 +487,14 @@ FileType FileHandle::GetType() {
455
487
  return file_system.GetFileType(*this);
456
488
  }
457
489
 
490
+ bool FileSystem::IsRemoteFile(const string &path) {
491
+ const string prefixes[] = {"http://", "https://", "s3://"};
492
+ for (auto &prefix : prefixes) {
493
+ if (StringUtil::StartsWith(path, prefix)) {
494
+ return true;
495
+ }
496
+ }
497
+ return false;
498
+ }
499
+
458
500
  } // namespace duckdb
@@ -162,8 +162,9 @@ static FileType GetFileTypeInternal(int fd) { // LCOV_EXCL_START
162
162
  }
163
163
  } // LCOV_EXCL_STOP
164
164
 
165
- unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &path, uint8_t flags, FileLockType lock_type,
165
+ unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &path_p, uint8_t flags, FileLockType lock_type,
166
166
  FileCompressionType compression, FileOpener *opener) {
167
+ auto path = FileSystem::ExpandPath(path_p, opener);
167
168
  if (compression != FileCompressionType::UNCOMPRESSED) {
168
169
  throw NotImplementedException("Unsupported compression type for default file system");
169
170
  }
@@ -506,8 +507,9 @@ public:
506
507
  };
507
508
  };
508
509
 
509
- unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &path, uint8_t flags, FileLockType lock_type,
510
+ unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &path_p, uint8_t flags, FileLockType lock_type,
510
511
  FileCompressionType compression, FileOpener *opener) {
512
+ auto path = FileSystem::ExpandPath(path_p, opener);
511
513
  if (compression != FileCompressionType::UNCOMPRESSED) {
512
514
  throw NotImplementedException("Unsupported compression type for default file system");
513
515
  }
@@ -775,7 +777,7 @@ void LocalFileSystem::MoveFile(const string &source, const string &target) {
775
777
  auto source_unicode = WindowsUtil::UTF8ToUnicode(source.c_str());
776
778
  auto target_unicode = WindowsUtil::UTF8ToUnicode(target.c_str());
777
779
  if (!MoveFileW(source_unicode.c_str(), target_unicode.c_str())) {
778
- throw IOException("Could not move file");
780
+ throw IOException("Could not move file: %s", GetLastErrorAsString());
779
781
  }
780
782
  }
781
783
 
@@ -819,19 +821,6 @@ idx_t LocalFileSystem::SeekPosition(FileHandle &handle) {
819
821
  return GetFilePointer(handle);
820
822
  }
821
823
 
822
- static bool HasGlob(const string &str) {
823
- for (idx_t i = 0; i < str.size(); i++) {
824
- switch (str[i]) {
825
- case '*':
826
- case '?':
827
- case '[':
828
- return true;
829
- default:
830
- break;
831
- }
832
- }
833
- return false;
834
- }
835
824
  static bool IsCrawl(const string &glob) {
836
825
  // glob must match exactly
837
826
  return glob == "**";
@@ -349,7 +349,7 @@ string LogicalType::ToString() const {
349
349
  auto &child_types = StructType::GetChildTypes(*this);
350
350
  string ret = "STRUCT(";
351
351
  for (size_t i = 0; i < child_types.size(); i++) {
352
- ret += KeywordHelper::WriteOptionallyQuoted(child_types[i].first) + " " + child_types[i].second.ToString();
352
+ ret += StringUtil::Format("%s %s", SQLIdentifier(child_types[i].first), child_types[i].second);
353
353
  if (i < child_types.size() - 1) {
354
354
  ret += ", ";
355
355
  }
@@ -77,7 +77,7 @@ static void AppendFilteredToResult(Vector &lambda_vector, list_entry_t *result_e
77
77
  }
78
78
 
79
79
  // found a true value
80
- if (lambda_validity.RowIsValid(entry) && lambda_values[entry] > 0) {
80
+ if (lambda_validity.RowIsValid(entry) && lambda_values[entry]) {
81
81
  true_sel.set_index(true_count++, i);
82
82
  curr_list_len++;
83
83
  }
@@ -1172,6 +1172,16 @@ void BufferedCSVReader::SkipEmptyLines() {
1172
1172
  }
1173
1173
  }
1174
1174
 
1175
+ void UpdateMaxLineLength(ClientContext &context, idx_t line_length) {
1176
+ if (!context.client_data->debug_set_max_line_length) {
1177
+ return;
1178
+ }
1179
+ if (line_length < context.client_data->debug_max_line_length) {
1180
+ return;
1181
+ }
1182
+ context.client_data->debug_max_line_length = line_length;
1183
+ }
1184
+
1175
1185
  bool BufferedCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message) {
1176
1186
  // used for parsing algorithm
1177
1187
  bool finished_chunk = false;
@@ -1239,9 +1249,7 @@ add_row : {
1239
1249
  return false;
1240
1250
  }
1241
1251
  finished_chunk = AddRow(insert_chunk, column, error_message);
1242
- if (context.client_data->max_line_length < position - line_start) {
1243
- context.client_data->max_line_length = position - line_start;
1244
- }
1252
+ UpdateMaxLineLength(context, position - line_start);
1245
1253
  if (!error_message.empty()) {
1246
1254
  return false;
1247
1255
  }
@@ -1379,9 +1387,7 @@ final_state:
1379
1387
  AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
1380
1388
  finished_chunk = AddRow(insert_chunk, column, error_message);
1381
1389
  SkipEmptyLines();
1382
- if (context.client_data->max_line_length < position - line_start) {
1383
- context.client_data->max_line_length = position - line_start;
1384
- }
1390
+ UpdateMaxLineLength(context, position - line_start);
1385
1391
  if (!error_message.empty()) {
1386
1392
  return false;
1387
1393
  }
@@ -183,6 +183,12 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
183
183
  }
184
184
 
185
185
  void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value &value) {
186
+ if (loption == "new_line") {
187
+ // Steal this from SetBaseOption so we can write different newlines (e.g., format JSON ARRAY)
188
+ write_newline = ParseString(value, loption);
189
+ return;
190
+ }
191
+
186
192
  if (SetBaseOption(loption, value)) {
187
193
  return;
188
194
  }
@@ -199,6 +205,10 @@ void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value
199
205
  }
200
206
  SetDateFormat(LogicalTypeId::TIMESTAMP, format, false);
201
207
  SetDateFormat(LogicalTypeId::TIMESTAMP_TZ, format, false);
208
+ } else if (loption == "prefix") {
209
+ prefix = ParseString(value, loption);
210
+ } else if (loption == "suffix") {
211
+ suffix = ParseString(value, loption);
202
212
  } else {
203
213
  throw BinderException("Unrecognized option CSV writer \"%s\"", loption);
204
214
  }
@@ -51,8 +51,8 @@ static void WriteCopyStatement(FileSystem &fs, stringstream &ss, CopyInfo &info,
51
51
  ss << KeywordHelper::WriteOptionallyQuoted(exported_table.schema_name) << ".";
52
52
  }
53
53
 
54
- ss << KeywordHelper::WriteOptionallyQuoted(exported_table.table_name) << " FROM '" << exported_table.file_path
55
- << "' (";
54
+ ss << StringUtil::Format("%s FROM %s (", SQLIdentifier(exported_table.table_name),
55
+ SQLString(exported_table.file_path));
56
56
 
57
57
  // write the copy options
58
58
  ss << "FORMAT '" << info.format << "'";
@@ -41,7 +41,7 @@ string PragmaShowTables(ClientContext &context, const FunctionParameters &parame
41
41
  )
42
42
  SELECT "name"
43
43
  FROM db_objects
44
- ORDER BY "name";)EOF", where_clause, where_clause, where_clause);
44
+ ORDER BY "name";)EOF", where_clause, where_clause);
45
45
  // clang-format on
46
46
 
47
47
  return pragma_query;
@@ -50,15 +50,17 @@ string PragmaShowTables(ClientContext &context, const FunctionParameters &parame
50
50
  string PragmaShowTablesExpanded(ClientContext &context, const FunctionParameters &parameters) {
51
51
  return R"(
52
52
  SELECT
53
+ t.database_name AS database,
54
+ t.schema_name AS schema,
53
55
  t.table_name,
54
56
  LIST(c.column_name order by c.column_index) AS column_names,
55
57
  LIST(c.data_type order by c.column_index) AS column_types,
56
- FIRST(t.temporary) AS temporary
58
+ FIRST(t.temporary) AS temporary,
57
59
  FROM duckdb_tables t
58
60
  JOIN duckdb_columns c
59
61
  USING (table_oid)
60
- GROUP BY t.table_name
61
- ORDER BY t.table_name;
62
+ GROUP BY t.database_name, t.schema_name, t.table_name
63
+ ORDER BY t.database_name, t.schema_name, t.table_name;
62
64
  )";
63
65
  }
64
66
 
@@ -1,15 +1,16 @@
1
1
  #include "duckdb/common/bind_helpers.hpp"
2
2
  #include "duckdb/common/file_system.hpp"
3
+ #include "duckdb/common/multi_file_reader.hpp"
3
4
  #include "duckdb/common/serializer/buffered_serializer.hpp"
4
5
  #include "duckdb/common/string_util.hpp"
6
+ #include "duckdb/common/types/column/column_data_collection.hpp"
5
7
  #include "duckdb/common/types/string_type.hpp"
6
8
  #include "duckdb/common/vector_operations/vector_operations.hpp"
7
9
  #include "duckdb/function/copy_function.hpp"
8
10
  #include "duckdb/function/scalar/string_functions.hpp"
9
11
  #include "duckdb/function/table/read_csv.hpp"
10
12
  #include "duckdb/parser/parsed_data/copy_info.hpp"
11
- #include "duckdb/common/multi_file_reader.hpp"
12
- #include "duckdb/common/types/column/column_data_collection.hpp"
13
+
13
14
  #include <limits>
14
15
 
15
16
  namespace duckdb {
@@ -57,6 +58,15 @@ void BaseCSVData::Finalize() {
57
58
  SubstringDetection(options.escape, options.null_str, "ESCAPE", "NULL");
58
59
  }
59
60
  }
61
+
62
+ if (!options.prefix.empty() || !options.suffix.empty()) {
63
+ if (options.prefix.empty() || options.suffix.empty()) {
64
+ throw BinderException("COPY ... (FORMAT CSV) must have both PREFIX and SUFFIX, or none at all");
65
+ }
66
+ if (options.header) {
67
+ throw BinderException("COPY ... (FORMAT CSV)'s HEADER cannot be combined with PREFIX/SUFFIX");
68
+ }
69
+ }
60
70
  }
61
71
 
62
72
  static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &info, vector<string> &names,
@@ -85,6 +95,9 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &i
85
95
  bind_data->requires_quotes[bind_data->options.delimiter[0]] = true;
86
96
  bind_data->requires_quotes[bind_data->options.quote[0]] = true;
87
97
  }
98
+ if (!bind_data->options.write_newline.empty()) {
99
+ bind_data->newline = bind_data->options.write_newline;
100
+ }
88
101
  return std::move(bind_data);
89
102
  }
90
103
 
@@ -251,24 +264,41 @@ struct LocalWriteCSVData : public LocalFunctionData {
251
264
  BufferedSerializer serializer;
252
265
  //! A chunk with VARCHAR columns to cast intermediates into
253
266
  DataChunk cast_chunk;
267
+ //! If we've written any rows yet, allows us to prevent a trailing comma when writing JSON ARRAY
268
+ bool written_anything = false;
254
269
  };
255
270
 
256
271
  struct GlobalWriteCSVData : public GlobalFunctionData {
257
- GlobalWriteCSVData(FileSystem &fs, const string &file_path, FileCompressionType compression) : fs(fs) {
272
+ GlobalWriteCSVData(FileSystem &fs, const string &file_path, FileCompressionType compression)
273
+ : fs(fs), written_anything(false) {
258
274
  handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW,
259
275
  FileLockType::WRITE_LOCK, compression);
260
276
  }
261
277
 
278
+ //! Write generic data, e.g., CSV header
262
279
  void WriteData(const_data_ptr_t data, idx_t size) {
263
280
  lock_guard<mutex> flock(lock);
264
281
  handle->Write((void *)data, size);
265
282
  }
266
283
 
284
+ //! Write rows
285
+ void WriteRows(const_data_ptr_t data, idx_t size, const string &newline) {
286
+ lock_guard<mutex> flock(lock);
287
+ if (written_anything) {
288
+ handle->Write((void *)newline.c_str(), newline.length());
289
+ } else {
290
+ written_anything = true;
291
+ }
292
+ handle->Write((void *)data, size);
293
+ }
294
+
267
295
  FileSystem &fs;
268
296
  //! The mutex for writing to the physical file
269
297
  mutex lock;
270
298
  //! The file handle to write to
271
299
  unique_ptr<FileHandle> handle;
300
+ //! If we've written any rows yet, allows us to prevent a trailing comma when writing JSON ARRAY
301
+ bool written_anything;
272
302
  };
273
303
 
274
304
  static unique_ptr<LocalFunctionData> WriteCSVInitializeLocal(ExecutionContext &context, FunctionData &bind_data) {
@@ -290,6 +320,10 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
290
320
  auto global_data =
291
321
  make_uniq<GlobalWriteCSVData>(FileSystem::GetFileSystem(context), file_path, options.compression);
292
322
 
323
+ if (!options.prefix.empty()) {
324
+ global_data->WriteData((const_data_ptr_t)options.prefix.c_str(), options.prefix.size());
325
+ }
326
+
293
327
  if (options.header) {
294
328
  BufferedSerializer serializer;
295
329
  // write the header line to the file
@@ -304,11 +338,12 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
304
338
 
305
339
  global_data->WriteData(serializer.blob.data.get(), serializer.blob.size);
306
340
  }
341
+
307
342
  return std::move(global_data);
308
343
  }
309
344
 
310
345
  static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_data, DataChunk &cast_chunk,
311
- BufferedSerializer &writer, DataChunk &input) {
346
+ BufferedSerializer &writer, DataChunk &input, bool &written_anything) {
312
347
  auto &csv_data = bind_data.Cast<WriteCSVData>();
313
348
  auto &options = csv_data.options;
314
349
 
@@ -338,6 +373,11 @@ static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_dat
338
373
  cast_chunk.Flatten();
339
374
  // now loop over the vectors and output the values
340
375
  for (idx_t row_idx = 0; row_idx < cast_chunk.size(); row_idx++) {
376
+ if (row_idx == 0 && !written_anything) {
377
+ written_anything = true;
378
+ } else {
379
+ writer.WriteBufferData(csv_data.newline);
380
+ }
341
381
  // write values
342
382
  for (idx_t col_idx = 0; col_idx < cast_chunk.ColumnCount(); col_idx++) {
343
383
  if (col_idx != 0) {
@@ -357,7 +397,6 @@ static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_dat
357
397
  WriteQuotedString(writer, csv_data, str_data[row_idx].GetData(), str_data[row_idx].GetSize(),
358
398
  csv_data.options.force_quote[col_idx]);
359
399
  }
360
- writer.WriteBufferData(csv_data.newline);
361
400
  }
362
401
  }
363
402
 
@@ -368,13 +407,15 @@ static void WriteCSVSink(ExecutionContext &context, FunctionData &bind_data, Glo
368
407
  auto &global_state = gstate.Cast<GlobalWriteCSVData>();
369
408
 
370
409
  // write data into the local buffer
371
- WriteCSVChunkInternal(context.client, bind_data, local_data.cast_chunk, local_data.serializer, input);
410
+ WriteCSVChunkInternal(context.client, bind_data, local_data.cast_chunk, local_data.serializer, input,
411
+ local_data.written_anything);
372
412
 
373
413
  // check if we should flush what we have currently written
374
414
  auto &writer = local_data.serializer;
375
415
  if (writer.blob.size >= csv_data.flush_size) {
376
- global_state.WriteData(writer.blob.data.get(), writer.blob.size);
416
+ global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
377
417
  writer.Reset();
418
+ local_data.written_anything = false;
378
419
  }
379
420
  }
380
421
 
@@ -385,10 +426,11 @@ static void WriteCSVCombine(ExecutionContext &context, FunctionData &bind_data,
385
426
  LocalFunctionData &lstate) {
386
427
  auto &local_data = lstate.Cast<LocalWriteCSVData>();
387
428
  auto &global_state = gstate.Cast<GlobalWriteCSVData>();
429
+ auto &csv_data = bind_data.Cast<WriteCSVData>();
388
430
  auto &writer = local_data.serializer;
389
431
  // flush the local writer
390
- if (writer.blob.size > 0) {
391
- global_state.WriteData(writer.blob.data.get(), writer.blob.size);
432
+ if (local_data.written_anything) {
433
+ global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
392
434
  writer.Reset();
393
435
  }
394
436
  }
@@ -398,6 +440,16 @@ static void WriteCSVCombine(ExecutionContext &context, FunctionData &bind_data,
398
440
  //===--------------------------------------------------------------------===//
399
441
  void WriteCSVFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) {
400
442
  auto &global_state = gstate.Cast<GlobalWriteCSVData>();
443
+ auto &csv_data = bind_data.Cast<WriteCSVData>();
444
+ auto &options = csv_data.options;
445
+
446
+ BufferedSerializer serializer;
447
+ if (!options.suffix.empty()) {
448
+ serializer.WriteBufferData(options.suffix);
449
+ } else if (global_state.written_anything) {
450
+ serializer.WriteBufferData(csv_data.newline);
451
+ }
452
+ global_state.WriteData(serializer.blob.data.get(), serializer.blob.size);
401
453
 
402
454
  global_state.handle->Close();
403
455
  global_state.handle.reset();
@@ -434,10 +486,11 @@ unique_ptr<PreparedBatchData> WriteCSVPrepareBatch(ClientContext &context, Funct
434
486
  DataChunk cast_chunk;
435
487
  cast_chunk.Initialize(Allocator::Get(context), types);
436
488
 
437
- auto batch = make_uniq<WriteCSVBatchData>();
438
489
  // write CSV chunks to the batch data
490
+ bool written_anything = false;
491
+ auto batch = make_uniq<WriteCSVBatchData>();
439
492
  for (auto &chunk : collection->Chunks()) {
440
- WriteCSVChunkInternal(context, bind_data, cast_chunk, batch->serializer, chunk);
493
+ WriteCSVChunkInternal(context, bind_data, cast_chunk, batch->serializer, chunk, written_anything);
441
494
  }
442
495
  return std::move(batch);
443
496
  }
@@ -449,8 +502,9 @@ void WriteCSVFlushBatch(ClientContext &context, FunctionData &bind_data, GlobalF
449
502
  PreparedBatchData &batch) {
450
503
  auto &csv_batch = batch.Cast<WriteCSVBatchData>();
451
504
  auto &global_state = gstate.Cast<GlobalWriteCSVData>();
505
+ auto &csv_data = bind_data.Cast<WriteCSVData>();
452
506
  auto &writer = csv_batch.serializer;
453
- global_state.WriteData(writer.blob.data.get(), writer.blob.size);
507
+ global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
454
508
  writer.Reset();
455
509
  }
456
510