duckdb 0.7.2-dev3546.0 → 0.7.2-dev3710.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/database.cpp +1 -0
- package/src/duckdb/extension/json/buffered_json_reader.cpp +56 -17
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +56 -31
- package/src/duckdb/extension/json/include/json_common.hpp +5 -4
- package/src/duckdb/extension/json/include/json_executors.hpp +13 -18
- package/src/duckdb/extension/json/include/json_functions.hpp +3 -0
- package/src/duckdb/extension/json/include/json_scan.hpp +106 -153
- package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
- package/src/duckdb/extension/json/json_common.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +94 -38
- package/src/duckdb/extension/json/json_functions/json_contains.cpp +7 -8
- package/src/duckdb/extension/json/json_functions/json_create.cpp +7 -7
- package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +4 -4
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +4 -4
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +7 -5
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +11 -9
- package/src/duckdb/extension/json/json_functions/json_valid.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/read_json.cpp +166 -169
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +37 -16
- package/src/duckdb/extension/json/json_functions.cpp +11 -4
- package/src/duckdb/extension/json/json_scan.cpp +593 -374
- package/src/duckdb/extension/parquet/parquet-extension.cpp +5 -0
- package/src/duckdb/src/common/exception.cpp +17 -0
- package/src/duckdb/src/common/exception_format_value.cpp +14 -0
- package/src/duckdb/src/common/file_system.cpp +78 -36
- package/src/duckdb/src/common/local_file_system.cpp +5 -16
- package/src/duckdb/src/common/types.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +12 -6
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +10 -0
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +2 -2
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +6 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +66 -12
- package/src/duckdb/src/function/table/read_csv.cpp +16 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +8 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/column_dependency_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +26 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/limits.hpp +46 -46
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +8 -8
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/operator/convert_to_string.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +2 -4
- package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +25 -7
- package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +10 -10
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -12
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_iterators.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/winapi.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -3
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +10 -14
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/udf_function.hpp +56 -50
- package/src/duckdb/src/include/duckdb/main/appender.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/connection.hpp +8 -9
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -7
- package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/column_list.hpp +7 -7
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/keyword_helper.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +2 -2
- package/src/duckdb/src/main/db_instance_cache.cpp +5 -3
- package/src/duckdb/src/main/extension/extension_install.cpp +22 -18
- package/src/duckdb/src/parser/expression/collate_expression.cpp +1 -1
- package/src/duckdb/src/parser/keyword_helper.cpp +11 -1
- package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
- package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -2
- package/src/duckdb/src/parser/tableref.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +9 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +1 -1
- package/src/duckdb/src/storage/single_file_block_manager.cpp +0 -4
- package/src/duckdb/src/storage/storage_manager.cpp +3 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5735 -5773
@@ -724,6 +724,11 @@ unique_ptr<TableRef> ParquetScanReplacement(ClientContext &context, const string
|
|
724
724
|
vector<unique_ptr<ParsedExpression>> children;
|
725
725
|
children.push_back(make_uniq<ConstantExpression>(Value(table_name)));
|
726
726
|
table_function->function = make_uniq<FunctionExpression>("parquet_scan", std::move(children));
|
727
|
+
|
728
|
+
if (!FileSystem::HasGlob(table_name)) {
|
729
|
+
table_function->alias = FileSystem::ExtractBaseName(table_name);
|
730
|
+
}
|
731
|
+
|
727
732
|
return std::move(table_function);
|
728
733
|
}
|
729
734
|
|
@@ -59,6 +59,23 @@ string Exception::GetStackTrace(int max_depth) {
|
|
59
59
|
}
|
60
60
|
|
61
61
|
//! Formats `msg` using the collected `values` via ExceptionFormatValue::Format.
//! In DEBUG builds the number of '%' parameters in `msg` is first compared against `values.size()`;
//! a mismatch throws an InternalException.
string Exception::ConstructMessageRecursive(const string &msg, std::vector<ExceptionFormatValue> &values) {
#ifdef DEBUG
    // Verify that we have the required amount of values for the message
    idx_t parameter_count = 0;
    for (idx_t i = 0; i < msg.size(); i++) {
        if (msg[i] != '%') {
            continue;
        }
        // a doubled "%%" is skipped and not counted as a parameter;
        // only look ahead when the next character actually exists
        if (i + 1 < msg.size() && msg[i + 1] == '%') {
            i++;
            continue;
        }
        parameter_count++;
    }
    if (parameter_count != values.size()) {
        throw InternalException("Expected %d parameters, received %d", parameter_count, values.size());
    }
#endif
    return ExceptionFormatValue::Format(msg, values);
}
|
64
81
|
|
@@ -3,6 +3,7 @@
|
|
3
3
|
#include "fmt/format.h"
|
4
4
|
#include "fmt/printf.h"
|
5
5
|
#include "duckdb/common/types/hugeint.hpp"
|
6
|
+
#include "duckdb/parser/keyword_helper.hpp"
|
6
7
|
|
7
8
|
namespace duckdb {
|
8
9
|
|
@@ -40,6 +41,19 @@ template <>
|
|
40
41
|
ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(string value) {
|
41
42
|
return ExceptionFormatValue(std::move(value));
|
42
43
|
}
|
44
|
+
|
45
|
+
template <>
|
46
|
+
ExceptionFormatValue
|
47
|
+
ExceptionFormatValue::CreateFormatValue(SQLString value) { // NOLINT: templating requires us to copy value here
|
48
|
+
return KeywordHelper::WriteQuoted(value.raw_string, '\'');
|
49
|
+
}
|
50
|
+
|
51
|
+
template <>
|
52
|
+
ExceptionFormatValue
|
53
|
+
ExceptionFormatValue::CreateFormatValue(SQLIdentifier value) { // NOLINT: templating requires us to copy value here
|
54
|
+
return KeywordHelper::WriteOptionallyQuoted(value.raw_string, '"');
|
55
|
+
}
|
56
|
+
|
43
57
|
template <>
|
44
58
|
ExceptionFormatValue ExceptionFormatValue::CreateFormatValue(const char *value) {
|
45
59
|
return ExceptionFormatValue(string(value));
|
@@ -11,6 +11,7 @@
|
|
11
11
|
#include "duckdb/main/client_data.hpp"
|
12
12
|
#include "duckdb/main/database.hpp"
|
13
13
|
#include "duckdb/main/extension_helper.hpp"
|
14
|
+
#include "duckdb/common/windows_util.hpp"
|
14
15
|
|
15
16
|
#include <cstdint>
|
16
17
|
#include <cstdio>
|
@@ -53,6 +54,14 @@ bool PathMatched(const string &path, const string &sub_path) {
|
|
53
54
|
|
54
55
|
#ifndef _WIN32
|
55
56
|
|
57
|
+
//! Looks up the environment variable `name` with getenv.
//! Returns its value, or an empty string when getenv reports no such variable.
string FileSystem::GetEnvVariable(const string &name) {
    const char *value = getenv(name.c_str());
    return value ? string(value) : string();
}
|
64
|
+
|
56
65
|
bool FileSystem::IsPathAbsolute(const string &path) {
|
57
66
|
auto path_separator = FileSystem::PathSeparator();
|
58
67
|
return PathMatched(path, path_separator);
|
@@ -85,36 +94,59 @@ string FileSystem::GetWorkingDirectory() {
|
|
85
94
|
}
|
86
95
|
return string(buffer.get());
|
87
96
|
}
|
97
|
+
|
98
|
+
//! Non-Windows variant: the path is returned unchanged.
//! Callers must pass an absolute path (checked with D_ASSERT).
string FileSystem::NormalizeAbsolutePath(const string &path) {
    D_ASSERT(IsPathAbsolute(path));
    string normalized(path);
    return normalized;
}
|
102
|
+
|
88
103
|
#else
|
89
104
|
|
90
|
-
|
91
|
-
//
|
92
|
-
auto
|
93
|
-
|
94
|
-
|
105
|
+
//! Windows variant: looks up the environment variable `env` through the wide-character API.
//! Returns the value converted back to UTF-8, or an empty string when the variable is not found.
string FileSystem::GetEnvVariable(const string &env) {
    // _wgetenv takes a wide-character name, so convert the variable name first
    const auto wide_name = WindowsUtil::UTF8ToUnicode(env.c_str());
    const auto wide_value = _wgetenv(wide_name.c_str());
    // a null result means no environment variable of this name was found
    return wide_value ? WindowsUtil::UnicodeToUTF8(wide_value) : string();
}
|
116
|
+
|
117
|
+
bool FileSystem::IsPathAbsolute(const string &path) {
|
118
|
+
// 1) A single backslash or forward-slash
|
119
|
+
if (PathMatched(path, "\\") || PathMatched(path, "/")) {
|
99
120
|
return true;
|
100
121
|
}
|
101
|
-
//
|
122
|
+
// 2) A disk designator with a backslash (e.g., C:\ or C:/)
|
102
123
|
auto path_aux = path;
|
103
124
|
path_aux.erase(0, 1);
|
104
|
-
|
105
|
-
if (PathMatched(path_aux, sub_path)) {
|
125
|
+
if (PathMatched(path_aux, ":\\") || PathMatched(path_aux, ":/")) {
|
106
126
|
return true;
|
107
127
|
}
|
108
128
|
return false;
|
109
129
|
}
|
110
130
|
|
131
|
+
//! Windows variant: converts the separators of `path`, lower-cases the result and, when the
//! result is matched against a single backslash, prepends the first two characters of the
//! working directory (the drive letter). Callers must pass an absolute path (checked with D_ASSERT).
string FileSystem::NormalizeAbsolutePath(const string &path) {
    D_ASSERT(IsPathAbsolute(path));
    const auto converted = FileSystem::ConvertSeparators(path);
    auto normalized = StringUtil::Lower(converted);
    if (!PathMatched(normalized, "\\")) {
        return normalized;
    }
    // path starts with a single separator: prepend the drive letter of the working directory
    const auto drive = GetWorkingDirectory().substr(0, 2);
    return drive + normalized;
}
|
141
|
+
|
111
142
|
//! Windows variant: the path separator is a single backslash
string FileSystem::PathSeparator() {
    return string("\\");
}
|
114
145
|
|
115
146
|
void FileSystem::SetWorkingDirectory(const string &path) {
|
116
|
-
|
117
|
-
|
147
|
+
auto unicode_path = WindowsUtil::UTF8ToUnicode(path.c_str());
|
148
|
+
if (!SetCurrentDirectoryW(unicode_path.c_str())) {
|
149
|
+
throw IOException("Could not change working directory to \"%s\"", path);
|
118
150
|
}
|
119
151
|
}
|
120
152
|
|
@@ -134,16 +166,16 @@ idx_t FileSystem::GetAvailableMemory() {
|
|
134
166
|
}
|
135
167
|
|
136
168
|
//! Windows variant: returns the current working directory converted to UTF-8.
//! Throws an IOException when either GetCurrentDirectoryW call does not yield the expected length.
string FileSystem::GetWorkingDirectory() {
    // first call with an empty buffer: per the Win32 documentation this returns the required
    // buffer size in characters, including the terminating null character
    idx_t required_size = GetCurrentDirectoryW(0, nullptr);
    if (required_size == 0) {
        throw IOException("Could not get working directory!");
    }
    auto wide_buffer = make_unsafe_array<wchar_t>(required_size);
    // second call fills the buffer; the length it reports is expected to be one less than the size above
    idx_t written = GetCurrentDirectoryW(required_size, wide_buffer.get());
    if (required_size != written + 1) {
        throw IOException("Could not get working directory!");
    }
    return WindowsUtil::UnicodeToUTF8(wide_buffer.get());
}
|
148
180
|
|
149
181
|
#endif
|
@@ -161,13 +193,7 @@ string FileSystem::ConvertSeparators(const string &path) {
|
|
161
193
|
return path;
|
162
194
|
}
|
163
195
|
// on windows-based systems we accept both
|
164
|
-
|
165
|
-
for (idx_t i = 0; i < result.size(); i++) {
|
166
|
-
if (result[i] == '/') {
|
167
|
-
result[i] = separator;
|
168
|
-
}
|
169
|
-
}
|
170
|
-
return result;
|
196
|
+
return StringUtil::Replace(path, "/", separator_str);
|
171
197
|
}
|
172
198
|
|
173
199
|
string FileSystem::ExtractName(const string &path) {
|
@@ -202,14 +228,10 @@ string FileSystem::GetHomeDirectory(optional_ptr<FileOpener> opener) {
|
|
202
228
|
}
|
203
229
|
// fallback to the default home directories for the specified system
|
204
230
|
#ifdef DUCKDB_WINDOWS
|
205
|
-
|
231
|
+
return FileSystem::GetEnvVariable("USERPROFILE");
|
206
232
|
#else
|
207
|
-
|
233
|
+
return FileSystem::GetEnvVariable("HOME");
|
208
234
|
#endif
|
209
|
-
if (homedir) {
|
210
|
-
return homedir;
|
211
|
-
}
|
212
|
-
return string();
|
213
235
|
}
|
214
236
|
|
215
237
|
string FileSystem::GetHomeDirectory() {
|
@@ -305,6 +327,20 @@ void FileSystem::FileSync(FileHandle &handle) {
|
|
305
327
|
throw NotImplementedException("%s: FileSync is not implemented!", GetName());
|
306
328
|
}
|
307
329
|
|
330
|
+
bool FileSystem::HasGlob(const string &str) {
|
331
|
+
for (idx_t i = 0; i < str.size(); i++) {
|
332
|
+
switch (str[i]) {
|
333
|
+
case '*':
|
334
|
+
case '?':
|
335
|
+
case '[':
|
336
|
+
return true;
|
337
|
+
default:
|
338
|
+
break;
|
339
|
+
}
|
340
|
+
}
|
341
|
+
return false;
|
342
|
+
}
|
343
|
+
|
308
344
|
//! Base implementation: always throws a NotImplementedException that names this file system
vector<string> FileSystem::Glob(const string &path, FileOpener *opener) {
    // neither argument is used by the base implementation
    (void)path;
    (void)opener;
    throw NotImplementedException("%s: Glob is not implemented!", GetName());
}
|
@@ -333,12 +369,8 @@ vector<string> FileSystem::GlobFiles(const string &pattern, ClientContext &conte
|
|
333
369
|
auto result = Glob(pattern);
|
334
370
|
if (result.empty()) {
|
335
371
|
string required_extension;
|
336
|
-
|
337
|
-
|
338
|
-
if (StringUtil::StartsWith(pattern, prefix)) {
|
339
|
-
required_extension = "httpfs";
|
340
|
-
break;
|
341
|
-
}
|
372
|
+
if (FileSystem::IsRemoteFile(pattern)) {
|
373
|
+
required_extension = "httpfs";
|
342
374
|
}
|
343
375
|
if (!required_extension.empty() && !context.db->ExtensionIsLoaded(required_extension)) {
|
344
376
|
// an extension is required to read this file but it is not loaded - try to load it
|
@@ -455,4 +487,14 @@ FileType FileHandle::GetType() {
|
|
455
487
|
return file_system.GetFileType(*this);
|
456
488
|
}
|
457
489
|
|
490
|
+
bool FileSystem::IsRemoteFile(const string &path) {
|
491
|
+
const string prefixes[] = {"http://", "https://", "s3://"};
|
492
|
+
for (auto &prefix : prefixes) {
|
493
|
+
if (StringUtil::StartsWith(path, prefix)) {
|
494
|
+
return true;
|
495
|
+
}
|
496
|
+
}
|
497
|
+
return false;
|
498
|
+
}
|
499
|
+
|
458
500
|
} // namespace duckdb
|
@@ -162,8 +162,9 @@ static FileType GetFileTypeInternal(int fd) { // LCOV_EXCL_START
|
|
162
162
|
}
|
163
163
|
} // LCOV_EXCL_STOP
|
164
164
|
|
165
|
-
unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &
|
165
|
+
unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &path_p, uint8_t flags, FileLockType lock_type,
|
166
166
|
FileCompressionType compression, FileOpener *opener) {
|
167
|
+
auto path = FileSystem::ExpandPath(path_p, opener);
|
167
168
|
if (compression != FileCompressionType::UNCOMPRESSED) {
|
168
169
|
throw NotImplementedException("Unsupported compression type for default file system");
|
169
170
|
}
|
@@ -506,8 +507,9 @@ public:
|
|
506
507
|
};
|
507
508
|
};
|
508
509
|
|
509
|
-
unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &
|
510
|
+
unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &path_p, uint8_t flags, FileLockType lock_type,
|
510
511
|
FileCompressionType compression, FileOpener *opener) {
|
512
|
+
auto path = FileSystem::ExpandPath(path_p, opener);
|
511
513
|
if (compression != FileCompressionType::UNCOMPRESSED) {
|
512
514
|
throw NotImplementedException("Unsupported compression type for default file system");
|
513
515
|
}
|
@@ -775,7 +777,7 @@ void LocalFileSystem::MoveFile(const string &source, const string &target) {
|
|
775
777
|
auto source_unicode = WindowsUtil::UTF8ToUnicode(source.c_str());
|
776
778
|
auto target_unicode = WindowsUtil::UTF8ToUnicode(target.c_str());
|
777
779
|
if (!MoveFileW(source_unicode.c_str(), target_unicode.c_str())) {
|
778
|
-
throw IOException("Could not move file");
|
780
|
+
throw IOException("Could not move file: %s", GetLastErrorAsString());
|
779
781
|
}
|
780
782
|
}
|
781
783
|
|
@@ -819,19 +821,6 @@ idx_t LocalFileSystem::SeekPosition(FileHandle &handle) {
|
|
819
821
|
return GetFilePointer(handle);
|
820
822
|
}
|
821
823
|
|
822
|
-
static bool HasGlob(const string &str) {
|
823
|
-
for (idx_t i = 0; i < str.size(); i++) {
|
824
|
-
switch (str[i]) {
|
825
|
-
case '*':
|
826
|
-
case '?':
|
827
|
-
case '[':
|
828
|
-
return true;
|
829
|
-
default:
|
830
|
-
break;
|
831
|
-
}
|
832
|
-
}
|
833
|
-
return false;
|
834
|
-
}
|
835
824
|
static bool IsCrawl(const string &glob) {
|
836
825
|
// glob must match exactly
|
837
826
|
return glob == "**";
|
@@ -349,7 +349,7 @@ string LogicalType::ToString() const {
|
|
349
349
|
auto &child_types = StructType::GetChildTypes(*this);
|
350
350
|
string ret = "STRUCT(";
|
351
351
|
for (size_t i = 0; i < child_types.size(); i++) {
|
352
|
-
ret +=
|
352
|
+
ret += StringUtil::Format("%s %s", SQLIdentifier(child_types[i].first), child_types[i].second);
|
353
353
|
if (i < child_types.size() - 1) {
|
354
354
|
ret += ", ";
|
355
355
|
}
|
@@ -77,7 +77,7 @@ static void AppendFilteredToResult(Vector &lambda_vector, list_entry_t *result_e
|
|
77
77
|
}
|
78
78
|
|
79
79
|
// found a true value
|
80
|
-
if (lambda_validity.RowIsValid(entry) && lambda_values[entry]
|
80
|
+
if (lambda_validity.RowIsValid(entry) && lambda_values[entry]) {
|
81
81
|
true_sel.set_index(true_count++, i);
|
82
82
|
curr_list_len++;
|
83
83
|
}
|
@@ -1172,6 +1172,16 @@ void BufferedCSVReader::SkipEmptyLines() {
|
|
1172
1172
|
}
|
1173
1173
|
}
|
1174
1174
|
|
1175
|
+
//! Records `line_length` in the client data as the new debug maximum line length.
//! Nothing is updated unless debug_set_max_line_length is set, and only when `line_length`
//! is at least the currently stored value.
void UpdateMaxLineLength(ClientContext &context, idx_t line_length) {
    auto &client_data = *context.client_data;
    const bool tracking_enabled = client_data.debug_set_max_line_length;
    if (tracking_enabled && line_length >= client_data.debug_max_line_length) {
        client_data.debug_max_line_length = line_length;
    }
}
|
1184
|
+
|
1175
1185
|
bool BufferedCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message) {
|
1176
1186
|
// used for parsing algorithm
|
1177
1187
|
bool finished_chunk = false;
|
@@ -1239,9 +1249,7 @@ add_row : {
|
|
1239
1249
|
return false;
|
1240
1250
|
}
|
1241
1251
|
finished_chunk = AddRow(insert_chunk, column, error_message);
|
1242
|
-
|
1243
|
-
context.client_data->max_line_length = position - line_start;
|
1244
|
-
}
|
1252
|
+
UpdateMaxLineLength(context, position - line_start);
|
1245
1253
|
if (!error_message.empty()) {
|
1246
1254
|
return false;
|
1247
1255
|
}
|
@@ -1379,9 +1387,7 @@ final_state:
|
|
1379
1387
|
AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
|
1380
1388
|
finished_chunk = AddRow(insert_chunk, column, error_message);
|
1381
1389
|
SkipEmptyLines();
|
1382
|
-
|
1383
|
-
context.client_data->max_line_length = position - line_start;
|
1384
|
-
}
|
1390
|
+
UpdateMaxLineLength(context, position - line_start);
|
1385
1391
|
if (!error_message.empty()) {
|
1386
1392
|
return false;
|
1387
1393
|
}
|
@@ -183,6 +183,12 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
|
|
183
183
|
}
|
184
184
|
|
185
185
|
void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value &value) {
|
186
|
+
if (loption == "new_line") {
|
187
|
+
// Steal this from SetBaseOption so we can write different newlines (e.g., format JSON ARRAY)
|
188
|
+
write_newline = ParseString(value, loption);
|
189
|
+
return;
|
190
|
+
}
|
191
|
+
|
186
192
|
if (SetBaseOption(loption, value)) {
|
187
193
|
return;
|
188
194
|
}
|
@@ -199,6 +205,10 @@ void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value
|
|
199
205
|
}
|
200
206
|
SetDateFormat(LogicalTypeId::TIMESTAMP, format, false);
|
201
207
|
SetDateFormat(LogicalTypeId::TIMESTAMP_TZ, format, false);
|
208
|
+
} else if (loption == "prefix") {
|
209
|
+
prefix = ParseString(value, loption);
|
210
|
+
} else if (loption == "suffix") {
|
211
|
+
suffix = ParseString(value, loption);
|
202
212
|
} else {
|
203
213
|
throw BinderException("Unrecognized option CSV writer \"%s\"", loption);
|
204
214
|
}
|
@@ -51,8 +51,8 @@ static void WriteCopyStatement(FileSystem &fs, stringstream &ss, CopyInfo &info,
|
|
51
51
|
ss << KeywordHelper::WriteOptionallyQuoted(exported_table.schema_name) << ".";
|
52
52
|
}
|
53
53
|
|
54
|
-
ss <<
|
55
|
-
|
54
|
+
ss << StringUtil::Format("%s FROM %s (", SQLIdentifier(exported_table.table_name),
|
55
|
+
SQLString(exported_table.file_path));
|
56
56
|
|
57
57
|
// write the copy options
|
58
58
|
ss << "FORMAT '" << info.format << "'";
|
@@ -41,7 +41,7 @@ string PragmaShowTables(ClientContext &context, const FunctionParameters &parameters
|
|
41
41
|
)
|
42
42
|
SELECT "name"
|
43
43
|
FROM db_objects
|
44
|
-
ORDER BY "name";)EOF", where_clause, where_clause
|
44
|
+
ORDER BY "name";)EOF", where_clause, where_clause);
|
45
45
|
// clang-format on
|
46
46
|
|
47
47
|
return pragma_query;
|
@@ -50,15 +50,17 @@ string PragmaShowTables(ClientContext &context, const FunctionParameters &parameters
|
|
50
50
|
string PragmaShowTablesExpanded(ClientContext &context, const FunctionParameters &parameters) {
|
51
51
|
return R"(
|
52
52
|
SELECT
|
53
|
+
t.database_name AS database,
|
54
|
+
t.schema_name AS schema,
|
53
55
|
t.table_name,
|
54
56
|
LIST(c.column_name order by c.column_index) AS column_names,
|
55
57
|
LIST(c.data_type order by c.column_index) AS column_types,
|
56
|
-
FIRST(t.temporary) AS temporary
|
58
|
+
FIRST(t.temporary) AS temporary,
|
57
59
|
FROM duckdb_tables t
|
58
60
|
JOIN duckdb_columns c
|
59
61
|
USING (table_oid)
|
60
|
-
GROUP BY t.table_name
|
61
|
-
ORDER BY t.table_name;
|
62
|
+
GROUP BY t.database_name, t.schema_name, t.table_name
|
63
|
+
ORDER BY t.database_name, t.schema_name, t.table_name;
|
62
64
|
)";
|
63
65
|
}
|
64
66
|
|
@@ -1,15 +1,16 @@
|
|
1
1
|
#include "duckdb/common/bind_helpers.hpp"
|
2
2
|
#include "duckdb/common/file_system.hpp"
|
3
|
+
#include "duckdb/common/multi_file_reader.hpp"
|
3
4
|
#include "duckdb/common/serializer/buffered_serializer.hpp"
|
4
5
|
#include "duckdb/common/string_util.hpp"
|
6
|
+
#include "duckdb/common/types/column/column_data_collection.hpp"
|
5
7
|
#include "duckdb/common/types/string_type.hpp"
|
6
8
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
7
9
|
#include "duckdb/function/copy_function.hpp"
|
8
10
|
#include "duckdb/function/scalar/string_functions.hpp"
|
9
11
|
#include "duckdb/function/table/read_csv.hpp"
|
10
12
|
#include "duckdb/parser/parsed_data/copy_info.hpp"
|
11
|
-
|
12
|
-
#include "duckdb/common/types/column/column_data_collection.hpp"
|
13
|
+
|
13
14
|
#include <limits>
|
14
15
|
|
15
16
|
namespace duckdb {
|
@@ -57,6 +58,15 @@ void BaseCSVData::Finalize() {
|
|
57
58
|
SubstringDetection(options.escape, options.null_str, "ESCAPE", "NULL");
|
58
59
|
}
|
59
60
|
}
|
61
|
+
|
62
|
+
if (!options.prefix.empty() || !options.suffix.empty()) {
|
63
|
+
if (options.prefix.empty() || options.suffix.empty()) {
|
64
|
+
throw BinderException("COPY ... (FORMAT CSV) must have both PREFIX and SUFFIX, or none at all");
|
65
|
+
}
|
66
|
+
if (options.header) {
|
67
|
+
throw BinderException("COPY ... (FORMAT CSV)'s HEADER cannot be combined with PREFIX/SUFFIX");
|
68
|
+
}
|
69
|
+
}
|
60
70
|
}
|
61
71
|
|
62
72
|
static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &info, vector<string> &names,
|
@@ -85,6 +95,9 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &i
|
|
85
95
|
bind_data->requires_quotes[bind_data->options.delimiter[0]] = true;
|
86
96
|
bind_data->requires_quotes[bind_data->options.quote[0]] = true;
|
87
97
|
}
|
98
|
+
if (!bind_data->options.write_newline.empty()) {
|
99
|
+
bind_data->newline = bind_data->options.write_newline;
|
100
|
+
}
|
88
101
|
return std::move(bind_data);
|
89
102
|
}
|
90
103
|
|
@@ -251,24 +264,41 @@ struct LocalWriteCSVData : public LocalFunctionData {
|
|
251
264
|
BufferedSerializer serializer;
|
252
265
|
//! A chunk with VARCHAR columns to cast intermediates into
|
253
266
|
DataChunk cast_chunk;
|
267
|
+
//! If we've written any rows yet, allows us to prevent a trailing comma when writing JSON ARRAY
|
268
|
+
bool written_anything = false;
|
254
269
|
};
|
255
270
|
|
256
271
|
struct GlobalWriteCSVData : public GlobalFunctionData {
|
257
|
-
GlobalWriteCSVData(FileSystem &fs, const string &file_path, FileCompressionType compression)
|
272
|
+
GlobalWriteCSVData(FileSystem &fs, const string &file_path, FileCompressionType compression)
|
273
|
+
: fs(fs), written_anything(false) {
|
258
274
|
handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW,
|
259
275
|
FileLockType::WRITE_LOCK, compression);
|
260
276
|
}
|
261
277
|
|
278
|
+
//! Write generic data, e.g., CSV header
|
262
279
|
void WriteData(const_data_ptr_t data, idx_t size) {
|
263
280
|
lock_guard<mutex> flock(lock);
|
264
281
|
handle->Write((void *)data, size);
|
265
282
|
}
|
266
283
|
|
284
|
+
//! Write rows
|
285
|
+
void WriteRows(const_data_ptr_t data, idx_t size, const string &newline) {
|
286
|
+
lock_guard<mutex> flock(lock);
|
287
|
+
if (written_anything) {
|
288
|
+
handle->Write((void *)newline.c_str(), newline.length());
|
289
|
+
} else {
|
290
|
+
written_anything = true;
|
291
|
+
}
|
292
|
+
handle->Write((void *)data, size);
|
293
|
+
}
|
294
|
+
|
267
295
|
FileSystem &fs;
|
268
296
|
//! The mutex for writing to the physical file
|
269
297
|
mutex lock;
|
270
298
|
//! The file handle to write to
|
271
299
|
unique_ptr<FileHandle> handle;
|
300
|
+
//! If we've written any rows yet, allows us to prevent a trailing comma when writing JSON ARRAY
|
301
|
+
bool written_anything;
|
272
302
|
};
|
273
303
|
|
274
304
|
static unique_ptr<LocalFunctionData> WriteCSVInitializeLocal(ExecutionContext &context, FunctionData &bind_data) {
|
@@ -290,6 +320,10 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
|
|
290
320
|
auto global_data =
|
291
321
|
make_uniq<GlobalWriteCSVData>(FileSystem::GetFileSystem(context), file_path, options.compression);
|
292
322
|
|
323
|
+
if (!options.prefix.empty()) {
|
324
|
+
global_data->WriteData((const_data_ptr_t)options.prefix.c_str(), options.prefix.size());
|
325
|
+
}
|
326
|
+
|
293
327
|
if (options.header) {
|
294
328
|
BufferedSerializer serializer;
|
295
329
|
// write the header line to the file
|
@@ -304,11 +338,12 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
|
|
304
338
|
|
305
339
|
global_data->WriteData(serializer.blob.data.get(), serializer.blob.size);
|
306
340
|
}
|
341
|
+
|
307
342
|
return std::move(global_data);
|
308
343
|
}
|
309
344
|
|
310
345
|
static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_data, DataChunk &cast_chunk,
|
311
|
-
BufferedSerializer &writer, DataChunk &input) {
|
346
|
+
BufferedSerializer &writer, DataChunk &input, bool &written_anything) {
|
312
347
|
auto &csv_data = bind_data.Cast<WriteCSVData>();
|
313
348
|
auto &options = csv_data.options;
|
314
349
|
|
@@ -338,6 +373,11 @@ static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_dat
|
|
338
373
|
cast_chunk.Flatten();
|
339
374
|
// now loop over the vectors and output the values
|
340
375
|
for (idx_t row_idx = 0; row_idx < cast_chunk.size(); row_idx++) {
|
376
|
+
if (row_idx == 0 && !written_anything) {
|
377
|
+
written_anything = true;
|
378
|
+
} else {
|
379
|
+
writer.WriteBufferData(csv_data.newline);
|
380
|
+
}
|
341
381
|
// write values
|
342
382
|
for (idx_t col_idx = 0; col_idx < cast_chunk.ColumnCount(); col_idx++) {
|
343
383
|
if (col_idx != 0) {
|
@@ -357,7 +397,6 @@ static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_dat
|
|
357
397
|
WriteQuotedString(writer, csv_data, str_data[row_idx].GetData(), str_data[row_idx].GetSize(),
|
358
398
|
csv_data.options.force_quote[col_idx]);
|
359
399
|
}
|
360
|
-
writer.WriteBufferData(csv_data.newline);
|
361
400
|
}
|
362
401
|
}
|
363
402
|
|
@@ -368,13 +407,15 @@ static void WriteCSVSink(ExecutionContext &context, FunctionData &bind_data, Glo
|
|
368
407
|
auto &global_state = gstate.Cast<GlobalWriteCSVData>();
|
369
408
|
|
370
409
|
// write data into the local buffer
|
371
|
-
WriteCSVChunkInternal(context.client, bind_data, local_data.cast_chunk, local_data.serializer, input
|
410
|
+
WriteCSVChunkInternal(context.client, bind_data, local_data.cast_chunk, local_data.serializer, input,
|
411
|
+
local_data.written_anything);
|
372
412
|
|
373
413
|
// check if we should flush what we have currently written
|
374
414
|
auto &writer = local_data.serializer;
|
375
415
|
if (writer.blob.size >= csv_data.flush_size) {
|
376
|
-
global_state.
|
416
|
+
global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
|
377
417
|
writer.Reset();
|
418
|
+
local_data.written_anything = false;
|
378
419
|
}
|
379
420
|
}
|
380
421
|
|
@@ -385,10 +426,11 @@ static void WriteCSVCombine(ExecutionContext &context, FunctionData &bind_data,
|
|
385
426
|
LocalFunctionData &lstate) {
|
386
427
|
auto &local_data = lstate.Cast<LocalWriteCSVData>();
|
387
428
|
auto &global_state = gstate.Cast<GlobalWriteCSVData>();
|
429
|
+
auto &csv_data = bind_data.Cast<WriteCSVData>();
|
388
430
|
auto &writer = local_data.serializer;
|
389
431
|
// flush the local writer
|
390
|
-
if (
|
391
|
-
global_state.
|
432
|
+
if (local_data.written_anything) {
|
433
|
+
global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
|
392
434
|
writer.Reset();
|
393
435
|
}
|
394
436
|
}
|
@@ -398,6 +440,16 @@ static void WriteCSVCombine(ExecutionContext &context, FunctionData &bind_data,
|
|
398
440
|
//===--------------------------------------------------------------------===//
|
399
441
|
void WriteCSVFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) {
|
400
442
|
auto &global_state = gstate.Cast<GlobalWriteCSVData>();
|
443
|
+
auto &csv_data = bind_data.Cast<WriteCSVData>();
|
444
|
+
auto &options = csv_data.options;
|
445
|
+
|
446
|
+
BufferedSerializer serializer;
|
447
|
+
if (!options.suffix.empty()) {
|
448
|
+
serializer.WriteBufferData(options.suffix);
|
449
|
+
} else if (global_state.written_anything) {
|
450
|
+
serializer.WriteBufferData(csv_data.newline);
|
451
|
+
}
|
452
|
+
global_state.WriteData(serializer.blob.data.get(), serializer.blob.size);
|
401
453
|
|
402
454
|
global_state.handle->Close();
|
403
455
|
global_state.handle.reset();
|
@@ -434,10 +486,11 @@ unique_ptr<PreparedBatchData> WriteCSVPrepareBatch(ClientContext &context, Funct
|
|
434
486
|
DataChunk cast_chunk;
|
435
487
|
cast_chunk.Initialize(Allocator::Get(context), types);
|
436
488
|
|
437
|
-
auto batch = make_uniq<WriteCSVBatchData>();
|
438
489
|
// write CSV chunks to the batch data
|
490
|
+
bool written_anything = false;
|
491
|
+
auto batch = make_uniq<WriteCSVBatchData>();
|
439
492
|
for (auto &chunk : collection->Chunks()) {
|
440
|
-
WriteCSVChunkInternal(context, bind_data, cast_chunk, batch->serializer, chunk);
|
493
|
+
WriteCSVChunkInternal(context, bind_data, cast_chunk, batch->serializer, chunk, written_anything);
|
441
494
|
}
|
442
495
|
return std::move(batch);
|
443
496
|
}
|
@@ -449,8 +502,9 @@ void WriteCSVFlushBatch(ClientContext &context, FunctionData &bind_data, GlobalF
|
|
449
502
|
PreparedBatchData &batch) {
|
450
503
|
auto &csv_batch = batch.Cast<WriteCSVBatchData>();
|
451
504
|
auto &global_state = gstate.Cast<GlobalWriteCSVData>();
|
505
|
+
auto &csv_data = bind_data.Cast<WriteCSVData>();
|
452
506
|
auto &writer = csv_batch.serializer;
|
453
|
-
global_state.
|
507
|
+
global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
|
454
508
|
writer.Reset();
|
455
509
|
}
|
456
510
|
|