duckdb 0.7.2-dev3546.0 → 0.7.2-dev3666.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/database.cpp +1 -0
- package/src/duckdb/extension/json/buffered_json_reader.cpp +56 -17
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +56 -31
- package/src/duckdb/extension/json/include/json_common.hpp +5 -4
- package/src/duckdb/extension/json/include/json_executors.hpp +13 -18
- package/src/duckdb/extension/json/include/json_functions.hpp +3 -0
- package/src/duckdb/extension/json/include/json_scan.hpp +106 -153
- package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
- package/src/duckdb/extension/json/json_common.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +94 -38
- package/src/duckdb/extension/json/json_functions/json_contains.cpp +7 -8
- package/src/duckdb/extension/json/json_functions/json_create.cpp +7 -7
- package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +4 -4
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +4 -4
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +7 -5
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +10 -8
- package/src/duckdb/extension/json/json_functions/json_valid.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/read_json.cpp +166 -169
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +37 -16
- package/src/duckdb/extension/json/json_functions.cpp +11 -4
- package/src/duckdb/extension/json/json_scan.cpp +593 -374
- package/src/duckdb/extension/parquet/parquet-extension.cpp +5 -0
- package/src/duckdb/src/common/file_system.cpp +26 -6
- package/src/duckdb/src/common/local_file_system.cpp +0 -13
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +12 -6
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +10 -0
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -3
- package/src/duckdb/src/function/table/copy_csv.cpp +66 -12
- package/src/duckdb/src/function/table/read_csv.cpp +13 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +8 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/column_dependency_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/limits.hpp +46 -46
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +8 -8
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/operator/convert_to_string.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +2 -4
- package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +7 -7
- package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +10 -10
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -12
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_iterators.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/winapi.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -3
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +10 -14
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/udf_function.hpp +56 -50
- package/src/duckdb/src/include/duckdb/main/appender.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/connection.hpp +8 -9
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -7
- package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/column_list.hpp +7 -7
- package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +9 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +1 -1
- package/src/duckdb/src/storage/single_file_block_manager.cpp +0 -4
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5735 -5773
@@ -724,6 +724,11 @@ unique_ptr<TableRef> ParquetScanReplacement(ClientContext &context, const string
|
|
724
724
|
vector<unique_ptr<ParsedExpression>> children;
|
725
725
|
children.push_back(make_uniq<ConstantExpression>(Value(table_name)));
|
726
726
|
table_function->function = make_uniq<FunctionExpression>("parquet_scan", std::move(children));
|
727
|
+
|
728
|
+
if (!FileSystem::HasGlob(table_name)) {
|
729
|
+
table_function->alias = FileSystem::ExtractBaseName(table_name);
|
730
|
+
}
|
731
|
+
|
727
732
|
return std::move(table_function);
|
728
733
|
}
|
729
734
|
|
@@ -305,6 +305,20 @@ void FileSystem::FileSync(FileHandle &handle) {
|
|
305
305
|
throw NotImplementedException("%s: FileSync is not implemented!", GetName());
|
306
306
|
}
|
307
307
|
|
308
|
+
bool FileSystem::HasGlob(const string &str) {
|
309
|
+
for (idx_t i = 0; i < str.size(); i++) {
|
310
|
+
switch (str[i]) {
|
311
|
+
case '*':
|
312
|
+
case '?':
|
313
|
+
case '[':
|
314
|
+
return true;
|
315
|
+
default:
|
316
|
+
break;
|
317
|
+
}
|
318
|
+
}
|
319
|
+
return false;
|
320
|
+
}
|
321
|
+
|
308
322
|
vector<string> FileSystem::Glob(const string &path, FileOpener *opener) {
|
309
323
|
throw NotImplementedException("%s: Glob is not implemented!", GetName());
|
310
324
|
}
|
@@ -333,12 +347,8 @@ vector<string> FileSystem::GlobFiles(const string &pattern, ClientContext &conte
|
|
333
347
|
auto result = Glob(pattern);
|
334
348
|
if (result.empty()) {
|
335
349
|
string required_extension;
|
336
|
-
|
337
|
-
|
338
|
-
if (StringUtil::StartsWith(pattern, prefix)) {
|
339
|
-
required_extension = "httpfs";
|
340
|
-
break;
|
341
|
-
}
|
350
|
+
if (FileSystem::IsRemoteFile(pattern)) {
|
351
|
+
required_extension = "httpfs";
|
342
352
|
}
|
343
353
|
if (!required_extension.empty() && !context.db->ExtensionIsLoaded(required_extension)) {
|
344
354
|
// an extension is required to read this file but it is not loaded - try to load it
|
@@ -455,4 +465,14 @@ FileType FileHandle::GetType() {
|
|
455
465
|
return file_system.GetFileType(*this);
|
456
466
|
}
|
457
467
|
|
468
|
+
bool FileSystem::IsRemoteFile(const string &path) {
|
469
|
+
const string prefixes[] = {"http://", "https://", "s3://"};
|
470
|
+
for (auto &prefix : prefixes) {
|
471
|
+
if (StringUtil::StartsWith(path, prefix)) {
|
472
|
+
return true;
|
473
|
+
}
|
474
|
+
}
|
475
|
+
return false;
|
476
|
+
}
|
477
|
+
|
458
478
|
} // namespace duckdb
|
@@ -819,19 +819,6 @@ idx_t LocalFileSystem::SeekPosition(FileHandle &handle) {
|
|
819
819
|
return GetFilePointer(handle);
|
820
820
|
}
|
821
821
|
|
822
|
-
static bool HasGlob(const string &str) {
|
823
|
-
for (idx_t i = 0; i < str.size(); i++) {
|
824
|
-
switch (str[i]) {
|
825
|
-
case '*':
|
826
|
-
case '?':
|
827
|
-
case '[':
|
828
|
-
return true;
|
829
|
-
default:
|
830
|
-
break;
|
831
|
-
}
|
832
|
-
}
|
833
|
-
return false;
|
834
|
-
}
|
835
822
|
static bool IsCrawl(const string &glob) {
|
836
823
|
// glob must match exactly
|
837
824
|
return glob == "**";
|
@@ -1172,6 +1172,16 @@ void BufferedCSVReader::SkipEmptyLines() {
|
|
1172
1172
|
}
|
1173
1173
|
}
|
1174
1174
|
|
1175
|
+
void UpdateMaxLineLength(ClientContext &context, idx_t line_length) {
|
1176
|
+
if (!context.client_data->debug_set_max_line_length) {
|
1177
|
+
return;
|
1178
|
+
}
|
1179
|
+
if (line_length < context.client_data->debug_max_line_length) {
|
1180
|
+
return;
|
1181
|
+
}
|
1182
|
+
context.client_data->debug_max_line_length = line_length;
|
1183
|
+
}
|
1184
|
+
|
1175
1185
|
bool BufferedCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message) {
|
1176
1186
|
// used for parsing algorithm
|
1177
1187
|
bool finished_chunk = false;
|
@@ -1239,9 +1249,7 @@ add_row : {
|
|
1239
1249
|
return false;
|
1240
1250
|
}
|
1241
1251
|
finished_chunk = AddRow(insert_chunk, column, error_message);
|
1242
|
-
|
1243
|
-
context.client_data->max_line_length = position - line_start;
|
1244
|
-
}
|
1252
|
+
UpdateMaxLineLength(context, position - line_start);
|
1245
1253
|
if (!error_message.empty()) {
|
1246
1254
|
return false;
|
1247
1255
|
}
|
@@ -1379,9 +1387,7 @@ final_state:
|
|
1379
1387
|
AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
|
1380
1388
|
finished_chunk = AddRow(insert_chunk, column, error_message);
|
1381
1389
|
SkipEmptyLines();
|
1382
|
-
|
1383
|
-
context.client_data->max_line_length = position - line_start;
|
1384
|
-
}
|
1390
|
+
UpdateMaxLineLength(context, position - line_start);
|
1385
1391
|
if (!error_message.empty()) {
|
1386
1392
|
return false;
|
1387
1393
|
}
|
@@ -183,6 +183,12 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
|
|
183
183
|
}
|
184
184
|
|
185
185
|
void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value &value) {
|
186
|
+
if (loption == "new_line") {
|
187
|
+
// Steal this from SetBaseOption so we can write different newlines (e.g., format JSON ARRAY)
|
188
|
+
write_newline = ParseString(value, loption);
|
189
|
+
return;
|
190
|
+
}
|
191
|
+
|
186
192
|
if (SetBaseOption(loption, value)) {
|
187
193
|
return;
|
188
194
|
}
|
@@ -199,6 +205,10 @@ void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value
|
|
199
205
|
}
|
200
206
|
SetDateFormat(LogicalTypeId::TIMESTAMP, format, false);
|
201
207
|
SetDateFormat(LogicalTypeId::TIMESTAMP_TZ, format, false);
|
208
|
+
} else if (loption == "prefix") {
|
209
|
+
prefix = ParseString(value, loption);
|
210
|
+
} else if (loption == "suffix") {
|
211
|
+
suffix = ParseString(value, loption);
|
202
212
|
} else {
|
203
213
|
throw BinderException("Unrecognized option CSV writer \"%s\"", loption);
|
204
214
|
}
|
@@ -50,15 +50,17 @@ string PragmaShowTables(ClientContext &context, const FunctionParameters &parame
|
|
50
50
|
string PragmaShowTablesExpanded(ClientContext &context, const FunctionParameters &parameters) {
|
51
51
|
return R"(
|
52
52
|
SELECT
|
53
|
+
t.database_name AS database,
|
54
|
+
t.schema_name AS schema,
|
53
55
|
t.table_name,
|
54
56
|
LIST(c.column_name order by c.column_index) AS column_names,
|
55
57
|
LIST(c.data_type order by c.column_index) AS column_types,
|
56
|
-
FIRST(t.temporary) AS temporary
|
58
|
+
FIRST(t.temporary) AS temporary,
|
57
59
|
FROM duckdb_tables t
|
58
60
|
JOIN duckdb_columns c
|
59
61
|
USING (table_oid)
|
60
|
-
GROUP BY t.table_name
|
61
|
-
ORDER BY t.table_name;
|
62
|
+
GROUP BY t.database_name, t.schema_name, t.table_name
|
63
|
+
ORDER BY t.database_name, t.schema_name, t.table_name;
|
62
64
|
)";
|
63
65
|
}
|
64
66
|
|
@@ -1,15 +1,16 @@
|
|
1
1
|
#include "duckdb/common/bind_helpers.hpp"
|
2
2
|
#include "duckdb/common/file_system.hpp"
|
3
|
+
#include "duckdb/common/multi_file_reader.hpp"
|
3
4
|
#include "duckdb/common/serializer/buffered_serializer.hpp"
|
4
5
|
#include "duckdb/common/string_util.hpp"
|
6
|
+
#include "duckdb/common/types/column/column_data_collection.hpp"
|
5
7
|
#include "duckdb/common/types/string_type.hpp"
|
6
8
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
7
9
|
#include "duckdb/function/copy_function.hpp"
|
8
10
|
#include "duckdb/function/scalar/string_functions.hpp"
|
9
11
|
#include "duckdb/function/table/read_csv.hpp"
|
10
12
|
#include "duckdb/parser/parsed_data/copy_info.hpp"
|
11
|
-
|
12
|
-
#include "duckdb/common/types/column/column_data_collection.hpp"
|
13
|
+
|
13
14
|
#include <limits>
|
14
15
|
|
15
16
|
namespace duckdb {
|
@@ -57,6 +58,15 @@ void BaseCSVData::Finalize() {
|
|
57
58
|
SubstringDetection(options.escape, options.null_str, "ESCAPE", "NULL");
|
58
59
|
}
|
59
60
|
}
|
61
|
+
|
62
|
+
if (!options.prefix.empty() || !options.suffix.empty()) {
|
63
|
+
if (options.prefix.empty() || options.suffix.empty()) {
|
64
|
+
throw BinderException("COPY ... (FORMAT CSV) must have both PREFIX and SUFFIX, or none at all");
|
65
|
+
}
|
66
|
+
if (options.header) {
|
67
|
+
throw BinderException("COPY ... (FORMAT CSV)'s HEADER cannot be combined with PREFIX/SUFFIX");
|
68
|
+
}
|
69
|
+
}
|
60
70
|
}
|
61
71
|
|
62
72
|
static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &info, vector<string> &names,
|
@@ -85,6 +95,9 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &i
|
|
85
95
|
bind_data->requires_quotes[bind_data->options.delimiter[0]] = true;
|
86
96
|
bind_data->requires_quotes[bind_data->options.quote[0]] = true;
|
87
97
|
}
|
98
|
+
if (!bind_data->options.write_newline.empty()) {
|
99
|
+
bind_data->newline = bind_data->options.write_newline;
|
100
|
+
}
|
88
101
|
return std::move(bind_data);
|
89
102
|
}
|
90
103
|
|
@@ -251,24 +264,41 @@ struct LocalWriteCSVData : public LocalFunctionData {
|
|
251
264
|
BufferedSerializer serializer;
|
252
265
|
//! A chunk with VARCHAR columns to cast intermediates into
|
253
266
|
DataChunk cast_chunk;
|
267
|
+
//! If we've written any rows yet, allows us to prevent a trailing comma when writing JSON ARRAY
|
268
|
+
bool written_anything = false;
|
254
269
|
};
|
255
270
|
|
256
271
|
struct GlobalWriteCSVData : public GlobalFunctionData {
|
257
|
-
GlobalWriteCSVData(FileSystem &fs, const string &file_path, FileCompressionType compression)
|
272
|
+
GlobalWriteCSVData(FileSystem &fs, const string &file_path, FileCompressionType compression)
|
273
|
+
: fs(fs), written_anything(false) {
|
258
274
|
handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW,
|
259
275
|
FileLockType::WRITE_LOCK, compression);
|
260
276
|
}
|
261
277
|
|
278
|
+
//! Write generic data, e.g., CSV header
|
262
279
|
void WriteData(const_data_ptr_t data, idx_t size) {
|
263
280
|
lock_guard<mutex> flock(lock);
|
264
281
|
handle->Write((void *)data, size);
|
265
282
|
}
|
266
283
|
|
284
|
+
//! Write rows
|
285
|
+
void WriteRows(const_data_ptr_t data, idx_t size, const string &newline) {
|
286
|
+
lock_guard<mutex> flock(lock);
|
287
|
+
if (written_anything) {
|
288
|
+
handle->Write((void *)newline.c_str(), newline.length());
|
289
|
+
} else {
|
290
|
+
written_anything = true;
|
291
|
+
}
|
292
|
+
handle->Write((void *)data, size);
|
293
|
+
}
|
294
|
+
|
267
295
|
FileSystem &fs;
|
268
296
|
//! The mutex for writing to the physical file
|
269
297
|
mutex lock;
|
270
298
|
//! The file handle to write to
|
271
299
|
unique_ptr<FileHandle> handle;
|
300
|
+
//! If we've written any rows yet, allows us to prevent a trailing comma when writing JSON ARRAY
|
301
|
+
bool written_anything;
|
272
302
|
};
|
273
303
|
|
274
304
|
static unique_ptr<LocalFunctionData> WriteCSVInitializeLocal(ExecutionContext &context, FunctionData &bind_data) {
|
@@ -290,6 +320,10 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
|
|
290
320
|
auto global_data =
|
291
321
|
make_uniq<GlobalWriteCSVData>(FileSystem::GetFileSystem(context), file_path, options.compression);
|
292
322
|
|
323
|
+
if (!options.prefix.empty()) {
|
324
|
+
global_data->WriteData((const_data_ptr_t)options.prefix.c_str(), options.prefix.size());
|
325
|
+
}
|
326
|
+
|
293
327
|
if (options.header) {
|
294
328
|
BufferedSerializer serializer;
|
295
329
|
// write the header line to the file
|
@@ -304,11 +338,12 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
|
|
304
338
|
|
305
339
|
global_data->WriteData(serializer.blob.data.get(), serializer.blob.size);
|
306
340
|
}
|
341
|
+
|
307
342
|
return std::move(global_data);
|
308
343
|
}
|
309
344
|
|
310
345
|
static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_data, DataChunk &cast_chunk,
|
311
|
-
BufferedSerializer &writer, DataChunk &input) {
|
346
|
+
BufferedSerializer &writer, DataChunk &input, bool &written_anything) {
|
312
347
|
auto &csv_data = bind_data.Cast<WriteCSVData>();
|
313
348
|
auto &options = csv_data.options;
|
314
349
|
|
@@ -338,6 +373,11 @@ static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_dat
|
|
338
373
|
cast_chunk.Flatten();
|
339
374
|
// now loop over the vectors and output the values
|
340
375
|
for (idx_t row_idx = 0; row_idx < cast_chunk.size(); row_idx++) {
|
376
|
+
if (row_idx == 0 && !written_anything) {
|
377
|
+
written_anything = true;
|
378
|
+
} else {
|
379
|
+
writer.WriteBufferData(csv_data.newline);
|
380
|
+
}
|
341
381
|
// write values
|
342
382
|
for (idx_t col_idx = 0; col_idx < cast_chunk.ColumnCount(); col_idx++) {
|
343
383
|
if (col_idx != 0) {
|
@@ -357,7 +397,6 @@ static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_dat
|
|
357
397
|
WriteQuotedString(writer, csv_data, str_data[row_idx].GetData(), str_data[row_idx].GetSize(),
|
358
398
|
csv_data.options.force_quote[col_idx]);
|
359
399
|
}
|
360
|
-
writer.WriteBufferData(csv_data.newline);
|
361
400
|
}
|
362
401
|
}
|
363
402
|
|
@@ -368,13 +407,15 @@ static void WriteCSVSink(ExecutionContext &context, FunctionData &bind_data, Glo
|
|
368
407
|
auto &global_state = gstate.Cast<GlobalWriteCSVData>();
|
369
408
|
|
370
409
|
// write data into the local buffer
|
371
|
-
WriteCSVChunkInternal(context.client, bind_data, local_data.cast_chunk, local_data.serializer, input);
|
410
|
+
WriteCSVChunkInternal(context.client, bind_data, local_data.cast_chunk, local_data.serializer, input,
|
411
|
+
local_data.written_anything);
|
372
412
|
|
373
413
|
// check if we should flush what we have currently written
|
374
414
|
auto &writer = local_data.serializer;
|
375
415
|
if (writer.blob.size >= csv_data.flush_size) {
|
376
|
-
global_state.WriteData(writer.blob.data.get(), writer.blob.size);
|
416
|
+
global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
|
377
417
|
writer.Reset();
|
418
|
+
local_data.written_anything = false;
|
378
419
|
}
|
379
420
|
}
|
380
421
|
|
@@ -385,10 +426,11 @@ static void WriteCSVCombine(ExecutionContext &context, FunctionData &bind_data,
|
|
385
426
|
LocalFunctionData &lstate) {
|
386
427
|
auto &local_data = lstate.Cast<LocalWriteCSVData>();
|
387
428
|
auto &global_state = gstate.Cast<GlobalWriteCSVData>();
|
429
|
+
auto &csv_data = bind_data.Cast<WriteCSVData>();
|
388
430
|
auto &writer = local_data.serializer;
|
389
431
|
// flush the local writer
|
390
|
-
if (
|
391
|
-
global_state.WriteData(writer.blob.data.get(), writer.blob.size);
|
432
|
+
if (local_data.written_anything) {
|
433
|
+
global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
|
392
434
|
writer.Reset();
|
393
435
|
}
|
394
436
|
}
|
@@ -398,6 +440,16 @@ static void WriteCSVCombine(ExecutionContext &context, FunctionData &bind_data,
|
|
398
440
|
//===--------------------------------------------------------------------===//
|
399
441
|
void WriteCSVFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) {
|
400
442
|
auto &global_state = gstate.Cast<GlobalWriteCSVData>();
|
443
|
+
auto &csv_data = bind_data.Cast<WriteCSVData>();
|
444
|
+
auto &options = csv_data.options;
|
445
|
+
|
446
|
+
BufferedSerializer serializer;
|
447
|
+
if (!options.suffix.empty()) {
|
448
|
+
serializer.WriteBufferData(options.suffix);
|
449
|
+
} else if (global_state.written_anything) {
|
450
|
+
serializer.WriteBufferData(csv_data.newline);
|
451
|
+
}
|
452
|
+
global_state.WriteData(serializer.blob.data.get(), serializer.blob.size);
|
401
453
|
|
402
454
|
global_state.handle->Close();
|
403
455
|
global_state.handle.reset();
|
@@ -434,10 +486,11 @@ unique_ptr<PreparedBatchData> WriteCSVPrepareBatch(ClientContext &context, Funct
|
|
434
486
|
DataChunk cast_chunk;
|
435
487
|
cast_chunk.Initialize(Allocator::Get(context), types);
|
436
488
|
|
437
|
-
auto batch = make_uniq<WriteCSVBatchData>();
|
438
489
|
// write CSV chunks to the batch data
|
490
|
+
bool written_anything = false;
|
491
|
+
auto batch = make_uniq<WriteCSVBatchData>();
|
439
492
|
for (auto &chunk : collection->Chunks()) {
|
440
|
-
WriteCSVChunkInternal(context, bind_data, cast_chunk, batch->serializer, chunk);
|
493
|
+
WriteCSVChunkInternal(context, bind_data, cast_chunk, batch->serializer, chunk, written_anything);
|
441
494
|
}
|
442
495
|
return std::move(batch);
|
443
496
|
}
|
@@ -449,8 +502,9 @@ void WriteCSVFlushBatch(ClientContext &context, FunctionData &bind_data, GlobalF
|
|
449
502
|
PreparedBatchData &batch) {
|
450
503
|
auto &csv_batch = batch.Cast<WriteCSVBatchData>();
|
451
504
|
auto &global_state = gstate.Cast<GlobalWriteCSVData>();
|
505
|
+
auto &csv_data = bind_data.Cast<WriteCSVData>();
|
452
506
|
auto &writer = csv_batch.serializer;
|
453
|
-
global_state.WriteData(writer.blob.data.get(), writer.blob.size);
|
507
|
+
global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
|
454
508
|
writer.Reset();
|
455
509
|
}
|
456
510
|
|
@@ -33,8 +33,12 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
|
|
33
33
|
bool complex_options = options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1;
|
34
34
|
bool not_supported_options = options.null_padding;
|
35
35
|
|
36
|
-
|
37
|
-
|
36
|
+
auto number_of_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
37
|
+
if (options.parallel_mode != ParallelMode::PARALLEL && int64_t(files.size() * 2) >= number_of_threads) {
|
38
|
+
single_threaded = true;
|
39
|
+
}
|
40
|
+
if (options.parallel_mode == ParallelMode::SINGLE_THREADED || null_or_empty || not_supported_options ||
|
41
|
+
complex_options || options.new_line == NewLineIdentifier::MIX) {
|
38
42
|
// not supported for parallel CSV reading
|
39
43
|
single_threaded = true;
|
40
44
|
}
|
@@ -173,7 +177,8 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
|
|
173
177
|
} else if (loption == "normalize_names") {
|
174
178
|
options.normalize_names = BooleanValue::Get(kv.second);
|
175
179
|
} else if (loption == "parallel") {
|
176
|
-
options.
|
180
|
+
options.parallel_mode =
|
181
|
+
BooleanValue::Get(kv.second) ? ParallelMode::PARALLEL : ParallelMode::SINGLE_THREADED;
|
177
182
|
} else {
|
178
183
|
options.SetReadOption(loption, kv.second, names);
|
179
184
|
}
|
@@ -1190,6 +1195,11 @@ unique_ptr<TableRef> ReadCSVReplacement(ClientContext &context, const string &ta
|
|
1190
1195
|
vector<unique_ptr<ParsedExpression>> children;
|
1191
1196
|
children.push_back(make_uniq<ConstantExpression>(Value(table_name)));
|
1192
1197
|
table_function->function = make_uniq<FunctionExpression>("read_csv_auto", std::move(children));
|
1198
|
+
|
1199
|
+
if (!FileSystem::HasGlob(table_name)) {
|
1200
|
+
table_function->alias = FileSystem::ExtractBaseName(table_name);
|
1201
|
+
}
|
1202
|
+
|
1193
1203
|
return std::move(table_function);
|
1194
1204
|
}
|
1195
1205
|
|
@@ -1,14 +1,16 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.7.2-dev3546"
|
2
|
+
#define DUCKDB_VERSION "0.7.2-dev3666"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "eae707d54c"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
9
9
|
|
10
10
|
#include <cstdint>
|
11
11
|
|
12
|
+
#define DUCKDB_STRINGIFY(x) #x
|
13
|
+
|
12
14
|
namespace duckdb {
|
13
15
|
|
14
16
|
struct PragmaVersionData : public GlobalTableFunctionState {
|
@@ -89,6 +91,10 @@ string DuckDB::Platform() {
|
|
89
91
|
#endif
|
90
92
|
#ifdef __MINGW32__
|
91
93
|
postfix = "_mingw";
|
94
|
+
#endif
|
95
|
+
// this is used for the windows R builds which use a separate build environment
|
96
|
+
#ifdef DUCKDB_OVERRIDE_PLATFORM_POSTFIX
|
97
|
+
postfix = DUCKDB_STRINGIFY(DUCKDB_OVERRIDE_PLATFORM_POSTFIX);
|
92
98
|
#endif
|
93
99
|
return os + "_" + arch + postfix;
|
94
100
|
}
|
@@ -23,7 +23,7 @@ class ColumnDependencyManager {
|
|
23
23
|
public:
|
24
24
|
DUCKDB_API ColumnDependencyManager();
|
25
25
|
DUCKDB_API ~ColumnDependencyManager();
|
26
|
-
|
26
|
+
ColumnDependencyManager(ColumnDependencyManager &&other) = default;
|
27
27
|
ColumnDependencyManager(const ColumnDependencyManager &other) = delete;
|
28
28
|
|
29
29
|
public:
|
@@ -46,7 +46,7 @@ public:
|
|
46
46
|
DUCKDB_API void Reset();
|
47
47
|
|
48
48
|
DUCKDB_API const vector<CatalogSearchEntry> &Get();
|
49
|
-
|
49
|
+
const vector<CatalogSearchEntry> &GetSetPaths() {
|
50
50
|
return set_paths;
|
51
51
|
}
|
52
52
|
DUCKDB_API const CatalogSearchEntry &GetDefault();
|
@@ -121,7 +121,7 @@ public:
|
|
121
121
|
DUCKDB_API static bool UncaughtException();
|
122
122
|
|
123
123
|
DUCKDB_API static string GetStackTrace(int max_depth = 120);
|
124
|
-
|
124
|
+
static string FormatStackTrace(string message = "") {
|
125
125
|
return (message + "\n" + GetStackTrace());
|
126
126
|
}
|
127
127
|
|
@@ -269,7 +269,7 @@ public:
|
|
269
269
|
class IOException : public Exception {
|
270
270
|
public:
|
271
271
|
DUCKDB_API explicit IOException(const string &msg);
|
272
|
-
|
272
|
+
explicit IOException(ExceptionType exception_type, const string &msg) : Exception(exception_type, msg) {
|
273
273
|
}
|
274
274
|
|
275
275
|
template <typename... Args>
|
@@ -371,7 +371,7 @@ public:
|
|
371
371
|
|
372
372
|
class FatalException : public Exception {
|
373
373
|
public:
|
374
|
-
|
374
|
+
explicit FatalException(const string &msg) : FatalException(ExceptionType::FATAL, msg) {
|
375
375
|
}
|
376
376
|
template <typename... Args>
|
377
377
|
explicit FatalException(const string &msg, Args... params) : FatalException(ConstructMessage(msg, params...)) {
|
@@ -187,6 +187,8 @@ public:
|
|
187
187
|
//! Extract the name of a file (e.g if the input is lib/example.dll the name is 'example.dll')
|
188
188
|
DUCKDB_API static string ExtractName(const string &path);
|
189
189
|
|
190
|
+
//! Whether there is a glob in the string
|
191
|
+
DUCKDB_API static bool HasGlob(const string &str);
|
190
192
|
//! Runs a glob on the file system, returning a list of matching files
|
191
193
|
DUCKDB_API virtual vector<string> Glob(const string &path, FileOpener *opener = nullptr);
|
192
194
|
DUCKDB_API vector<string> GlobFiles(const string &path, ClientContext &context,
|
@@ -224,6 +226,9 @@ public:
|
|
224
226
|
|
225
227
|
//! Return the name of the filesytem. Used for forming diagnosis messages.
|
226
228
|
DUCKDB_API virtual std::string GetName() const = 0;
|
229
|
+
|
230
|
+
//! Whether or not a file is remote or local, based only on file path
|
231
|
+
DUCKDB_API static bool IsRemoteFile(const string &path);
|
227
232
|
};
|
228
233
|
|
229
234
|
} // namespace duckdb
|
@@ -12,6 +12,7 @@
|
|
12
12
|
#include "duckdb/main/client_context.hpp"
|
13
13
|
#include "duckdb/main/client_data.hpp"
|
14
14
|
#include "duckdb/common/atomic.hpp"
|
15
|
+
#include "duckdb/common/optional_ptr.hpp"
|
15
16
|
|
16
17
|
namespace duckdb {
|
17
18
|
|
@@ -48,7 +49,7 @@ public:
|
|
48
49
|
}
|
49
50
|
|
50
51
|
//! helper function to get the HTTP
|
51
|
-
static HTTPState
|
52
|
+
static optional_ptr<HTTPState> TryGetState(FileOpener *opener) {
|
52
53
|
auto client_context = FileOpener::TryGetClientContext(opener);
|
53
54
|
if (client_context) {
|
54
55
|
return client_context->client_data->http_state.get();
|
@@ -12,14 +12,14 @@ public:
|
|
12
12
|
int64_t upper;
|
13
13
|
|
14
14
|
public:
|
15
|
-
|
15
|
+
hugeint_t() = default;
|
16
16
|
DUCKDB_API hugeint_t(int64_t value); // NOLINT: Allow implicit conversion from `int64_t`
|
17
|
-
|
17
|
+
constexpr hugeint_t(int64_t upper, uint64_t lower) : lower(lower), upper(upper) {
|
18
18
|
}
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
constexpr hugeint_t(const hugeint_t &rhs) = default;
|
20
|
+
constexpr hugeint_t(hugeint_t &&rhs) = default;
|
21
|
+
hugeint_t &operator=(const hugeint_t &rhs) = default;
|
22
|
+
hugeint_t &operator=(hugeint_t &&rhs) = default;
|
23
23
|
|
24
24
|
DUCKDB_API string ToString() const;
|
25
25
|
|