duckdb 0.7.1-dev16.0 → 0.7.1-dev180.0
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +29 -5
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +5 -1
- package/src/duckdb/extension/json/include/json_scan.hpp +17 -2
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +19 -0
- package/src/duckdb/extension/json/json_functions/read_json.cpp +30 -28
- package/src/duckdb/extension/json/json_functions.cpp +6 -0
- package/src/duckdb/extension/json/json_scan.cpp +111 -23
- package/src/duckdb/extension/parquet/parquet-extension.cpp +3 -2
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
- package/src/duckdb/src/common/file_system.cpp +14 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -0
- package/src/duckdb/src/common/operator/cast_operators.cpp +14 -8
- package/src/duckdb/src/common/printer.cpp +1 -1
- package/src/duckdb/src/common/types/time.cpp +1 -1
- package/src/duckdb/src/common/types/timestamp.cpp +35 -4
- package/src/duckdb/src/common/types.cpp +36 -10
- package/src/duckdb/src/execution/column_binding_resolver.cpp +5 -2
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +7 -9
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +6 -11
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +13 -13
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_detach.cpp +37 -0
- package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +0 -5
- package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +4 -0
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -0
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +36 -9
- package/src/duckdb/src/function/table/read_csv.cpp +15 -4
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/exception.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +9 -1
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +4 -4
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_detach.hpp +32 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/config.hpp +0 -3
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_database_info.hpp +0 -4
- package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +32 -0
- package/src/duckdb/src/include/duckdb/parser/statement/detach_statement.hpp +29 -0
- package/src/duckdb/src/include/duckdb/parser/statement/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +1 -2
- package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +7 -0
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -0
- package/src/duckdb/src/main/client_context.cpp +2 -0
- package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +2 -6
- package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -13
- package/src/duckdb/src/parser/statement/delete_statement.cpp +3 -0
- package/src/duckdb/src/parser/statement/detach_statement.cpp +15 -0
- package/src/duckdb/src/parser/statement/insert_statement.cpp +9 -0
- package/src/duckdb/src/parser/statement/update_statement.cpp +3 -0
- package/src/duckdb/src/parser/transform/expression/transform_case.cpp +3 -3
- package/src/duckdb/src/parser/transform/statement/transform_create_database.cpp +0 -1
- package/src/duckdb/src/parser/transform/statement/transform_detach.cpp +19 -0
- package/src/duckdb/src/parser/transformer.cpp +2 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +6 -3
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +16 -14
- package/src/duckdb/src/planner/binder/statement/bind_detach.cpp +19 -0
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +29 -4
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +22 -1
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +2 -1
- package/src/duckdb/src/planner/binder.cpp +2 -0
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +21 -5
- package/src/duckdb/src/planner/logical_operator.cpp +4 -0
- package/src/duckdb/src/planner/planner.cpp +1 -0
- package/src/duckdb/src/storage/storage_info.cpp +2 -1
- package/src/duckdb/src/storage/table/column_data.cpp +4 -2
- package/src/duckdb/src/storage/table/update_segment.cpp +15 -0
- package/src/duckdb/third_party/fmt/include/fmt/core.h +1 -2
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +14 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +530 -1006
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +17659 -17626
- package/src/duckdb/ub_src_execution_operator_schema.cpp +2 -0
- package/src/duckdb/ub_src_parser_statement.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_statement.cpp +2 -0
- package/src/duckdb/src/include/duckdb/function/create_database_extension.hpp +0 -37
package/package.json
CHANGED

package/src/duckdb/extension/json/buffered_json_reader.cpp
CHANGED

@@ -25,7 +25,12 @@ JSONBufferHandle::JSONBufferHandle(idx_t buffer_index_p, idx_t readers_p, Alloca
 JSONFileHandle::JSONFileHandle(unique_ptr<FileHandle> file_handle_p, Allocator &allocator_p)
     : file_handle(std::move(file_handle_p)), allocator(allocator_p), can_seek(file_handle->CanSeek()),
       plain_file_source(file_handle->OnDiskFile() && can_seek), file_size(file_handle->GetFileSize()), read_position(0),
-      cached_size(0) {
+      requested_reads(0), actual_reads(0), cached_size(0) {
+}
+
+void JSONFileHandle::Close() {
+	file_handle->Close();
+	cached_buffers.clear();
 }
 
 idx_t JSONFileHandle::FileSize() const {
@@ -36,10 +41,6 @@ idx_t JSONFileHandle::Remaining() const {
 	return file_size - read_position;
 }
 
-bool JSONFileHandle::PlainFileSource() const {
-	return plain_file_source;
-}
-
 bool JSONFileHandle::CanSeek() const {
 	return can_seek;
 }
@@ -53,6 +54,9 @@ idx_t JSONFileHandle::GetPositionAndSize(idx_t &position, idx_t requested_size)
 	position = read_position;
 	auto actual_size = MinValue<idx_t>(requested_size, Remaining());
 	read_position += actual_size;
+	if (actual_size != 0) {
+		requested_reads++;
+	}
 	return actual_size;
 }
 
@@ -60,11 +64,13 @@ void JSONFileHandle::ReadAtPosition(const char *pointer, idx_t size, idx_t posit
 	D_ASSERT(size != 0);
 	if (plain_file_source) {
 		file_handle->Read((void *)pointer, size, position);
+		actual_reads++;
 		return;
 	}
 
 	if (sample_run) { // Cache the buffer
 		file_handle->Read((void *)pointer, size, position);
+		actual_reads++;
 		cached_buffers.emplace_back(allocator.Allocate(size));
 		memcpy(cached_buffers.back().get(), pointer, size);
 		cached_size += size;
@@ -73,9 +79,11 @@ void JSONFileHandle::ReadAtPosition(const char *pointer, idx_t size, idx_t posit
 
 	if (!cached_buffers.empty() || position < cached_size) {
 		ReadFromCache(pointer, size, position);
+		actual_reads++;
 	}
 	if (size != 0) {
 		file_handle->Read((void *)pointer, size, position);
+		actual_reads++;
 	}
 }
 
@@ -143,6 +151,16 @@ void BufferedJSONReader::OpenJSONFile() {
 	file_handle = make_unique<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
 }
 
+void BufferedJSONReader::CloseJSONFile() {
+	while (true) {
+		lock_guard<mutex> guard(lock);
+		if (file_handle->RequestedReadsComplete()) {
+			file_handle->Close();
+			break;
+		}
+	}
+}
+
 bool BufferedJSONReader::IsOpen() {
 	return file_handle != nullptr;
 }
@@ -246,9 +264,15 @@ void BufferedJSONReader::Reset() {
 
 void JSONFileHandle::Reset() {
 	read_position = 0;
+	requested_reads = 0;
+	actual_reads = 0;
 	if (plain_file_source) {
 		file_handle->Reset();
 	}
}
 
+bool JSONFileHandle::RequestedReadsComplete() {
+	return requested_reads == actual_reads;
+}
+
 } // namespace duckdb
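The buffered_json_reader changes above implement a small read-accounting protocol: GetPositionAndSize reserves a byte range under the reader's lock and increments requested_reads, the read itself increments the atomic actual_reads once the I/O completes, and CloseJSONFile re-checks under the lock until both counters agree, so a handle is never closed while another thread's read is still in flight. A minimal standalone sketch of the same idiom (simplified stand-in types, not the DuckDB classes themselves):

// Sketch of the requested/actual read-accounting idiom; names are illustrative.
#include <atomic>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

struct Handle {
	std::mutex lock;                       // protects reservation and close
	std::size_t requested_reads = 0;       // bumped under the lock when a range is handed out
	std::atomic<std::size_t> actual_reads {0}; // bumped when the I/O actually completes
	bool open = true;

	// Reserve a chunk to read (mirrors GetPositionAndSize).
	bool Reserve() {
		std::lock_guard<std::mutex> guard(lock);
		if (!open) {
			return false;
		}
		requested_reads++;
		return true;
	}
	// The read happens outside the lock (mirrors ReadAtPosition).
	void DoRead() {
		// ... perform I/O here ...
		actual_reads++;
	}
	// Close only once every reserved read has completed (mirrors CloseJSONFile).
	void Close() {
		while (true) {
			std::lock_guard<std::mutex> guard(lock);
			if (requested_reads == actual_reads.load()) {
				open = false;
				break;
			}
		}
	}
};

int main() {
	Handle h;
	std::vector<std::thread> readers;
	for (int t = 0; t < 4; t++) {
		readers.emplace_back([&] {
			for (int i = 0; i < 1000 && h.Reserve(); i++) {
				h.DoRead();
			}
		});
	}
	std::thread closer([&] { h.Close(); });
	closer.join();
	for (auto &t : readers) {
		t.join();
	}
	std::printf("closed after %zu reads\n", h.actual_reads.load());
}

Only requested_reads needs the lock, since it is only touched while the lock is held; actual_reads is atomic because it is bumped outside the lock, after the I/O finishes.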
package/src/duckdb/extension/json/include/buffered_json_reader.hpp
CHANGED

@@ -58,11 +58,11 @@ public:
 struct JSONFileHandle {
 public:
 	JSONFileHandle(unique_ptr<FileHandle> file_handle, Allocator &allocator);
+	void Close();
 
 	idx_t FileSize() const;
 	idx_t Remaining() const;
 
-	bool PlainFileSource() const;
 	bool CanSeek() const;
 	void Seek(idx_t position);
 
@@ -71,6 +71,7 @@ public:
 	idx_t Read(const char *pointer, idx_t requested_size, bool sample_run);
 
 	void Reset();
+	bool RequestedReadsComplete();
 
private:
 	idx_t ReadFromCache(const char *&pointer, idx_t &size, idx_t &position);
@@ -87,6 +88,8 @@ private:
 
 	//! Read properties
 	idx_t read_position;
+	idx_t requested_reads;
+	atomic<idx_t> actual_reads;
 
 	//! Cached buffers for resetting when reading stream
 	vector<AllocatedData> cached_buffers;
@@ -98,6 +101,7 @@ public:
 	BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options, string file_path);
 
 	void OpenJSONFile();
+	void CloseJSONFile();
 	bool IsOpen();
 
 	BufferedJSONReaderOptions &GetOptions();
package/src/duckdb/extension/json/include/json_scan.hpp
CHANGED

@@ -26,6 +26,16 @@ enum class JSONScanType : uint8_t {
 	SAMPLE = 3,
 };
 
+enum class JSONScanTopLevelType : uint8_t {
+	INVALID = 0,
+	//! Sequential objects, e.g., NDJSON
+	OBJECTS = 1,
+	//! Top-level array containing objects
+	ARRAY_OF_OBJECTS = 2,
+	//! Other, e.g., array of integer, or just strings
+	OTHER = 3
+};
+
 //! Even though LogicalTypeId is just a uint8_t, this is still needed ...
 struct LogicalTypeIdHash {
 	inline std::size_t operator()(const LogicalTypeId &id) const {
@@ -105,7 +115,7 @@ public:
 	//! Max depth we go to detect nested JSON schema (defaults to unlimited)
 	idx_t max_depth = NumericLimits<idx_t>::Maximum();
 	//! Whether we're parsing objects (usually), or something else like arrays
-
+	JSONScanTopLevelType top_level_type = JSONScanTopLevelType::OBJECTS;
 	//! Forced date/timestamp formats
 	string date_format;
 	string timestamp_format;
@@ -181,9 +191,14 @@ public:
 	yyjson_alc *GetAllocator();
 	void ThrowTransformError(idx_t count, idx_t object_index, const string &error_message);
 
+	idx_t scan_count;
 	JSONLine lines[STANDARD_VECTOR_SIZE];
 	yyjson_val *objects[STANDARD_VECTOR_SIZE];
 
+	idx_t array_idx;
+	idx_t array_offset;
+	yyjson_val *array_objects[STANDARD_VECTOR_SIZE];
+
 	idx_t batch_index;
 
 	//! Options when transforming the JSON to columnar data
@@ -192,6 +207,7 @@ public:
 
private:
 	yyjson_val *ParseLine(char *line_start, idx_t line_size, idx_t remaining, JSONLine &line);
+	idx_t GetObjectsFromArray();
 
private:
 	//! Bind data
@@ -300,7 +316,6 @@ public:
 		table_function.serialize = JSONScanSerialize;
 		table_function.deserialize = JSONScanDeserialize;
 
-		// TODO: might be able to do some of these
 		table_function.projection_pushdown = false;
 		table_function.filter_pushdown = false;
 		table_function.filter_prune = false;
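For reference, the three non-INVALID top-level types correspond to input shapes like these (the sample inputs are illustrative, not taken from the diff):

// Illustrative inputs for each JSONScanTopLevelType:
const char *objects_ndjson =   // OBJECTS: sequential objects, e.g. NDJSON
    "{\"id\": 1}\n"
    "{\"id\": 2}\n";
const char *array_of_objects = // ARRAY_OF_OBJECTS: one top-level array of objects
    "[{\"id\": 1}, {\"id\": 2}]";
const char *other =            // OTHER: anything else, e.g. an array of integers
    "[1, 2, 3]";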
package/src/duckdb/extension/json/json_functions/json_transform.cpp
CHANGED

@@ -523,6 +523,21 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
 	return success;
 }
 
+bool TransformToJSON(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count) {
+	auto data = (string_t *)FlatVector::GetData(result);
+	auto &validity = FlatVector::Validity(result);
+	for (idx_t i = 0; i < count; i++) {
+		const auto &val = vals[i];
+		if (!val) {
+			validity.SetInvalid(i);
+		} else {
+			data[i] = JSONCommon::WriteVal(val, alc);
+		}
+	}
+	// Can always transform to JSON
+	return true;
+}
+
 bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
                               JSONTransformOptions &options) {
 	auto result_type = result.GetType();
@@ -531,6 +546,10 @@ bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &resul
 		return TransformFromStringWithFormat(vals, result, count, options);
 	}
 
+	if (JSONCommon::LogicalTypeIsJSON(result_type)) {
+		return TransformToJSON(vals, alc, result, count);
+	}
+
 	switch (result_type.id()) {
 	case LogicalTypeId::SQLNULL:
 		return true;
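TransformToJSON is the new fallback for columns whose target type is JSON: each parsed yyjson value is simply written back out as JSON text (the diff routes this through JSONCommon::WriteVal), so it succeeds for any input shape. The sketch below shows the same re-serialization using the stock yyjson API (yyjson_val_write); it is a standalone illustration against plain yyjson, not the DuckDB wrapper:

// Re-serialize each element of a parsed array back to JSON text.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "yyjson.h"

int main() {
	const char *json = "[{\"a\": 1}, [2, 3], \"four\"]";
	yyjson_doc *doc = yyjson_read(json, strlen(json), 0);
	yyjson_val *arr = yyjson_doc_get_root(doc);

	size_t idx, max;
	yyjson_val *val;
	yyjson_arr_foreach(arr, idx, max, val) {
		size_t len;
		// Serialize one element back to JSON text (NULL elements would stay NULL
		// via the validity mask in the vectorized version above).
		char *str = yyjson_val_write(val, 0, &len);
		std::printf("%zu: %.*s\n", idx, (int)len, str);
		std::free(str);
	}
	yyjson_doc_free(doc);
}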
package/src/duckdb/extension/json/json_functions/read_json.cpp
CHANGED

@@ -13,32 +13,17 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
 	JSONScanLocalState lstate(context, gstate);
 	ArenaAllocator allocator(BufferAllocator::Get(context));
 
-	static const unordered_map<LogicalTypeId, vector<const char *>, LogicalTypeIdHash> FORMAT_TEMPLATES = {
-	    {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
-	    {LogicalTypeId::TIMESTAMP,
-	     {"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S",
-	      "%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"}},
-	};
-
-	// Populate possible date/timestamp formats, assume this is consistent across columns
-	for (auto &kv : FORMAT_TEMPLATES) {
-		const auto &type = kv.first;
-		if (bind_data.date_format_map.HasFormats(type)) {
-			continue; // Already populated
-		}
-		const auto &format_strings = kv.second;
-		for (auto &format_string : format_strings) {
-			bind_data.date_format_map.AddFormat(type, format_string);
-		}
-	}
-
 	// Read for the specified sample size
 	JSONStructureNode node;
+	bool more_than_one = false;
 	Vector string_vector(LogicalType::VARCHAR);
 	idx_t remaining = bind_data.sample_size;
 	while (remaining != 0) {
 		allocator.Reset();
 		auto read_count = lstate.ReadNext(gstate);
+		if (read_count > 1) {
+			more_than_one = true;
+		}
 		if (read_count == 0) {
 			break;
 		}
@@ -54,15 +39,29 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
 		node.InitializeCandidateTypes(bind_data.max_depth);
 		node.RefineCandidateTypes(lstate.objects, next, string_vector, allocator, bind_data.date_format_map);
 		remaining -= next;
+
+		if (gstate.file_index == 10) {
+			// We really shouldn't open more than 10 files when sampling
+			break;
+		}
 	}
 	bind_data.type = original_scan_type;
 	bind_data.transform_options.date_format_map = &bind_data.date_format_map;
 
-
+	auto type = JSONStructure::StructureToType(context, node, bind_data.max_depth);
+	if (type.id() == LogicalTypeId::STRUCT) {
+		bind_data.top_level_type = JSONScanTopLevelType::OBJECTS;
+	} else if (!more_than_one && type.id() == LogicalTypeId::LIST &&
+	           ListType::GetChildType(type).id() == LogicalTypeId::STRUCT) {
+		bind_data.top_level_type = JSONScanTopLevelType::ARRAY_OF_OBJECTS;
+		bind_data.options.format = JSONFormat::UNSTRUCTURED;
+		type = ListType::GetChildType(type);
+	}
+
 	if (type.id() != LogicalTypeId::STRUCT) {
 		return_types.emplace_back(type);
 		names.emplace_back("json");
-		bind_data.
+		bind_data.top_level_type = JSONScanTopLevelType::OTHER;
 	} else {
 		const auto &child_types = StructType::GetChildTypes(type);
 		return_types.reserve(child_types.size());
@@ -189,9 +188,11 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
 	auto &gstate = ((JSONGlobalTableFunctionState &)*data_p.global_state).state;
 	auto &lstate = ((JSONLocalTableFunctionState &)*data_p.local_state).state;
 
-	// Fetch next lines
 	const auto count = lstate.ReadNext(gstate);
-	const auto objects =
+	const auto objects = gstate.bind_data.top_level_type == JSONScanTopLevelType::ARRAY_OF_OBJECTS
+	                         ? lstate.array_objects
+	                         : lstate.objects;
+	output.SetCardinality(count);
 
 	vector<Vector *> result_vectors;
 	result_vectors.reserve(output.ColumnCount());
@@ -202,13 +203,14 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
 
 	// Pass current reader to transform options so we can get line number information if an error occurs
 	bool success;
-	if (gstate.bind_data.
-		success = JSONTransform::TransformObject(objects, lstate.GetAllocator(), count, gstate.bind_data.names,
-		                                         result_vectors, lstate.transform_options);
-	} else {
+	if (gstate.bind_data.top_level_type == JSONScanTopLevelType::OTHER) {
 		success = JSONTransform::Transform(objects, lstate.GetAllocator(), *result_vectors[0], count,
 		                                   lstate.transform_options);
+	} else {
+		success = JSONTransform::TransformObject(objects, lstate.GetAllocator(), count, gstate.bind_data.names,
+		                                         result_vectors, lstate.transform_options);
 	}
+
 	if (!success) {
 		string hint = gstate.bind_data.auto_detect
 		                  ? "\nTry increasing 'sample_size', reducing 'maximum_depth', specifying 'columns' manually, "
@@ -217,7 +219,6 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
 		lstate.ThrowTransformError(count, lstate.transform_options.object_index,
 		                           lstate.transform_options.error_message + hint);
 	}
-	output.SetCardinality(count);
 }
 
 TableFunction JSONFunctions::GetReadJSONTableFunction(bool list_parameter, shared_ptr<JSONScanInfo> function_info) {
@@ -235,6 +236,7 @@ TableFunction JSONFunctions::GetReadJSONTableFunction(bool list_parameter, share
 	table_function.named_parameters["timestamp_format"] = LogicalType::VARCHAR;
 
 	table_function.projection_pushdown = true;
+	// TODO: might be able to do filter pushdown/prune too
 
 	table_function.function_info = std::move(function_info);
 
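The new tail of JSONScan::AutoDetect maps the sampled structure to a JSONScanTopLevelType: a STRUCT means plain sequential objects; a single top-level LIST whose child type is STRUCT (only possible when the whole sample parsed as one value, hence the more_than_one flag) is unwrapped and scanned as an array of objects; anything else is exposed as a single "json" column. The rule, restated as a pure function over a simplified type model (enum names here are illustrative, not DuckDB's):

enum class Kind { STRUCT_T, LIST_OF_STRUCT, OTHER_T };
enum class TopLevel { OBJECTS, ARRAY_OF_OBJECTS, OTHER };

TopLevel Detect(Kind detected, bool more_than_one_value_in_sample) {
	if (detected == Kind::STRUCT_T) {
		return TopLevel::OBJECTS; // e.g. NDJSON: every sampled value is an object
	}
	if (!more_than_one_value_in_sample && detected == Kind::LIST_OF_STRUCT) {
		// The whole file parsed as one array of objects:
		// scan the array elements as if they were rows.
		return TopLevel::ARRAY_OF_OBJECTS;
	}
	return TopLevel::OTHER; // scalar/array values: a single "json" column
}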
package/src/duckdb/extension/json/json_functions.cpp
CHANGED

@@ -166,6 +166,12 @@ vector<CreateTableFunctionInfo> JSONFunctions::GetTableFunctions() {
 unique_ptr<TableRef> JSONFunctions::ReadJSONReplacement(ClientContext &context, const string &table_name,
                                                         ReplacementScanData *data) {
 	auto lower_name = StringUtil::Lower(table_name);
+	// remove any compression
+	if (StringUtil::EndsWith(lower_name, ".gz")) {
+		lower_name = lower_name.substr(0, lower_name.size() - 3);
+	} else if (StringUtil::EndsWith(lower_name, ".zst")) {
+		lower_name = lower_name.substr(0, lower_name.size() - 4);
+	}
 	if (!StringUtil::EndsWith(lower_name, ".json") && !StringUtil::Contains(lower_name, ".json?") &&
 	    !StringUtil::EndsWith(lower_name, ".ndjson") && !StringUtil::Contains(lower_name, ".ndjson?")) {
 		return nullptr;
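ReadJSONReplacement now strips a trailing .gz or .zst before testing the file suffix, so names like data.json.gz still trigger the JSON replacement scan. The logic reduces to roughly the following (a std::string re-implementation for illustration; the query-string variants .json? / .ndjson? handled in the diff are elided here):

#include <string>

static bool EndsWith(const std::string &s, const std::string &suffix) {
	return s.size() >= suffix.size() &&
	       s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}

bool LooksLikeJSONFile(std::string name) {
	// remove any compression suffix first, e.g. "data.json.gz" -> "data.json"
	if (EndsWith(name, ".gz")) {
		name.resize(name.size() - 3);
	} else if (EndsWith(name, ".zst")) {
		name.resize(name.size() - 4);
	}
	return EndsWith(name, ".json") || EndsWith(name, ".ndjson");
}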
package/src/duckdb/extension/json/json_scan.cpp
CHANGED

@@ -3,6 +3,7 @@
 #include "duckdb/main/database.hpp"
 #include "duckdb/parallel/task_scheduler.hpp"
 #include "duckdb/storage/buffer_manager.hpp"
+#include "duckdb/main/extension_helper.hpp"
 
 namespace duckdb {
 
@@ -47,8 +48,11 @@ unique_ptr<FunctionData> JSONScanData::Bind(ClientContext &context, TableFunctio
 			options.format = JSONFormat::UNSTRUCTURED;
 		} else if (format == "newline_delimited") {
 			options.format = JSONFormat::NEWLINE_DELIMITED;
+		} else if (format == "array_of_objects") {
+			result->top_level_type = JSONScanTopLevelType::ARRAY_OF_OBJECTS;
 		} else {
-			throw BinderException(
+			throw BinderException(
+			    "format must be one of ['auto', 'unstructured', 'newline_delimited', 'array_of_objects']");
 		}
 	} else if (loption == "compression") {
 		auto compression = StringUtil::Lower(StringValue::Get(kv.second));
@@ -66,6 +70,10 @@ unique_ptr<FunctionData> JSONScanData::Bind(ClientContext &context, TableFunctio
 		}
 	}
 
+	if (result->top_level_type == JSONScanTopLevelType::ARRAY_OF_OBJECTS) {
+		result->options.format = JSONFormat::UNSTRUCTURED;
+	}
+
 	return std::move(result);
 }
 
@@ -75,7 +83,7 @@ void JSONScanData::InitializeFilePaths(ClientContext &context, const vector<stri
 	for (auto &file_pattern : patterns) {
 		auto found_files = fs.Glob(file_pattern, context);
 		if (found_files.empty()) {
-			throw
+			throw FileSystem::MissingFileException(file_pattern, context);
 		}
 		file_paths.insert(file_paths.end(), found_files.begin(), found_files.end());
 	}
@@ -97,6 +105,27 @@ void JSONScanData::InitializeFormats() {
 	if (!timestamp_format.empty()) {
 		date_format_map.AddFormat(LogicalTypeId::TIMESTAMP, timestamp_format);
 	}
+
+	if (auto_detect) {
+		static const unordered_map<LogicalTypeId, vector<const char *>, LogicalTypeIdHash> FORMAT_TEMPLATES = {
+		    {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
+		    {LogicalTypeId::TIMESTAMP,
+		     {"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S",
+		      "%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"}},
+		};
+
+		// Populate possible date/timestamp formats, assume this is consistent across columns
+		for (auto &kv : FORMAT_TEMPLATES) {
+			const auto &type = kv.first;
+			if (date_format_map.HasFormats(type)) {
+				continue; // Already populated
+			}
+			const auto &format_strings = kv.second;
+			for (auto &format_string : format_strings) {
+				date_format_map.AddFormat(type, format_string);
+			}
+		}
+	}
 }
 
 void JSONScanData::Serialize(FieldWriter &writer) {
@@ -111,9 +140,17 @@ void JSONScanData::Serialize(FieldWriter &writer) {
 	writer.WriteList<string>(names);
 	writer.WriteList<idx_t>(valid_cols);
 	writer.WriteField<idx_t>(max_depth);
-	writer.WriteField<
-
-
+	writer.WriteField<JSONScanTopLevelType>(top_level_type);
+	if (!date_format.empty()) {
+		writer.WriteString(date_format);
+	} else {
+		writer.WriteString(date_format_map.GetFormat(LogicalTypeId::DATE).format_specifier);
+	}
+	if (!timestamp_format.empty()) {
+		writer.WriteString(timestamp_format);
+	} else {
+		writer.WriteString(date_format_map.GetFormat(LogicalTypeId::TIMESTAMP).format_specifier);
+	}
 }
 
 void JSONScanData::Deserialize(FieldReader &reader) {
@@ -128,9 +165,12 @@ void JSONScanData::Deserialize(FieldReader &reader) {
 	names = reader.ReadRequiredList<string>();
 	valid_cols = reader.ReadRequiredList<idx_t>();
 	max_depth = reader.ReadRequired<idx_t>();
-
+	top_level_type = reader.ReadRequired<JSONScanTopLevelType>();
 	date_format = reader.ReadRequired<string>();
 	timestamp_format = reader.ReadRequired<string>();
+
+	InitializeFormats();
+	transform_options.date_format_map = &date_format_map;
 }
 
 JSONScanGlobalState::JSONScanGlobalState(ClientContext &context, JSONScanData &bind_data_p)
@@ -149,9 +189,9 @@ JSONScanGlobalState::JSONScanGlobalState(ClientContext &context, JSONScanData &b
 }
 
 JSONScanLocalState::JSONScanLocalState(ClientContext &context, JSONScanGlobalState &gstate)
-    : batch_index(DConstants::INVALID_INDEX), bind_data(gstate.bind_data),
+    : scan_count(0), array_idx(0), array_offset(0), batch_index(DConstants::INVALID_INDEX), bind_data(gstate.bind_data),
       json_allocator(BufferAllocator::Get(context)), current_reader(nullptr), current_buffer_handle(nullptr),
-      buffer_size(0), buffer_offset(0), prev_buffer_remainder(0) {
+      is_last(false), buffer_size(0), buffer_offset(0), prev_buffer_remainder(0) {
 
 	// Buffer to reconstruct JSON objects when they cross a buffer boundary
 	reconstruct_buffer = gstate.allocator.Allocate(gstate.bind_data.maximum_object_size + YYJSON_PADDING_SIZE);
@@ -173,11 +213,6 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
 	// Perform projection pushdown
 	if (bind_data.type == JSONScanType::READ_JSON) {
 		D_ASSERT(input.column_ids.size() <= bind_data.names.size()); // Can't project to have more columns
-		if (bind_data.auto_detect && input.column_ids.size() < bind_data.names.size()) {
-			// If we are auto-detecting, but don't need all columns present in the file,
-			// then we don't need to throw an error if we encounter an unseen column
-			bind_data.transform_options.error_unknown_key = false;
-		}
 		vector<string> names;
 		names.reserve(input.column_ids.size());
 		for (idx_t i = 0; i < input.column_ids.size(); i++) {
@@ -188,6 +223,11 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
 			names.push_back(std::move(bind_data.names[id]));
 			bind_data.valid_cols.push_back(i);
 		}
+		if (names.size() < bind_data.names.size()) {
+			// If we are auto-detecting, but don't need all columns present in the file,
+			// then we don't need to throw an error if we encounter an unseen column
+			bind_data.transform_options.error_unknown_key = false;
+		}
 		bind_data.names = std::move(names);
 	}
 	return result;
@@ -230,6 +270,10 @@ static inline void SkipWhitespace(const char *buffer_ptr, idx_t &buffer_offset,
 idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) {
 	json_allocator.Reset();
 
+	if (gstate.bind_data.top_level_type == JSONScanTopLevelType::ARRAY_OF_OBJECTS && array_idx < scan_count) {
+		return GetObjectsFromArray();
+	}
+
 	idx_t count = 0;
 	if (buffer_offset == buffer_size) {
 		if (!ReadNextBuffer(gstate)) {
@@ -253,10 +297,20 @@ idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) {
 	default:
 		throw InternalException("Unknown JSON format");
 	}
+	scan_count = count;
 
 	// Skip over any remaining whitespace for the next scan
 	SkipWhitespace(buffer_ptr, buffer_offset, buffer_size);
 
+	if (gstate.bind_data.top_level_type == JSONScanTopLevelType::ARRAY_OF_OBJECTS) {
+		if (scan_count > 1) {
+			throw InvalidInputException("File must have exactly one array of objects when format='array_of_objects'");
+		}
+		array_idx = 0;
+		array_offset = 0;
+		return GetObjectsFromArray();
+	}
+
 	return count;
 }
 
@@ -331,10 +385,39 @@ yyjson_val *JSONScanLocalState::ParseLine(char *line_start, idx_t line_size, idx
 	}
 }
 
+idx_t JSONScanLocalState::GetObjectsFromArray() {
+	idx_t arr_count = 0;
+
+	size_t idx, max;
+	yyjson_val *val;
+	for (; array_idx < scan_count; array_idx++, array_offset = 0) {
+		if (objects[array_idx]) {
+			yyjson_arr_foreach(objects[array_idx], idx, max, val) {
+				if (idx < array_offset) {
+					continue;
+				}
+				array_objects[arr_count++] = val;
+				if (arr_count == STANDARD_VECTOR_SIZE) {
+					break;
+				}
+			}
+			array_offset = idx + 1;
+			if (arr_count == STANDARD_VECTOR_SIZE) {
+				break;
+			}
+		}
+	}
+	return arr_count;
+}
+
 bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
 	if (current_reader) {
 		D_ASSERT(current_buffer_handle);
 		current_reader->SetBufferLineOrObjectCount(current_buffer_handle->buffer_index, lines_or_objects_in_buffer);
+		if (is_last && gstate.bind_data.type != JSONScanType::SAMPLE) {
+			// Close files that are done if we're not sampling
+			current_reader->CloseJSONFile();
+		}
 	}
 
 	AllocatedData buffer;
@@ -395,7 +478,9 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
 		// Unopened file
 		current_reader->OpenJSONFile();
 		batch_index = gstate.batch_index++;
-		if (options.format == JSONFormat::UNSTRUCTURED
+		if (options.format == JSONFormat::UNSTRUCTURED || (options.format == JSONFormat::NEWLINE_DELIMITED &&
+		                                                   options.compression != FileCompressionType::UNCOMPRESSED &&
+		                                                   gstate.file_index < gstate.json_readers.size())) {
 			gstate.file_index++; // UNSTRUCTURED necessitates single-threaded read
 		}
 		if (options.format != JSONFormat::AUTO_DETECT) {
@@ -449,9 +534,6 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
 	auto json_buffer_handle = make_unique<JSONBufferHandle>(buffer_index, readers, std::move(buffer), buffer_size);
 	current_buffer_handle = json_buffer_handle.get();
 	current_reader->InsertBuffer(buffer_index, std::move(json_buffer_handle));
-	if (!current_reader->GetFileHandle().PlainFileSource() && gstate.bind_data.type == JSONScanType::SAMPLE) {
-		// TODO: store buffer
-	}
 
 	buffer_offset = 0;
 	prev_buffer_remainder = 0;
@@ -507,16 +589,18 @@ void JSONScanLocalState::ReadNextBufferSeek(JSONScanGlobalState &gstate, idx_t &
 }
 
 void JSONScanLocalState::ReadNextBufferNoSeek(JSONScanGlobalState &gstate, idx_t &buffer_index) {
-	auto &file_handle = current_reader->GetFileHandle();
-
 	idx_t request_size = gstate.buffer_capacity - prev_buffer_remainder - YYJSON_PADDING_SIZE;
 	idx_t read_size;
 	{
 		lock_guard<mutex> reader_guard(current_reader->lock);
 		buffer_index = current_reader->GetBufferIndex();
 
-
-
+		if (current_reader->IsOpen()) {
+			read_size = current_reader->GetFileHandle().Read(buffer_ptr + prev_buffer_remainder, request_size,
+			                                                 gstate.bind_data.type == JSONScanType::SAMPLE);
+		} else {
+			read_size = 0;
+		}
 	}
 	is_last = read_size < request_size;
 
 	if (!gstate.bind_data.ignore_errors && read_size == 0 && prev_buffer_remainder != 0) {
@@ -582,6 +666,11 @@ void JSONScanLocalState::ReconstructFirstObject(JSONScanGlobalState &gstate) {
 }
 
 void JSONScanLocalState::ReadUnstructured(idx_t &count) {
+	// yyjson does not always return YYJSON_READ_ERROR_UNEXPECTED_END properly
+	// if a different error code happens within the last 50 bytes
+	// we assume it should be YYJSON_READ_ERROR_UNEXPECTED_END instead
+	static constexpr idx_t END_BOUND = 50;
+
 	const auto max_obj_size = reconstruct_buffer.GetSize();
 	yyjson_read_err error;
 	for (; count < STANDARD_VECTOR_SIZE; count++) {
@@ -607,8 +696,7 @@ void JSONScanLocalState::ReadUnstructured(idx_t &count) {
 		} else if (error.pos > max_obj_size) {
 			current_reader->ThrowParseError(current_buffer_handle->buffer_index, lines_or_objects_in_buffer, error,
 			                                "Try increasing \"maximum_object_size\".");
-
-		} else if (error.code == YYJSON_READ_ERROR_UNEXPECTED_END && !is_last) {
+		} else if (!is_last && (error.code == YYJSON_READ_ERROR_UNEXPECTED_END || remaining - error.pos < END_BOUND)) {
 			// Copy remaining to reconstruct_buffer
 			const auto reconstruct_ptr = reconstruct_buffer.get();
 			memcpy(reconstruct_ptr, obj_copy_start, remaining);
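GetObjectsFromArray is the core of the array_of_objects mode: it flattens the elements of the parsed top-level array(s) into chunks of at most STANDARD_VECTOR_SIZE, and the pair (array_idx, array_offset) remembers where the previous call stopped so the next ReadNext resumes mid-array. The same resumable-chunking pattern as a standalone sketch over std::vector (capacity 4 stands in for STANDARD_VECTOR_SIZE):

#include <cstdio>
#include <vector>

struct ChunkedArrayScan {
	std::vector<std::vector<int>> arrays; // parsed top-level arrays
	std::size_t array_idx = 0;            // which array we're in
	std::size_t array_offset = 0;         // where we left off inside it

	// Fill `out` with up to `capacity` elements; returns how many were produced.
	std::size_t Next(int *out, std::size_t capacity) {
		std::size_t count = 0;
		for (; array_idx < arrays.size(); array_idx++, array_offset = 0) {
			const auto &arr = arrays[array_idx];
			while (array_offset < arr.size() && count < capacity) {
				out[count++] = arr[array_offset++];
			}
			if (count == capacity) {
				break; // resume at (array_idx, array_offset) on the next call
			}
		}
		return count;
	}
};

int main() {
	ChunkedArrayScan scan;
	scan.arrays = {{1, 2, 3, 4, 5, 6}, {7, 8, 9}};
	int buf[4];
	std::size_t n;
	while ((n = scan.Next(buf, 4)) != 0) {
		for (std::size_t i = 0; i < n; i++) {
			std::printf("%d ", buf[i]);
		}
		std::printf("\n"); // chunks: "1 2 3 4", "5 6 7 8", "9"
	}
}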
package/src/duckdb/extension/parquet/parquet-extension.cpp
CHANGED

@@ -223,7 +223,7 @@ public:
 		FileSystem &fs = FileSystem::GetFileSystem(context);
 		auto files = fs.Glob(info.file_path, context);
 		if (files.empty()) {
-			throw
+			throw FileSystem::MissingFileException(info.file_path, context);
 		}
 
 		// The most likely path (Parquet read without union by name option)
@@ -363,8 +363,9 @@ public:
 
 	static vector<string> ParquetGlob(FileSystem &fs, const string &glob, ClientContext &context) {
 		auto files = fs.Glob(glob, FileSystem::GetFileOpener(context));
+
 		if (files.empty()) {
-			throw
+			throw FileSystem::MissingFileException(glob, context);
 		}
 		return files;
 	}
package/src/duckdb/src/common/enums/logical_operator_type.cpp
CHANGED

@@ -100,6 +100,8 @@ string LogicalOperatorToString(LogicalOperatorType type) {
 		return "CREATE_SCHEMA";
 	case LogicalOperatorType::LOGICAL_ATTACH:
 		return "ATTACH";
+	case LogicalOperatorType::LOGICAL_DETACH:
+		return "DETACH";
 	case LogicalOperatorType::LOGICAL_DROP:
 		return "DROP";
 	case LogicalOperatorType::LOGICAL_PRAGMA:
package/src/duckdb/src/common/enums/physical_operator_type.cpp
CHANGED

@@ -133,6 +133,8 @@ string PhysicalOperatorToString(PhysicalOperatorType type) {
 		return "CREATE_TYPE";
 	case PhysicalOperatorType::ATTACH:
 		return "ATTACH";
+	case PhysicalOperatorType::DETACH:
+		return "DETACH";
 	case PhysicalOperatorType::RESULT_COLLECTOR:
 		return "RESULT_COLLECTOR";
 	case PhysicalOperatorType::EXTENSION: