duckdb 0.7.2-dev3546.0 → 0.7.2-dev3710.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/database.cpp +1 -0
- package/src/duckdb/extension/json/buffered_json_reader.cpp +56 -17
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +56 -31
- package/src/duckdb/extension/json/include/json_common.hpp +5 -4
- package/src/duckdb/extension/json/include/json_executors.hpp +13 -18
- package/src/duckdb/extension/json/include/json_functions.hpp +3 -0
- package/src/duckdb/extension/json/include/json_scan.hpp +106 -153
- package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
- package/src/duckdb/extension/json/json_common.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +94 -38
- package/src/duckdb/extension/json/json_functions/json_contains.cpp +7 -8
- package/src/duckdb/extension/json/json_functions/json_create.cpp +7 -7
- package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +4 -4
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +4 -4
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +7 -5
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +11 -9
- package/src/duckdb/extension/json/json_functions/json_valid.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/read_json.cpp +166 -169
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +37 -16
- package/src/duckdb/extension/json/json_functions.cpp +11 -4
- package/src/duckdb/extension/json/json_scan.cpp +593 -374
- package/src/duckdb/extension/parquet/parquet-extension.cpp +5 -0
- package/src/duckdb/src/common/exception.cpp +17 -0
- package/src/duckdb/src/common/exception_format_value.cpp +14 -0
- package/src/duckdb/src/common/file_system.cpp +78 -36
- package/src/duckdb/src/common/local_file_system.cpp +5 -16
- package/src/duckdb/src/common/types.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +12 -6
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +10 -0
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +2 -2
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +6 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +66 -12
- package/src/duckdb/src/function/table/read_csv.cpp +16 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +8 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/column_dependency_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +26 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/limits.hpp +46 -46
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +8 -8
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/operator/convert_to_string.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +2 -4
- package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +25 -7
- package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +10 -10
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -12
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_iterators.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/winapi.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -3
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +10 -14
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/udf_function.hpp +56 -50
- package/src/duckdb/src/include/duckdb/main/appender.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/connection.hpp +8 -9
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -7
- package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/column_list.hpp +7 -7
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/keyword_helper.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +2 -2
- package/src/duckdb/src/main/db_instance_cache.cpp +5 -3
- package/src/duckdb/src/main/extension/extension_install.cpp +22 -18
- package/src/duckdb/src/parser/expression/collate_expression.cpp +1 -1
- package/src/duckdb/src/parser/keyword_helper.cpp +11 -1
- package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
- package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -2
- package/src/duckdb/src/parser/tableref.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +9 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +1 -1
- package/src/duckdb/src/storage/single_file_block_manager.cpp +0 -4
- package/src/duckdb/src/storage/storage_manager.cpp +3 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5735 -5773
package/package.json
CHANGED
package/src/database.cpp
CHANGED
@@ -328,6 +328,7 @@ ScanReplacement(duckdb::ClientContext &context, const std::string &table_name, d
|
|
328
328
|
children.push_back(duckdb::make_uniq<duckdb::ConstantExpression>(std::move(param)));
|
329
329
|
}
|
330
330
|
table_function->function = duckdb::make_uniq<duckdb::FunctionExpression>(jsargs.function, std::move(children));
|
331
|
+
table_function->alias = table_name;
|
331
332
|
return std::move(table_function);
|
332
333
|
}
|
333
334
|
return nullptr;
|
@@ -6,30 +6,35 @@
|
|
6
6
|
|
7
7
|
namespace duckdb {
|
8
8
|
|
9
|
-
void BufferedJSONReaderOptions::Serialize(FieldWriter &writer) {
|
10
|
-
writer.WriteString(file_path);
|
9
|
+
void BufferedJSONReaderOptions::Serialize(FieldWriter &writer) const {
|
11
10
|
writer.WriteField<JSONFormat>(format);
|
11
|
+
writer.WriteField<JSONRecordType>(record_type);
|
12
12
|
writer.WriteField<FileCompressionType>(compression);
|
13
|
+
writer.WriteSerializable(file_options);
|
13
14
|
}
|
14
15
|
|
15
16
|
void BufferedJSONReaderOptions::Deserialize(FieldReader &reader) {
|
16
|
-
file_path = reader.ReadRequired<string>();
|
17
17
|
format = reader.ReadRequired<JSONFormat>();
|
18
|
+
record_type = reader.ReadRequired<JSONRecordType>();
|
18
19
|
compression = reader.ReadRequired<FileCompressionType>();
|
20
|
+
file_options = reader.ReadRequiredSerializable<MultiFileReaderOptions, MultiFileReaderOptions>();
|
19
21
|
}
|
20
22
|
|
21
23
|
JSONBufferHandle::JSONBufferHandle(idx_t buffer_index_p, idx_t readers_p, AllocatedData &&buffer_p, idx_t buffer_size_p)
|
22
24
|
: buffer_index(buffer_index_p), readers(readers_p), buffer(std::move(buffer_p)), buffer_size(buffer_size_p) {
|
23
25
|
}
|
24
26
|
|
25
|
-
JSONFileHandle::JSONFileHandle(
|
27
|
+
JSONFileHandle::JSONFileHandle(unique_ptr<FileHandle> file_handle_p, Allocator &allocator_p)
|
26
28
|
: file_handle(std::move(file_handle_p)), allocator(allocator_p), can_seek(file_handle->CanSeek()),
|
27
29
|
plain_file_source(file_handle->OnDiskFile() && can_seek), file_size(file_handle->GetFileSize()), read_position(0),
|
28
30
|
requested_reads(0), actual_reads(0), cached_size(0) {
|
29
31
|
}
|
30
32
|
|
31
33
|
void JSONFileHandle::Close() {
|
32
|
-
file_handle
|
34
|
+
if (file_handle) {
|
35
|
+
file_handle->Close();
|
36
|
+
file_handle = nullptr;
|
37
|
+
}
|
33
38
|
cached_buffers.clear();
|
34
39
|
}
|
35
40
|
|
@@ -155,15 +160,15 @@ idx_t JSONFileHandle::ReadInternal(const char *pointer, const idx_t requested_si
|
|
155
160
|
return total_read_size;
|
156
161
|
}
|
157
162
|
|
158
|
-
BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options_p, string
|
159
|
-
:
|
163
|
+
BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options_p, string file_name_p)
|
164
|
+
: context(context), options(options_p), file_name(std::move(file_name_p)), buffer_index(0) {
|
160
165
|
}
|
161
166
|
|
162
167
|
void BufferedJSONReader::OpenJSONFile() {
|
163
168
|
lock_guard<mutex> guard(lock);
|
164
169
|
auto &file_system = FileSystem::GetFileSystem(context);
|
165
170
|
auto regular_file_handle =
|
166
|
-
file_system.OpenFile(
|
171
|
+
file_system.OpenFile(file_name.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, options.compression);
|
167
172
|
file_handle = make_uniq<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
|
168
173
|
}
|
169
174
|
|
@@ -177,7 +182,7 @@ void BufferedJSONReader::CloseJSONFile() {
|
|
177
182
|
}
|
178
183
|
}
|
179
184
|
|
180
|
-
bool BufferedJSONReader::IsOpen() {
|
185
|
+
bool BufferedJSONReader::IsOpen() const {
|
181
186
|
return file_handle != nullptr;
|
182
187
|
}
|
183
188
|
|
@@ -185,11 +190,41 @@ BufferedJSONReaderOptions &BufferedJSONReader::GetOptions() {
|
|
185
190
|
return options;
|
186
191
|
}
|
187
192
|
|
193
|
+
const BufferedJSONReaderOptions &BufferedJSONReader::GetOptions() const {
|
194
|
+
return options;
|
195
|
+
}
|
196
|
+
|
197
|
+
JSONFormat BufferedJSONReader::GetFormat() const {
|
198
|
+
return options.format;
|
199
|
+
}
|
200
|
+
|
201
|
+
void BufferedJSONReader::SetFormat(JSONFormat format) {
|
202
|
+
D_ASSERT(options.format == JSONFormat::AUTO_DETECT);
|
203
|
+
options.format = format;
|
204
|
+
}
|
205
|
+
|
206
|
+
JSONRecordType BufferedJSONReader::GetRecordType() const {
|
207
|
+
return options.record_type;
|
208
|
+
}
|
209
|
+
|
210
|
+
void BufferedJSONReader::SetRecordType(duckdb::JSONRecordType type) {
|
211
|
+
D_ASSERT(options.record_type == JSONRecordType::AUTO_DETECT);
|
212
|
+
options.record_type = type;
|
213
|
+
}
|
214
|
+
|
215
|
+
bool BufferedJSONReader::IsParallel() const {
|
216
|
+
return options.format == JSONFormat::NEWLINE_DELIMITED && file_handle->CanSeek();
|
217
|
+
}
|
218
|
+
|
219
|
+
const string &BufferedJSONReader::GetFileName() const {
|
220
|
+
return file_name;
|
221
|
+
}
|
222
|
+
|
188
223
|
JSONFileHandle &BufferedJSONReader::GetFileHandle() const {
|
189
224
|
return *file_handle;
|
190
225
|
}
|
191
226
|
|
192
|
-
void BufferedJSONReader::InsertBuffer(idx_t buffer_idx,
|
227
|
+
void BufferedJSONReader::InsertBuffer(idx_t buffer_idx, unique_ptr<JSONBufferHandle> &&buffer) {
|
193
228
|
lock_guard<mutex> guard(lock);
|
194
229
|
buffer_map.insert(make_pair(buffer_idx, std::move(buffer)));
|
195
230
|
}
|
@@ -220,7 +255,7 @@ void BufferedJSONReader::SetBufferLineOrObjectCount(idx_t index, idx_t count) {
|
|
220
255
|
}
|
221
256
|
|
222
257
|
idx_t BufferedJSONReader::GetLineNumber(idx_t buf_index, idx_t line_or_object_in_buf) {
|
223
|
-
D_ASSERT(options.format
|
258
|
+
D_ASSERT(options.format != JSONFormat::AUTO_DETECT);
|
224
259
|
while (true) {
|
225
260
|
lock_guard<mutex> guard(lock);
|
226
261
|
idx_t line = line_or_object_in_buf;
|
@@ -243,23 +278,23 @@ idx_t BufferedJSONReader::GetLineNumber(idx_t buf_index, idx_t line_or_object_in
|
|
243
278
|
|
244
279
|
void BufferedJSONReader::ThrowParseError(idx_t buf_index, idx_t line_or_object_in_buf, yyjson_read_err &err,
|
245
280
|
const string &extra) {
|
246
|
-
string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "
|
281
|
+
string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "record/value";
|
247
282
|
auto line = GetLineNumber(buf_index, line_or_object_in_buf);
|
248
|
-
throw InvalidInputException("Malformed JSON in file \"%s\", at byte %llu in %s %llu: %s. %s",
|
283
|
+
throw InvalidInputException("Malformed JSON in file \"%s\", at byte %llu in %s %llu: %s. %s", file_name,
|
249
284
|
err.pos + 1, unit, line + 1, err.msg, extra);
|
250
285
|
}
|
251
286
|
|
252
287
|
void BufferedJSONReader::ThrowTransformError(idx_t buf_index, idx_t line_or_object_in_buf,
|
253
288
|
const string &error_message) {
|
254
|
-
string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "
|
289
|
+
string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "record/value";
|
255
290
|
auto line = GetLineNumber(buf_index, line_or_object_in_buf);
|
256
|
-
throw InvalidInputException("JSON transform error in file \"%s\", in %s %llu: %s",
|
291
|
+
throw InvalidInputException("JSON transform error in file \"%s\", in %s %llu: %s.", file_name, unit, line,
|
257
292
|
error_message);
|
258
293
|
}
|
259
294
|
|
260
295
|
double BufferedJSONReader::GetProgress() const {
|
261
|
-
if (
|
262
|
-
return 100.0 * double(file_handle->Remaining()) / double(file_handle->FileSize());
|
296
|
+
if (IsOpen()) {
|
297
|
+
return 100.0 - 100.0 * double(file_handle->Remaining()) / double(file_handle->FileSize());
|
263
298
|
} else {
|
264
299
|
return 0;
|
265
300
|
}
|
@@ -270,6 +305,10 @@ void BufferedJSONReader::Reset() {
|
|
270
305
|
buffer_map.clear();
|
271
306
|
buffer_line_or_object_counts.clear();
|
272
307
|
|
308
|
+
if (!file_handle) {
|
309
|
+
return;
|
310
|
+
}
|
311
|
+
|
273
312
|
if (file_handle->CanSeek()) {
|
274
313
|
file_handle->Seek(0);
|
275
314
|
} else {
|
@@ -11,6 +11,7 @@
|
|
11
11
|
#include "duckdb/common/atomic.hpp"
|
12
12
|
#include "duckdb/common/enums/file_compression_type.hpp"
|
13
13
|
#include "duckdb/common/file_system.hpp"
|
14
|
+
#include "duckdb/common/multi_file_reader.hpp"
|
14
15
|
#include "duckdb/common/mutex.hpp"
|
15
16
|
#include "json_common.hpp"
|
16
17
|
|
@@ -19,23 +20,35 @@ namespace duckdb {
|
|
19
20
|
enum class JSONFormat : uint8_t {
|
20
21
|
//! Auto-detect format (UNSTRUCTURED / NEWLINE_DELIMITED)
|
21
22
|
AUTO_DETECT = 0,
|
22
|
-
//! One
|
23
|
+
//! One unit after another, newlines can be anywhere
|
23
24
|
UNSTRUCTURED = 1,
|
24
|
-
//!
|
25
|
+
//! Units are separated by newlines, newlines do not occur within Units (NDJSON)
|
25
26
|
NEWLINE_DELIMITED = 2,
|
27
|
+
//! File is one big array of units
|
28
|
+
ARRAY = 3,
|
29
|
+
};
|
30
|
+
|
31
|
+
enum class JSONRecordType : uint8_t {
|
32
|
+
AUTO_DETECT = 0,
|
33
|
+
//! Sequential objects that are unpacked
|
34
|
+
RECORDS = 1,
|
35
|
+
//! Any other JSON type, e.g., ARRAY
|
36
|
+
VALUES = 2,
|
26
37
|
};
|
27
38
|
|
28
39
|
struct BufferedJSONReaderOptions {
|
29
40
|
public:
|
30
|
-
//! The file path of the JSON file to read
|
31
|
-
string file_path;
|
32
41
|
//! The format of the JSON
|
33
42
|
JSONFormat format = JSONFormat::AUTO_DETECT;
|
43
|
+
//! Whether record types in the JSON
|
44
|
+
JSONRecordType record_type = JSONRecordType::AUTO_DETECT;
|
34
45
|
//! Whether file is compressed or not, and if so which compression type
|
35
46
|
FileCompressionType compression = FileCompressionType::AUTO_DETECT;
|
47
|
+
//! Multi-file reader options
|
48
|
+
MultiFileReaderOptions file_options;
|
36
49
|
|
37
50
|
public:
|
38
|
-
void Serialize(FieldWriter &writer);
|
51
|
+
void Serialize(FieldWriter &writer) const;
|
39
52
|
void Deserialize(FieldReader &reader);
|
40
53
|
};
|
41
54
|
|
@@ -57,7 +70,7 @@ public:
|
|
57
70
|
|
58
71
|
struct JSONFileHandle {
|
59
72
|
public:
|
60
|
-
JSONFileHandle(
|
73
|
+
JSONFileHandle(unique_ptr<FileHandle> file_handle, Allocator &allocator);
|
61
74
|
void Close();
|
62
75
|
|
63
76
|
idx_t FileSize() const;
|
@@ -79,7 +92,7 @@ private:
|
|
79
92
|
|
80
93
|
private:
|
81
94
|
//! The JSON file handle
|
82
|
-
|
95
|
+
unique_ptr<FileHandle> file_handle;
|
83
96
|
Allocator &allocator;
|
84
97
|
|
85
98
|
//! File properties
|
@@ -99,17 +112,50 @@ private:
|
|
99
112
|
|
100
113
|
class BufferedJSONReader {
|
101
114
|
public:
|
102
|
-
BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options, string
|
115
|
+
BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options, string file_name);
|
103
116
|
|
117
|
+
private:
|
118
|
+
ClientContext &context;
|
119
|
+
BufferedJSONReaderOptions options;
|
120
|
+
|
121
|
+
//! File name
|
122
|
+
const string file_name;
|
123
|
+
//! File handle
|
124
|
+
unique_ptr<JSONFileHandle> file_handle;
|
125
|
+
|
126
|
+
//! Next buffer index within the file
|
127
|
+
idx_t buffer_index;
|
128
|
+
//! Mapping from batch index to currently held buffers
|
129
|
+
unordered_map<idx_t, unique_ptr<JSONBufferHandle>> buffer_map;
|
130
|
+
|
131
|
+
//! Line count per buffer
|
132
|
+
vector<int64_t> buffer_line_or_object_counts;
|
133
|
+
|
134
|
+
public:
|
135
|
+
mutex lock;
|
136
|
+
MultiFileReaderData reader_data;
|
137
|
+
|
138
|
+
public:
|
104
139
|
void OpenJSONFile();
|
105
140
|
void CloseJSONFile();
|
106
|
-
bool IsOpen();
|
141
|
+
bool IsOpen() const;
|
107
142
|
|
108
143
|
BufferedJSONReaderOptions &GetOptions();
|
144
|
+
const BufferedJSONReaderOptions &GetOptions() const;
|
145
|
+
|
146
|
+
JSONFormat GetFormat() const;
|
147
|
+
void SetFormat(JSONFormat format);
|
148
|
+
JSONRecordType GetRecordType() const;
|
149
|
+
void SetRecordType(JSONRecordType type);
|
150
|
+
|
151
|
+
bool IsParallel() const;
|
152
|
+
|
153
|
+
const string &GetFileName() const;
|
109
154
|
JSONFileHandle &GetFileHandle() const;
|
110
155
|
|
156
|
+
public:
|
111
157
|
//! Insert/get/remove buffer (grabs the lock)
|
112
|
-
void InsertBuffer(idx_t buffer_idx,
|
158
|
+
void InsertBuffer(idx_t buffer_idx, unique_ptr<JSONBufferHandle> &&buffer);
|
113
159
|
JSONBufferHandle *GetBuffer(idx_t buffer_idx);
|
114
160
|
AllocatedData RemoveBuffer(idx_t buffer_idx);
|
115
161
|
|
@@ -127,27 +173,6 @@ public:
|
|
127
173
|
|
128
174
|
private:
|
129
175
|
idx_t GetLineNumber(idx_t buf_index, idx_t line_or_object_in_buf);
|
130
|
-
|
131
|
-
public:
|
132
|
-
mutex lock;
|
133
|
-
|
134
|
-
//! File path
|
135
|
-
const string file_path;
|
136
|
-
|
137
|
-
private:
|
138
|
-
ClientContext &context;
|
139
|
-
BufferedJSONReaderOptions options;
|
140
|
-
|
141
|
-
//! File handle
|
142
|
-
duckdb::unique_ptr<JSONFileHandle> file_handle;
|
143
|
-
|
144
|
-
//! Next buffer index within the file
|
145
|
-
idx_t buffer_index;
|
146
|
-
//! Mapping from batch index to currently held buffers
|
147
|
-
unordered_map<idx_t, duckdb::unique_ptr<JSONBufferHandle>> buffer_map;
|
148
|
-
|
149
|
-
//! Line count per buffer
|
150
|
-
vector<int64_t> buffer_line_or_object_counts;
|
151
176
|
};
|
152
177
|
|
153
178
|
} // namespace duckdb
|
@@ -22,7 +22,7 @@ public:
|
|
22
22
|
: arena_allocator(allocator), yyjson_allocator({Allocate, Reallocate, Free, &arena_allocator}) {
|
23
23
|
}
|
24
24
|
|
25
|
-
inline yyjson_alc *
|
25
|
+
inline yyjson_alc *GetYYAlc() {
|
26
26
|
return &yyjson_allocator;
|
27
27
|
}
|
28
28
|
|
@@ -62,7 +62,7 @@ struct JSONKeyHash {
|
|
62
62
|
memcpy(&result, k.ptr + k.len - sizeof(size_t), sizeof(size_t));
|
63
63
|
} else {
|
64
64
|
result = 0;
|
65
|
-
|
65
|
+
FastMemcpy(&result, k.ptr, k.len);
|
66
66
|
}
|
67
67
|
return result;
|
68
68
|
}
|
@@ -73,7 +73,7 @@ struct JSONKeyEquality {
|
|
73
73
|
if (a.len != b.len) {
|
74
74
|
return false;
|
75
75
|
}
|
76
|
-
return
|
76
|
+
return FastMemcmp(a.ptr, b.ptr, a.len) == 0;
|
77
77
|
}
|
78
78
|
};
|
79
79
|
|
@@ -98,7 +98,8 @@ public:
|
|
98
98
|
public:
|
99
99
|
//! Read/Write flags
|
100
100
|
static constexpr auto READ_FLAG = YYJSON_READ_ALLOW_INF_AND_NAN | YYJSON_READ_ALLOW_TRAILING_COMMAS;
|
101
|
-
static constexpr auto
|
101
|
+
static constexpr auto READ_STOP_FLAG = READ_FLAG | YYJSON_READ_STOP_WHEN_DONE;
|
102
|
+
static constexpr auto READ_INSITU_FLAG = READ_STOP_FLAG | YYJSON_READ_INSITU;
|
102
103
|
static constexpr auto WRITE_FLAG = YYJSON_WRITE_ALLOW_INF_AND_NAN;
|
103
104
|
static constexpr auto WRITE_PRETTY_FLAG = YYJSON_WRITE_ALLOW_INF_AND_NAN | YYJSON_WRITE_PRETTY;
|
104
105
|
|
@@ -20,7 +20,7 @@ public:
|
|
20
20
|
static void UnaryExecute(DataChunk &args, ExpressionState &state, Vector &result,
|
21
21
|
std::function<T(yyjson_val *, yyjson_alc *, Vector &)> fun) {
|
22
22
|
auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
|
23
|
-
auto alc = lstate.json_allocator.
|
23
|
+
auto alc = lstate.json_allocator.GetYYAlc();
|
24
24
|
|
25
25
|
auto &inputs = args.data[0];
|
26
26
|
UnaryExecutor::Execute<string_t, T>(inputs, result, args.size(), [&](string_t input) {
|
@@ -34,36 +34,32 @@ public:
|
|
34
34
|
static void BinaryExecute(DataChunk &args, ExpressionState &state, Vector &result,
|
35
35
|
std::function<T(yyjson_val *, yyjson_alc *, Vector &)> fun) {
|
36
36
|
auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
|
37
|
-
const auto &info =
|
37
|
+
const auto &info = func_expr.bind_info->Cast<JSONReadFunctionData>();
|
38
38
|
auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
|
39
|
-
auto alc = lstate.json_allocator.
|
39
|
+
auto alc = lstate.json_allocator.GetYYAlc();
|
40
40
|
|
41
41
|
auto &inputs = args.data[0];
|
42
|
-
if (info.constant) {
|
43
|
-
// Constant path
|
42
|
+
if (info.constant) { // Constant path
|
44
43
|
const char *ptr = info.ptr;
|
45
44
|
const idx_t &len = info.len;
|
46
45
|
UnaryExecutor::ExecuteWithNulls<string_t, T>(
|
47
46
|
inputs, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) {
|
48
|
-
auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG,
|
49
|
-
lstate.json_allocator.GetYYJSONAllocator());
|
47
|
+
auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
|
50
48
|
auto val = JSONCommon::GetPointerUnsafe<yyjson_val>(doc->root, ptr, len);
|
51
|
-
if (!val) {
|
49
|
+
if (!val || unsafe_yyjson_is_null(val)) {
|
52
50
|
mask.SetInvalid(idx);
|
53
51
|
return T {};
|
54
52
|
} else {
|
55
53
|
return fun(val, alc, result);
|
56
54
|
}
|
57
55
|
});
|
58
|
-
} else {
|
59
|
-
// Columnref path
|
56
|
+
} else { // Columnref path
|
60
57
|
auto &paths = args.data[1];
|
61
58
|
BinaryExecutor::ExecuteWithNulls<string_t, string_t, T>(
|
62
59
|
inputs, paths, result, args.size(), [&](string_t input, string_t path, ValidityMask &mask, idx_t idx) {
|
63
|
-
auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG,
|
64
|
-
lstate.json_allocator.GetYYJSONAllocator());
|
60
|
+
auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
|
65
61
|
auto val = JSONCommon::GetPointer<yyjson_val>(doc->root, path);
|
66
|
-
if (!val) {
|
62
|
+
if (!val || unsafe_yyjson_is_null(val)) {
|
67
63
|
mask.SetInvalid(idx);
|
68
64
|
return T {};
|
69
65
|
} else {
|
@@ -81,9 +77,9 @@ public:
|
|
81
77
|
static void ExecuteMany(DataChunk &args, ExpressionState &state, Vector &result,
|
82
78
|
std::function<T(yyjson_val *, yyjson_alc *, Vector &)> fun) {
|
83
79
|
auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
|
84
|
-
const auto &info =
|
80
|
+
const auto &info = func_expr.bind_info->Cast<JSONReadManyFunctionData>();
|
85
81
|
auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
|
86
|
-
auto alc = lstate.json_allocator.
|
82
|
+
auto alc = lstate.json_allocator.GetYYAlc();
|
87
83
|
D_ASSERT(info.ptrs.size() == info.lens.size());
|
88
84
|
|
89
85
|
const auto count = args.size();
|
@@ -112,12 +108,11 @@ public:
|
|
112
108
|
continue;
|
113
109
|
}
|
114
110
|
|
115
|
-
auto doc = JSONCommon::ReadDocument(inputs[idx], JSONCommon::READ_FLAG,
|
116
|
-
lstate.json_allocator.GetYYJSONAllocator());
|
111
|
+
auto doc = JSONCommon::ReadDocument(inputs[idx], JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
|
117
112
|
for (idx_t path_i = 0; path_i < num_paths; path_i++) {
|
118
113
|
auto child_idx = offset + path_i;
|
119
114
|
val = JSONCommon::GetPointerUnsafe<yyjson_val>(doc->root, info.ptrs[path_i], info.lens[path_i]);
|
120
|
-
if (!val) {
|
115
|
+
if (!val || unsafe_yyjson_is_null(val)) {
|
121
116
|
child_validity.SetInvalid(child_idx);
|
122
117
|
} else {
|
123
118
|
child_data[child_idx] = fun(val, alc, child);
|
@@ -115,10 +115,13 @@ private:
|
|
115
115
|
// Table functions
|
116
116
|
static TableFunctionSet GetReadJSONObjectsFunction();
|
117
117
|
static TableFunctionSet GetReadNDJSONObjectsFunction();
|
118
|
+
static TableFunctionSet GetReadJSONObjectsAutoFunction();
|
119
|
+
|
118
120
|
static TableFunctionSet GetReadJSONFunction();
|
119
121
|
static TableFunctionSet GetReadNDJSONFunction();
|
120
122
|
static TableFunctionSet GetReadJSONAutoFunction();
|
121
123
|
static TableFunctionSet GetReadNDJSONAutoFunction();
|
124
|
+
|
122
125
|
static TableFunctionSet GetExecuteJsonSerializedSqlFunction();
|
123
126
|
};
|
124
127
|
|