duckdb 0.6.2-dev1376.0 → 0.6.2-dev1568.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +3 -0
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +127 -0
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +123 -0
- package/src/duckdb/extension/json/include/json_common.hpp +95 -230
- package/src/duckdb/extension/json/include/json_executors.hpp +139 -0
- package/src/duckdb/extension/json/include/json_functions.hpp +57 -30
- package/src/duckdb/extension/json/include/json_scan.hpp +196 -0
- package/src/duckdb/extension/json/json-extension.cpp +24 -15
- package/src/duckdb/extension/json/json_common.cpp +6 -91
- package/src/duckdb/extension/json/json_functions/json_array_length.cpp +17 -12
- package/src/duckdb/extension/json/json_functions/json_contains.cpp +94 -43
- package/src/duckdb/extension/json/json_functions/json_create.cpp +38 -25
- package/src/duckdb/extension/json/json_functions/json_extract.cpp +29 -20
- package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +33 -11
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +16 -11
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +60 -41
- package/src/duckdb/extension/json/json_functions/json_type.cpp +18 -13
- package/src/duckdb/extension/json/json_functions/json_valid.cpp +16 -6
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +65 -0
- package/src/duckdb/extension/json/json_functions.cpp +200 -0
- package/src/duckdb/extension/json/json_scan.cpp +501 -0
- package/src/duckdb/extension/json/yyjson/include/yyjson.hpp +3838 -3398
- package/src/duckdb/extension/json/yyjson/yyjson.cpp +6580 -6411
- package/src/duckdb/extension/parquet/column_reader.cpp +0 -1
- package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
- package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/parquet_writer.cpp +0 -2
- package/src/duckdb/src/catalog/default/default_types.cpp +0 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +0 -1
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +0 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +3 -3
- package/src/duckdb/src/common/sort/sort_state.cpp +5 -2
- package/src/duckdb/src/common/types/conflict_info.cpp +18 -0
- package/src/duckdb/src/common/types/conflict_manager.cpp +257 -0
- package/src/duckdb/src/common/types/value.cpp +0 -18
- package/src/duckdb/src/common/types/vector.cpp +2 -6
- package/src/duckdb/src/common/types.cpp +13 -9
- package/src/duckdb/src/common/vector_operations/vector_cast.cpp +6 -2
- package/src/duckdb/src/execution/column_binding_resolver.cpp +20 -0
- package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +10 -4
- package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +80 -47
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -1
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +247 -8
- package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +17 -6
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +1 -1
- package/src/duckdb/src/function/cast/default_casts.cpp +10 -9
- package/src/duckdb/src/function/cast/enum_casts.cpp +0 -1
- package/src/duckdb/src/function/cast/list_casts.cpp +0 -1
- package/src/duckdb/src/function/cast/map_cast.cpp +0 -1
- package/src/duckdb/src/function/cast/numeric_casts.cpp +0 -1
- package/src/duckdb/src/function/cast/string_cast.cpp +0 -1
- package/src/duckdb/src/function/cast/struct_cast.cpp +0 -1
- package/src/duckdb/src/function/cast/time_casts.cpp +0 -9
- package/src/duckdb/src/function/cast/union_casts.cpp +2 -2
- package/src/duckdb/src/function/cast/uuid_casts.cpp +0 -1
- package/src/duckdb/src/function/cast_rules.cpp +2 -7
- package/src/duckdb/src/function/table/arrow_conversion.cpp +0 -1
- package/src/duckdb/src/function/table/copy_csv.cpp +3 -3
- package/src/duckdb/src/function/table/read_csv.cpp +2 -1
- package/src/duckdb/src/function/table/system/test_all_types.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/field_writer.hpp +4 -4
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/serializer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +71 -0
- package/src/duckdb/src/include/duckdb/common/types/constraint_conflict_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +82 -2
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +0 -4
- package/src/duckdb/src/include/duckdb/common/types.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +13 -5
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +37 -2
- package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +16 -10
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +3 -3
- package/src/duckdb/src/include/duckdb/parser/column_list.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +33 -0
- package/src/duckdb/src/include/duckdb/parser/statement/update_statement.hpp +20 -3
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +21 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +9 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +25 -1
- package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +12 -7
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +8 -4
- package/src/duckdb/src/include/duckdb/storage/index.hpp +12 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +8 -2
- package/src/duckdb/src/include/duckdb.h +0 -2
- package/src/duckdb/src/main/capi/helper-c.cpp +0 -4
- package/src/duckdb/src/main/relation/update_relation.cpp +5 -3
- package/src/duckdb/src/parser/column_list.cpp +18 -0
- package/src/duckdb/src/parser/parser.cpp +2 -2
- package/src/duckdb/src/parser/statement/insert_statement.cpp +87 -1
- package/src/duckdb/src/parser/statement/update_statement.cpp +21 -6
- package/src/duckdb/src/parser/transform/statement/transform_create_index.cpp +23 -16
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +18 -3
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +15 -7
- package/src/duckdb/src/parser/transform/statement/transform_upsert.cpp +95 -0
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +1 -0
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +343 -9
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +52 -39
- package/src/duckdb/src/planner/binder.cpp +20 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +10 -0
- package/src/duckdb/src/planner/operator/logical_aggregate.cpp +1 -0
- package/src/duckdb/src/planner/operator/logical_insert.cpp +3 -0
- package/src/duckdb/src/planner/table_binding.cpp +38 -17
- package/src/duckdb/src/storage/arena_allocator.cpp +30 -0
- package/src/duckdb/src/storage/data_table.cpp +201 -47
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/row_group_collection.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +10 -8
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +12 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +8758 -8769
- package/src/duckdb/ub_extension_json_json_functions.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +4 -0
- package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
package/binding.gyp
CHANGED
|
@@ -235,8 +235,11 @@
|
|
|
235
235
|
"src/duckdb/ub_extension_icu_third_party_icu_common.cpp",
|
|
236
236
|
"src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp",
|
|
237
237
|
"src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp",
|
|
238
|
+
"src/duckdb/extension/json/buffered_json_reader.cpp",
|
|
238
239
|
"src/duckdb/extension/json/json-extension.cpp",
|
|
239
240
|
"src/duckdb/extension/json/json_common.cpp",
|
|
241
|
+
"src/duckdb/extension/json/json_functions.cpp",
|
|
242
|
+
"src/duckdb/extension/json/json_scan.cpp",
|
|
240
243
|
"src/duckdb/ub_extension_json_json_functions.cpp",
|
|
241
244
|
"src/duckdb/extension/json/yyjson/yyjson.cpp"
|
|
242
245
|
],
|
package/package.json
CHANGED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
#include "buffered_json_reader.hpp"
|
|
2
|
+
|
|
3
|
+
#include "duckdb/common/field_writer.hpp"
|
|
4
|
+
#include "duckdb/common/file_opener.hpp"
|
|
5
|
+
#include "duckdb/common/file_system.hpp"
|
|
6
|
+
#include "duckdb/common/printer.hpp"
|
|
7
|
+
|
|
8
|
+
namespace duckdb {
|
|
9
|
+
|
|
10
|
+
//! Writes the reader options to `writer` in a fixed order:
//! file path, JSON format, compression type.
//! Deserialize reads the fields back in exactly this order.
void BufferedJSONReaderOptions::Serialize(FieldWriter &writer) {
	// 1) path of the JSON file
	writer.WriteString(file_path);
	// 2) JSON layout (auto-detect / unstructured / newline-delimited)
	writer.WriteField<JSONFormat>(format);
	// 3) compression of the file
	writer.WriteField<FileCompressionType>(compression);
}
|
|
15
|
+
|
|
16
|
+
//! Restores the reader options from `reader`.
//! All three fields are required and are read in the order Serialize wrote them:
//! file path, JSON format, compression type.
void BufferedJSONReaderOptions::Deserialize(FieldReader &reader) {
	// 1) path of the JSON file
	file_path = reader.ReadRequired<string>();
	// 2) JSON layout
	format = reader.ReadRequired<JSONFormat>();
	// 3) compression of the file
	compression = reader.ReadRequired<FileCompressionType>();
}
|
|
21
|
+
|
|
22
|
+
//! Creates a handle for one buffer of a JSON file.
//! \param buffer_index_p index of the buffer within its file
//! \param readers_p      initial value of the reader counter
//! \param buffer_p       the allocation; ownership is moved into the handle
//! \param buffer_size_p  number of bytes of data stored in the buffer
JSONBufferHandle::JSONBufferHandle(idx_t buffer_index_p, idx_t readers_p, AllocatedData &&buffer_p, idx_t buffer_size_p)
    : buffer_index(buffer_index_p), readers(readers_p), buffer(std::move(buffer_p)), buffer_size(buffer_size_p) {
}
|
|
25
|
+
|
|
26
|
+
//! Takes ownership of an opened file and caches its static properties.
//! The initializers below query the *member* `file_handle` (already moved into),
//! not the moved-from parameter, so `file_handle` has to be initialized first.
JSONFileHandle::JSONFileHandle(unique_ptr<FileHandle> file_handle_p)
    : file_handle(std::move(file_handle_p)),
      // whether the underlying handle supports seeking
      can_seek(file_handle->CanSeek()),
      // "plain" means: a file on disk that can also be seeked
      plain_file_source(file_handle->OnDiskFile() && can_seek),
      // size as reported by the underlying handle
      file_size(file_handle->GetFileSize()),
      // nothing has been read yet
      read_position(0) {
}
|
|
31
|
+
|
|
32
|
+
//! Returns the file size that was cached when the handle was constructed
idx_t JSONFileHandle::FileSize() const {
	return file_size;
}
|
|
35
|
+
|
|
36
|
+
//! Returns the number of bytes between the current read position and the cached file size.
//! NOTE(review): unsigned subtraction - this wraps around if read_position ever exceeds
//! file_size; confirm that cannot happen for the handles this is used with.
idx_t JSONFileHandle::Remaining() const {
	const idx_t bytes_left = file_size - read_position;
	return bytes_left;
}
|
|
39
|
+
|
|
40
|
+
bool JSONFileHandle::CanSeek() const {
|
|
41
|
+
return can_seek;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
bool JSONFileHandle::PlainFileSource() const {
|
|
45
|
+
return plain_file_source;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
//! Reserves the next chunk of the file without reading it.
//! \param position       [out] receives the read position before the reservation
//! \param requested_size number of bytes the caller would like (must be non-zero)
//! \return the number of bytes actually reserved: the request capped at Remaining().
//! The read position is advanced by the returned amount.
idx_t JSONFileHandle::GetPositionAndSize(idx_t &position, idx_t requested_size) {
	D_ASSERT(requested_size != 0);
	// hand out the current offset ...
	position = read_position;
	// ... and never reserve more than what is left
	const idx_t granted = MinValue<idx_t>(requested_size, Remaining());
	read_position += granted;
	return granted;
}
|
|
55
|
+
|
|
56
|
+
//! Reads `size` bytes starting at file offset `position` into `pointer`.
//! Does not touch read_position.
//! NOTE(review): `pointer` is declared const but is the destination of the read;
//! the const qualifier is cast away before handing it to the file handle.
void JSONFileHandle::ReadAtPosition(const char *pointer, idx_t size, idx_t position) {
	D_ASSERT(size != 0);
	void *destination = const_cast<char *>(pointer);
	file_handle->Read(destination, size, position);
}
|
|
60
|
+
|
|
61
|
+
//! Reads up to `requested_size` bytes from the file handle into `pointer`.
//! \return the number of bytes the file handle reported as read; read_position
//! is advanced by that amount.
//! NOTE(review): `pointer` is declared const but is the destination of the read;
//! the const qualifier is cast away before handing it to the file handle.
idx_t JSONFileHandle::Read(const char *pointer, idx_t requested_size) {
	D_ASSERT(requested_size != 0);
	void *destination = const_cast<char *>(pointer);
	const auto bytes_read = file_handle->Read(destination, requested_size);
	read_position += bytes_read;
	return bytes_read;
}
|
|
67
|
+
|
|
68
|
+
//! Creates a reader for one JSON file. The file is NOT opened here; call OpenJSONFile().
//! \param context      client context, later used to obtain the file system / file opener
//! \param options_p    reader options (moved into the reader)
//! \param file_index_p index of this file
//! \param file_path_p  path of this file (moved into the reader)
BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options_p, idx_t file_index_p,
                                       string file_path_p)
    : file_index(file_index_p), file_path(std::move(file_path_p)), context(context), options(std::move(options_p)),
      // buffer indices are handed out starting at 0
      buffer_index(0) {
}
|
|
73
|
+
|
|
74
|
+
void BufferedJSONReader::OpenJSONFile() {
|
|
75
|
+
lock_guard<mutex> guard(lock);
|
|
76
|
+
auto &file_system = FileSystem::GetFileSystem(context);
|
|
77
|
+
auto file_opener = FileOpener::Get(context);
|
|
78
|
+
auto regular_file_handle = file_system.OpenFile(file_path.c_str(), FileFlags::FILE_FLAGS_READ,
|
|
79
|
+
FileLockType::NO_LOCK, options.compression, file_opener);
|
|
80
|
+
file_handle = make_unique<JSONFileHandle>(std::move(regular_file_handle));
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
bool BufferedJSONReader::IsOpen() {
|
|
84
|
+
return file_handle != nullptr;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
//! Returns a mutable reference to the options stored in this reader
BufferedJSONReaderOptions &BufferedJSONReader::GetOptions() {
	return options;
}
|
|
90
|
+
|
|
91
|
+
//! Returns the JSON file handle.
//! The handle is dereferenced unconditionally, so the file must have been opened
//! (IsOpen() == true) before calling this.
JSONFileHandle &BufferedJSONReader::GetFileHandle() const {
	return *file_handle;
}
|
|
94
|
+
|
|
95
|
+
//! Registers `buffer` under `buffer_idx` in the buffer map (under the reader lock).
//! Uses map insertion, so an entry that already exists for `buffer_idx` is not replaced.
void BufferedJSONReader::InsertBuffer(idx_t buffer_idx, unique_ptr<JSONBufferHandle> &&buffer) {
	lock_guard<mutex> guard(lock);
	auto entry = make_pair(buffer_idx, std::move(buffer));
	buffer_map.insert(std::move(entry));
}
|
|
99
|
+
|
|
100
|
+
//! Looks up the buffer registered under `buffer_idx` (under the reader lock).
//! \return a non-owning pointer to the handle, or nullptr if there is no such buffer
JSONBufferHandle *BufferedJSONReader::GetBuffer(idx_t buffer_idx) {
	lock_guard<mutex> guard(lock);
	auto entry = buffer_map.find(buffer_idx);
	if (entry == buffer_map.end()) {
		return nullptr;
	}
	return entry->second.get();
}
|
|
105
|
+
|
|
106
|
+
//! Removes the buffer registered under `buffer_idx` from the map (under the reader lock)
//! and returns its allocation to the caller.
//! The buffer must exist: this is only checked by a debug assertion.
AllocatedData BufferedJSONReader::RemoveBuffer(idx_t buffer_idx) {
	lock_guard<mutex> guard(lock);
	auto entry = buffer_map.find(buffer_idx);
	D_ASSERT(entry != buffer_map.end());
	// steal the allocation before the handle is destroyed by erase()
	auto allocation = std::move(entry->second->buffer);
	buffer_map.erase(entry);
	return allocation;
}
|
|
114
|
+
|
|
115
|
+
//! Returns the next buffer index and advances the counter by one.
//! NOTE(review): the counter is a plain idx_t and no lock is taken here - presumably
//! callers hold `lock` while calling this; confirm at the call sites.
idx_t BufferedJSONReader::GetBufferIndex() {
	const idx_t current = buffer_index;
	buffer_index = current + 1;
	return current;
}
|
|
118
|
+
|
|
119
|
+
double BufferedJSONReader::GetProgress() const {
|
|
120
|
+
if (file_handle) {
|
|
121
|
+
return 100.0 * double(file_handle->Remaining()) / double(file_handle->FileSize());
|
|
122
|
+
} else {
|
|
123
|
+
return 0;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
} // namespace duckdb
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
|
2
|
+
// DuckDB
|
|
3
|
+
//
|
|
4
|
+
// buffered_json_reader.hpp
|
|
5
|
+
//
|
|
6
|
+
//
|
|
7
|
+
//===----------------------------------------------------------------------===//
|
|
8
|
+
|
|
9
|
+
#pragma once
|
|
10
|
+
|
|
11
|
+
#include "duckdb/common/atomic.hpp"
|
|
12
|
+
#include "duckdb/common/enums/file_compression_type.hpp"
|
|
13
|
+
#include "duckdb/common/mutex.hpp"
|
|
14
|
+
#include "json_common.hpp"
|
|
15
|
+
|
|
16
|
+
namespace duckdb {
|
|
17
|
+
|
|
18
|
+
struct FileHandle;
|
|
19
|
+
|
|
20
|
+
//! How the JSON objects are laid out inside a file
enum class JSONFormat : uint8_t {
	//! Detect the format automatically (either UNSTRUCTURED or NEWLINE_DELIMITED)
	AUTO_DETECT = 0,
	//! Objects simply follow each other; a newline may occur at any place
	UNSTRUCTURED = 1,
	//! NDJSON: a newline separates objects and never occurs inside an object
	NEWLINE_DELIMITED = 2,
};
|
|
28
|
+
|
|
29
|
+
struct BufferedJSONReaderOptions {
|
|
30
|
+
public:
|
|
31
|
+
//! The file path of the JSON file to read
|
|
32
|
+
string file_path;
|
|
33
|
+
//! The format of the JSON
|
|
34
|
+
JSONFormat format = JSONFormat::AUTO_DETECT;
|
|
35
|
+
//! Whether file is compressed or not, and if so which compression type
|
|
36
|
+
FileCompressionType compression = FileCompressionType::AUTO_DETECT;
|
|
37
|
+
|
|
38
|
+
public:
|
|
39
|
+
void Serialize(FieldWriter &writer);
|
|
40
|
+
void Deserialize(FieldReader &reader);
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
struct JSONBufferHandle {
|
|
44
|
+
public:
|
|
45
|
+
JSONBufferHandle(idx_t buffer_index, idx_t readers, AllocatedData &&buffer, idx_t buffer_size);
|
|
46
|
+
|
|
47
|
+
public:
|
|
48
|
+
//! Buffer index (within same file)
|
|
49
|
+
const idx_t buffer_index;
|
|
50
|
+
|
|
51
|
+
//! Number of readers for this buffer
|
|
52
|
+
atomic<idx_t> readers;
|
|
53
|
+
//! The buffer
|
|
54
|
+
AllocatedData buffer;
|
|
55
|
+
//! The size of the data in the buffer (can be less than buffer.GetSize())
|
|
56
|
+
const idx_t buffer_size;
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
struct JSONFileHandle {
|
|
60
|
+
public:
|
|
61
|
+
explicit JSONFileHandle(unique_ptr<FileHandle> file_handle);
|
|
62
|
+
|
|
63
|
+
idx_t FileSize() const;
|
|
64
|
+
idx_t Remaining() const;
|
|
65
|
+
|
|
66
|
+
bool CanSeek() const;
|
|
67
|
+
bool PlainFileSource() const;
|
|
68
|
+
|
|
69
|
+
idx_t GetPositionAndSize(idx_t &position, idx_t requested_size);
|
|
70
|
+
void ReadAtPosition(const char *pointer, idx_t size, idx_t position);
|
|
71
|
+
idx_t Read(const char *pointer, idx_t requested_size);
|
|
72
|
+
|
|
73
|
+
private:
|
|
74
|
+
//! The JSON file handle
|
|
75
|
+
unique_ptr<FileHandle> file_handle;
|
|
76
|
+
|
|
77
|
+
//! File properties
|
|
78
|
+
const bool can_seek;
|
|
79
|
+
const bool plain_file_source;
|
|
80
|
+
const idx_t file_size;
|
|
81
|
+
|
|
82
|
+
//! Read properties
|
|
83
|
+
idx_t read_position;
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
class BufferedJSONReader {
|
|
87
|
+
public:
|
|
88
|
+
BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options, idx_t file_index, string file_path);
|
|
89
|
+
|
|
90
|
+
void OpenJSONFile();
|
|
91
|
+
bool IsOpen();
|
|
92
|
+
|
|
93
|
+
BufferedJSONReaderOptions &GetOptions();
|
|
94
|
+
JSONFileHandle &GetFileHandle() const;
|
|
95
|
+
|
|
96
|
+
void InsertBuffer(idx_t buffer_idx, unique_ptr<JSONBufferHandle> &&buffer);
|
|
97
|
+
JSONBufferHandle *GetBuffer(idx_t buffer_idx);
|
|
98
|
+
AllocatedData RemoveBuffer(idx_t buffer_idx);
|
|
99
|
+
idx_t GetBufferIndex();
|
|
100
|
+
|
|
101
|
+
double GetProgress() const;
|
|
102
|
+
|
|
103
|
+
public:
|
|
104
|
+
mutex lock;
|
|
105
|
+
|
|
106
|
+
//! File index / path
|
|
107
|
+
const idx_t file_index;
|
|
108
|
+
const string file_path;
|
|
109
|
+
|
|
110
|
+
private:
|
|
111
|
+
ClientContext &context;
|
|
112
|
+
BufferedJSONReaderOptions options;
|
|
113
|
+
|
|
114
|
+
//! File handle
|
|
115
|
+
unique_ptr<JSONFileHandle> file_handle;
|
|
116
|
+
|
|
117
|
+
//! Next buffer index within the file
|
|
118
|
+
idx_t buffer_index;
|
|
119
|
+
//! Mapping from batch index to currently held buffers
|
|
120
|
+
unordered_map<idx_t, unique_ptr<JSONBufferHandle>> buffer_map;
|
|
121
|
+
};
|
|
122
|
+
|
|
123
|
+
} // namespace duckdb
|
|
@@ -11,112 +11,75 @@
|
|
|
11
11
|
#include "duckdb/common/operator/cast_operators.hpp"
|
|
12
12
|
#include "duckdb/common/operator/decimal_cast_operators.hpp"
|
|
13
13
|
#include "duckdb/common/operator/string_cast.hpp"
|
|
14
|
-
#include "duckdb/execution/expression_executor.hpp"
|
|
15
14
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
|
16
15
|
#include "yyjson.hpp"
|
|
17
16
|
|
|
18
17
|
namespace duckdb {
|
|
19
18
|
|
|
20
|
-
|
|
19
|
+
class JSONAllocator {
|
|
21
20
|
public:
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
static unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
|
|
26
|
-
vector<unique_ptr<Expression>> &arguments);
|
|
27
|
-
|
|
28
|
-
public:
|
|
29
|
-
const bool constant;
|
|
30
|
-
const string path;
|
|
31
|
-
const char *ptr;
|
|
32
|
-
const size_t len;
|
|
33
|
-
};
|
|
34
|
-
|
|
35
|
-
struct JSONReadManyFunctionData : public FunctionData {
|
|
36
|
-
public:
|
|
37
|
-
JSONReadManyFunctionData(vector<string> paths_p, vector<size_t> lens_p);
|
|
38
|
-
unique_ptr<FunctionData> Copy() const override;
|
|
39
|
-
bool Equals(const FunctionData &other_p) const override;
|
|
40
|
-
static unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
|
|
41
|
-
vector<unique_ptr<Expression>> &arguments);
|
|
42
|
-
|
|
43
|
-
public:
|
|
44
|
-
const vector<string> paths;
|
|
45
|
-
vector<const char *> ptrs;
|
|
46
|
-
const vector<size_t> lens;
|
|
47
|
-
};
|
|
48
|
-
|
|
49
|
-
template <class YYJSON_DOC_T>
|
|
50
|
-
static inline void CleanupDoc(YYJSON_DOC_T *doc) {
|
|
51
|
-
throw InternalException("Unknown yyjson document type");
|
|
52
|
-
}
|
|
21
|
+
//! Builds an arena on top of `allocator` and a yyjson allocator whose callbacks
//! (Allocate / Reallocate / Free) receive a pointer to that arena as their context
explicit JSONAllocator(Allocator &allocator)
    : arena_allocator(allocator),
      yyjson_allocator({Allocate, Reallocate, Free, &arena_allocator}) {
}
|
|
53
24
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
}
|
|
25
|
+
//! Returns a pointer to the yyjson allocator owned by this object
inline yyjson_alc *GetYYJSONAllocator() {
	yyjson_alc *alc = &yyjson_allocator;
	return alc;
}
|
|
58
28
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
}
|
|
29
|
+
//! Resets the underlying arena allocator
void Reset() {
	arena_allocator.Reset();
}
|
|
63
32
|
|
|
64
|
-
template <class YYJSON_DOC_T>
|
|
65
|
-
class DocPointer {
|
|
66
33
|
private:
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
explicit DocPointer(YYJSON_DOC_T *doc) : doc(doc) {
|
|
34
|
+
static inline void *Allocate(void *ctx, size_t size) {
|
|
35
|
+
auto alloc = (ArenaAllocator *)ctx;
|
|
36
|
+
return alloc->AllocateAligned(size);
|
|
71
37
|
}
|
|
72
38
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
DocPointer(DocPointer &&other) noexcept {
|
|
77
|
-
this->doc = other.doc;
|
|
78
|
-
other.doc = nullptr;
|
|
39
|
+
static inline void *Reallocate(void *ctx, void *ptr, size_t old_size, size_t size) {
|
|
40
|
+
auto alloc = (ArenaAllocator *)ctx;
|
|
41
|
+
return alloc->ReallocateAligned((data_ptr_t)ptr, old_size, size);
|
|
79
42
|
}
|
|
80
43
|
|
|
81
|
-
void
|
|
82
|
-
|
|
83
|
-
this->ptr = other.ptr;
|
|
84
|
-
other.ptr = nullptr;
|
|
44
|
+
//! yyjson free callback: intentionally does nothing, because the
//! ArenaAllocator cannot free individual allocations
static inline void Free(void *ctx, void *ptr) {
	// no-op
}
|
|
86
47
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
48
|
+
private:
|
|
49
|
+
ArenaAllocator arena_allocator;
|
|
50
|
+
yyjson_alc yyjson_allocator;
|
|
51
|
+
};
|
|
90
52
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
53
|
+
struct JSONCommon {
|
|
54
|
+
public:
|
|
55
|
+
static constexpr auto JSON_TYPE_NAME = "JSON";
|
|
94
56
|
|
|
95
|
-
|
|
96
|
-
|
|
57
|
+
//! Returns the JSON logical type: a VARCHAR that carries the alias JSON_TYPE_NAME
static const LogicalType JSONType() {
	LogicalType result(LogicalTypeId::VARCHAR);
	result.SetAlias(JSON_TYPE_NAME);
	return result;
}
|
|
98
62
|
|
|
99
|
-
|
|
100
|
-
|
|
63
|
+
static bool LogicalTypeIsJSON(const LogicalType &type) {
|
|
64
|
+
return type.id() == LogicalTypeId::VARCHAR && type.HasAlias() && type.GetAlias() == JSON_TYPE_NAME;
|
|
101
65
|
}
|
|
102
|
-
};
|
|
103
66
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
//! Read/Write flag that make sense for us
|
|
67
|
+
public:
|
|
68
|
+
//! Read/Write flags
|
|
107
69
|
static constexpr auto READ_FLAG = YYJSON_READ_ALLOW_INF_AND_NAN | YYJSON_READ_ALLOW_TRAILING_COMMAS;
|
|
70
|
+
static constexpr auto STOP_READ_FLAG = READ_FLAG | YYJSON_READ_STOP_WHEN_DONE | YYJSON_READ_INSITU;
|
|
108
71
|
static constexpr auto WRITE_FLAG = YYJSON_WRITE_ALLOW_INF_AND_NAN;
|
|
109
72
|
|
|
110
73
|
public:
|
|
111
74
|
//! Constant JSON type strings
|
|
112
|
-
static constexpr
|
|
113
|
-
static constexpr
|
|
114
|
-
static constexpr
|
|
115
|
-
static constexpr
|
|
116
|
-
static constexpr
|
|
117
|
-
static constexpr
|
|
118
|
-
static constexpr
|
|
119
|
-
static constexpr
|
|
75
|
+
static constexpr char const *TYPE_STRING_NULL = "NULL";
|
|
76
|
+
static constexpr char const *TYPE_STRING_BOOLEAN = "BOOLEAN";
|
|
77
|
+
static constexpr char const *TYPE_STRING_BIGINT = "BIGINT";
|
|
78
|
+
static constexpr char const *TYPE_STRING_UBIGINT = "UBIGINT";
|
|
79
|
+
static constexpr char const *TYPE_STRING_DOUBLE = "DOUBLE";
|
|
80
|
+
static constexpr char const *TYPE_STRING_VARCHAR = "VARCHAR";
|
|
81
|
+
static constexpr char const *TYPE_STRING_ARRAY = "ARRAY";
|
|
82
|
+
static constexpr char const *TYPE_STRING_OBJECT = "OBJECT";
|
|
120
83
|
|
|
121
84
|
template <class YYJSON_VAL_T>
|
|
122
85
|
static inline const char *const ValTypeToString(YYJSON_VAL_T *val) {
|
|
@@ -143,59 +106,62 @@ public:
|
|
|
143
106
|
}
|
|
144
107
|
}
|
|
145
108
|
|
|
146
|
-
|
|
147
|
-
static inline
|
|
148
|
-
return
|
|
109
|
+
template <class YYJSON_VAL_T>
|
|
110
|
+
static inline constexpr string_t ValTypeToStringT(YYJSON_VAL_T *val) {
|
|
111
|
+
return string_t(ValTypeToString<YYJSON_VAL_T>(val));
|
|
149
112
|
}
|
|
150
113
|
|
|
151
|
-
|
|
152
|
-
static inline
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
static inline
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
114
|
+
public:
|
|
115
|
+
//! Creates a new mutable yyjson document that allocates through `alc` (must not be null)
static inline yyjson_mut_doc *CreateDocument(yyjson_alc *alc) {
	D_ASSERT(alc);
	auto *doc = yyjson_mut_doc_new(alc);
	return doc;
}
|
|
119
|
+
//! Parses `size` bytes at `data` with the given read flags, allocating through `alc`
//! (must not be null). "Unsafe": nothing is thrown here - the result of
//! yyjson_read_opts is returned as-is and error details go to `err` if it is provided.
static inline yyjson_doc *ReadDocumentUnsafe(char *data, idx_t size, const yyjson_read_flag flg, yyjson_alc *alc,
                                             yyjson_read_err *err = nullptr) {
	D_ASSERT(alc);
	auto *doc = yyjson_read_opts(data, size, flg, alc, err);
	return doc;
}
|
|
124
|
+
//! string_t convenience overload: forwards the string's data pointer and size
//! to the (char *, idx_t) overload of ReadDocumentUnsafe
static inline yyjson_doc *ReadDocumentUnsafe(const string_t &input, const yyjson_read_flag flg, yyjson_alc *alc,
                                             yyjson_read_err *err = nullptr) {
	auto *data = input.GetDataWriteable();
	const auto size = input.GetSize();
	return ReadDocumentUnsafe(data, size, flg, alc, err);
}
|
|
128
|
+
//! Parses `size` bytes at `data`; unlike ReadDocumentUnsafe this raises a parse
//! error (via ThrowParseError) when yyjson reports anything but success
static inline yyjson_doc *ReadDocument(char *data, idx_t size, const yyjson_read_flag flg, yyjson_alc *alc) {
	yyjson_read_err read_error;
	auto *doc = ReadDocumentUnsafe(data, size, flg, alc, &read_error);
	const bool parse_ok = read_error.code == YYJSON_READ_SUCCESS;
	if (!parse_ok) {
		ThrowParseError(data, size, read_error);
	}
	return doc;
}
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
return unique_ptr<char, decltype(free) *>(
|
|
166
|
-
reinterpret_cast<char *>(yyjson_val_write(val, WRITE_FLAG, (size_t *)&len)), free);
|
|
167
|
-
}
|
|
168
|
-
static inline unique_ptr<char, void (*)(void *)> WriteVal(yyjson_mut_val *val, idx_t &len) {
|
|
169
|
-
return unique_ptr<char, decltype(free) *>(
|
|
170
|
-
reinterpret_cast<char *>(yyjson_mut_val_write(val, WRITE_FLAG, (size_t *)&len)), free);
|
|
136
|
+
//! string_t convenience overload: forwards the string's data pointer and size
//! to the (char *, idx_t) overload of ReadDocument
static inline yyjson_doc *ReadDocument(const string_t &input, const yyjson_read_flag flg, yyjson_alc *alc) {
	auto *data = input.GetDataWriteable();
	const auto size = input.GetSize();
	return ReadDocument(data, size, flg, alc);
}
|
|
172
|
-
static
|
|
173
|
-
|
|
174
|
-
|
|
139
|
+
//! Builds the message for a failed parse: byte position and message from `error`,
//! the optional `extra` text, and a (possibly shortened) copy of the input.
//! `error` must describe a failure.
static string FormatParseError(const char *data, idx_t length, yyjson_read_err &error, const string &extra = "") {
	D_ASSERT(error.code != YYJSON_READ_SUCCESS);
	// Inputs longer than 50 bytes are cut to 47 bytes + "...", so that we
	// do not print megabytes worth of JSON
	string input;
	if (length > 50) {
		input = string(data, 47) + "...";
	} else {
		input = string(data, length);
	}
	// A raw carriage return makes the output unreadable - show it escaped instead
	input = StringUtil::Replace(input, "\r", "\\r");
	return StringUtil::Format("Malformed JSON at byte %lld of input: %s. %s Input: %s", error.pos, error.msg, extra,
	                          input);
}
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
idx_t len;
|
|
179
|
-
auto data = WriteVal(val, len);
|
|
180
|
-
return StringVector::AddString(vector, data.get(), len);
|
|
148
|
+
static void ThrowParseError(const char *data, idx_t length, yyjson_read_err &error, const string &extra = "") {
|
|
149
|
+
throw InvalidInputException(FormatParseError(data, length, error, extra));
|
|
181
150
|
}
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
151
|
+
|
|
152
|
+
template <class YYJSON_VAL_T>
|
|
153
|
+
static inline char *WriteVal(YYJSON_VAL_T *val, yyjson_alc *alc, idx_t &len) {
|
|
154
|
+
throw InternalException("Unknown yyjson val type");
|
|
186
155
|
}
|
|
187
|
-
|
|
156
|
+
template <class YYJSON_VAL_T>
|
|
157
|
+
static inline string_t WriteVal(YYJSON_VAL_T *val, yyjson_alc *alc) {
|
|
158
|
+
D_ASSERT(alc);
|
|
188
159
|
idx_t len;
|
|
189
|
-
auto data =
|
|
190
|
-
return
|
|
160
|
+
auto data = WriteVal<YYJSON_VAL_T>(val, alc, len);
|
|
161
|
+
return string_t(data, len);
|
|
191
162
|
}
|
|
192
163
|
//! Throw an error with the printed yyjson_val
|
|
193
|
-
static void ThrowValFormatError(string error_string, yyjson_val *val)
|
|
194
|
-
idx_t len;
|
|
195
|
-
auto data = WriteVal(val, len);
|
|
196
|
-
error_string = StringUtil::Format(error_string, string(data.get(), len));
|
|
197
|
-
throw InvalidInputException(error_string);
|
|
198
|
-
}
|
|
164
|
+
static void ThrowValFormatError(string error_string, yyjson_val *val);
|
|
199
165
|
|
|
200
166
|
public:
|
|
201
167
|
//! Validate path with $ syntax
|
|
@@ -241,116 +207,6 @@ public:
|
|
|
241
207
|
}
|
|
242
208
|
}
|
|
243
209
|
|
|
244
|
-
public:
|
|
245
|
-
//! Single-argument JSON read function, i.e. json_type('[1, 2, 3]')
|
|
246
|
-
template <class T>
|
|
247
|
-
static void UnaryExecute(DataChunk &args, ExpressionState &state, Vector &result,
|
|
248
|
-
std::function<T(yyjson_val *, Vector &)> fun) {
|
|
249
|
-
auto &inputs = args.data[0];
|
|
250
|
-
UnaryExecutor::Execute<string_t, T>(inputs, result, args.size(), [&](string_t input) {
|
|
251
|
-
auto doc = JSONCommon::ReadDocument(input);
|
|
252
|
-
return fun(doc->root, result);
|
|
253
|
-
});
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
//! Two-argument JSON read function (with path query), i.e. json_type('[1, 2, 3]', '$[0]')
|
|
257
|
-
template <class T>
|
|
258
|
-
static void BinaryExecute(DataChunk &args, ExpressionState &state, Vector &result,
|
|
259
|
-
std::function<T(yyjson_val *, Vector &)> fun) {
|
|
260
|
-
auto &func_expr = (BoundFunctionExpression &)state.expr;
|
|
261
|
-
const auto &info = (JSONReadFunctionData &)*func_expr.bind_info;
|
|
262
|
-
|
|
263
|
-
auto &inputs = args.data[0];
|
|
264
|
-
if (info.constant) {
|
|
265
|
-
// Constant path
|
|
266
|
-
const char *ptr = info.ptr;
|
|
267
|
-
const idx_t &len = info.len;
|
|
268
|
-
UnaryExecutor::ExecuteWithNulls<string_t, T>(
|
|
269
|
-
inputs, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) {
|
|
270
|
-
auto doc = ReadDocument(input);
|
|
271
|
-
yyjson_val *val;
|
|
272
|
-
if (!(val = GetPointerUnsafe<yyjson_val>(doc->root, ptr, len))) {
|
|
273
|
-
mask.SetInvalid(idx);
|
|
274
|
-
return T {};
|
|
275
|
-
} else {
|
|
276
|
-
return fun(val, result);
|
|
277
|
-
}
|
|
278
|
-
});
|
|
279
|
-
} else {
|
|
280
|
-
// Columnref path
|
|
281
|
-
auto &paths = args.data[1];
|
|
282
|
-
BinaryExecutor::ExecuteWithNulls<string_t, string_t, T>(
|
|
283
|
-
inputs, paths, result, args.size(), [&](string_t input, string_t path, ValidityMask &mask, idx_t idx) {
|
|
284
|
-
auto doc = ReadDocument(input);
|
|
285
|
-
yyjson_val *val;
|
|
286
|
-
if (!(val = GetPointer<yyjson_val>(doc->root, path))) {
|
|
287
|
-
mask.SetInvalid(idx);
|
|
288
|
-
return T {};
|
|
289
|
-
} else {
|
|
290
|
-
return fun(val, result);
|
|
291
|
-
}
|
|
292
|
-
});
|
|
293
|
-
}
|
|
294
|
-
if (args.AllConstant()) {
|
|
295
|
-
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
296
|
-
}
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
//! JSON read function with list of path queries, i.e. json_type('[1, 2, 3]', ['$[0]', '$[1]'])
|
|
300
|
-
template <class T>
|
|
301
|
-
static void ExecuteMany(DataChunk &args, ExpressionState &state, Vector &result,
|
|
302
|
-
std::function<T(yyjson_val *, Vector &)> fun) {
|
|
303
|
-
auto &func_expr = (BoundFunctionExpression &)state.expr;
|
|
304
|
-
const auto &info = (JSONReadManyFunctionData &)*func_expr.bind_info;
|
|
305
|
-
D_ASSERT(info.ptrs.size() == info.lens.size());
|
|
306
|
-
|
|
307
|
-
const auto count = args.size();
|
|
308
|
-
const idx_t num_paths = info.ptrs.size();
|
|
309
|
-
const idx_t list_size = count * num_paths;
|
|
310
|
-
|
|
311
|
-
UnifiedVectorFormat input_data;
|
|
312
|
-
auto &input_vector = args.data[0];
|
|
313
|
-
input_vector.ToUnifiedFormat(count, input_data);
|
|
314
|
-
auto inputs = (string_t *)input_data.data;
|
|
315
|
-
|
|
316
|
-
ListVector::Reserve(result, list_size);
|
|
317
|
-
auto list_entries = FlatVector::GetData<list_entry_t>(result);
|
|
318
|
-
auto &list_validity = FlatVector::Validity(result);
|
|
319
|
-
|
|
320
|
-
auto &child = ListVector::GetEntry(result);
|
|
321
|
-
auto child_data = FlatVector::GetData<T>(child);
|
|
322
|
-
auto &child_validity = FlatVector::Validity(child);
|
|
323
|
-
|
|
324
|
-
idx_t offset = 0;
|
|
325
|
-
yyjson_val *val;
|
|
326
|
-
for (idx_t i = 0; i < count; i++) {
|
|
327
|
-
auto idx = input_data.sel->get_index(i);
|
|
328
|
-
if (!input_data.validity.RowIsValid(idx)) {
|
|
329
|
-
list_validity.SetInvalid(i);
|
|
330
|
-
continue;
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
auto doc = ReadDocument(inputs[idx]);
|
|
334
|
-
for (idx_t path_i = 0; path_i < num_paths; path_i++) {
|
|
335
|
-
auto child_idx = offset + path_i;
|
|
336
|
-
if (!(val = GetPointerUnsafe<yyjson_val>(doc->root, info.ptrs[path_i], info.lens[path_i]))) {
|
|
337
|
-
child_validity.SetInvalid(child_idx);
|
|
338
|
-
} else {
|
|
339
|
-
child_data[child_idx] = fun(val, child);
|
|
340
|
-
}
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
list_entries[i].offset = offset;
|
|
344
|
-
list_entries[i].length = num_paths;
|
|
345
|
-
offset += num_paths;
|
|
346
|
-
}
|
|
347
|
-
ListVector::SetListSize(result, offset);
|
|
348
|
-
|
|
349
|
-
if (args.AllConstant()) {
|
|
350
|
-
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
351
|
-
}
|
|
352
|
-
}
|
|
353
|
-
|
|
354
210
|
private:
|
|
355
211
|
//! Get JSON pointer using /field/index/... notation
|
|
356
212
|
template <class YYJSON_VAL_T>
|
|
@@ -502,6 +358,15 @@ private:
|
|
|
502
358
|
}
|
|
503
359
|
};
|
|
504
360
|
|
|
361
|
+
template <>
|
|
362
|
+
inline char *JSONCommon::WriteVal(yyjson_val *val, yyjson_alc *alc, idx_t &len) {
|
|
363
|
+
return yyjson_val_write_opts(val, JSONCommon::WRITE_FLAG, alc, (size_t *)&len, nullptr);
|
|
364
|
+
}
|
|
365
|
+
template <>
|
|
366
|
+
inline char *JSONCommon::WriteVal(yyjson_mut_val *val, yyjson_alc *alc, idx_t &len) {
|
|
367
|
+
return yyjson_mut_val_write_opts(val, JSONCommon::WRITE_FLAG, alc, (size_t *)&len, nullptr);
|
|
368
|
+
}
|
|
369
|
+
|
|
505
370
|
template <>
|
|
506
371
|
inline yyjson_val *JSONCommon::TemplatedGetPointer(yyjson_val *root, const char *ptr, const idx_t &len) {
|
|
507
372
|
return len == 1 ? root : unsafe_yyjson_get_pointer(root, ptr, len);
|