duckdb 0.6.2-dev1376.0 → 0.6.2-dev1568.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. package/binding.gyp +3 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/buffered_json_reader.cpp +127 -0
  4. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +123 -0
  5. package/src/duckdb/extension/json/include/json_common.hpp +95 -230
  6. package/src/duckdb/extension/json/include/json_executors.hpp +139 -0
  7. package/src/duckdb/extension/json/include/json_functions.hpp +57 -30
  8. package/src/duckdb/extension/json/include/json_scan.hpp +196 -0
  9. package/src/duckdb/extension/json/json-extension.cpp +24 -15
  10. package/src/duckdb/extension/json/json_common.cpp +6 -91
  11. package/src/duckdb/extension/json/json_functions/json_array_length.cpp +17 -12
  12. package/src/duckdb/extension/json/json_functions/json_contains.cpp +94 -43
  13. package/src/duckdb/extension/json/json_functions/json_create.cpp +38 -25
  14. package/src/duckdb/extension/json/json_functions/json_extract.cpp +29 -20
  15. package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +33 -11
  16. package/src/duckdb/extension/json/json_functions/json_structure.cpp +16 -11
  17. package/src/duckdb/extension/json/json_functions/json_transform.cpp +60 -41
  18. package/src/duckdb/extension/json/json_functions/json_type.cpp +18 -13
  19. package/src/duckdb/extension/json/json_functions/json_valid.cpp +16 -6
  20. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +65 -0
  21. package/src/duckdb/extension/json/json_functions.cpp +200 -0
  22. package/src/duckdb/extension/json/json_scan.cpp +501 -0
  23. package/src/duckdb/extension/json/yyjson/include/yyjson.hpp +3838 -3398
  24. package/src/duckdb/extension/json/yyjson/yyjson.cpp +6580 -6411
  25. package/src/duckdb/extension/parquet/column_reader.cpp +0 -1
  26. package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
  27. package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -1
  28. package/src/duckdb/extension/parquet/parquet_writer.cpp +0 -2
  29. package/src/duckdb/src/catalog/default/default_types.cpp +0 -1
  30. package/src/duckdb/src/common/arrow/arrow_appender.cpp +0 -1
  31. package/src/duckdb/src/common/arrow/arrow_converter.cpp +0 -1
  32. package/src/duckdb/src/common/radix_partitioning.cpp +3 -3
  33. package/src/duckdb/src/common/sort/sort_state.cpp +5 -2
  34. package/src/duckdb/src/common/types/conflict_info.cpp +18 -0
  35. package/src/duckdb/src/common/types/conflict_manager.cpp +257 -0
  36. package/src/duckdb/src/common/types/value.cpp +0 -18
  37. package/src/duckdb/src/common/types/vector.cpp +2 -6
  38. package/src/duckdb/src/common/types.cpp +13 -9
  39. package/src/duckdb/src/common/vector_operations/vector_cast.cpp +6 -2
  40. package/src/duckdb/src/execution/column_binding_resolver.cpp +20 -0
  41. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +10 -4
  42. package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +1 -1
  43. package/src/duckdb/src/execution/index/art/art.cpp +80 -47
  44. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -1
  45. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +2 -2
  46. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -0
  47. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +247 -8
  48. package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +17 -6
  49. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +1 -1
  50. package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +1 -1
  51. package/src/duckdb/src/function/cast/default_casts.cpp +10 -9
  52. package/src/duckdb/src/function/cast/enum_casts.cpp +0 -1
  53. package/src/duckdb/src/function/cast/list_casts.cpp +0 -1
  54. package/src/duckdb/src/function/cast/map_cast.cpp +0 -1
  55. package/src/duckdb/src/function/cast/numeric_casts.cpp +0 -1
  56. package/src/duckdb/src/function/cast/string_cast.cpp +0 -1
  57. package/src/duckdb/src/function/cast/struct_cast.cpp +0 -1
  58. package/src/duckdb/src/function/cast/time_casts.cpp +0 -9
  59. package/src/duckdb/src/function/cast/union_casts.cpp +2 -2
  60. package/src/duckdb/src/function/cast/uuid_casts.cpp +0 -1
  61. package/src/duckdb/src/function/cast_rules.cpp +2 -7
  62. package/src/duckdb/src/function/table/arrow_conversion.cpp +0 -1
  63. package/src/duckdb/src/function/table/copy_csv.cpp +3 -3
  64. package/src/duckdb/src/function/table/read_csv.cpp +2 -1
  65. package/src/duckdb/src/function/table/system/test_all_types.cpp +2 -2
  66. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  67. package/src/duckdb/src/include/duckdb/common/allocator.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +4 -4
  69. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -1
  70. package/src/duckdb/src/include/duckdb/common/serializer.hpp +2 -2
  71. package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +71 -0
  72. package/src/duckdb/src/include/duckdb/common/types/constraint_conflict_info.hpp +27 -0
  73. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +82 -2
  74. package/src/duckdb/src/include/duckdb/common/types/value.hpp +0 -4
  75. package/src/duckdb/src/include/duckdb/common/types.hpp +2 -2
  76. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +2 -2
  77. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +13 -5
  78. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -0
  79. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +37 -2
  80. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +16 -10
  81. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +3 -3
  82. package/src/duckdb/src/include/duckdb/parser/column_list.hpp +2 -0
  83. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +33 -0
  84. package/src/duckdb/src/include/duckdb/parser/statement/update_statement.hpp +20 -3
  85. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +21 -0
  86. package/src/duckdb/src/include/duckdb/planner/binder.hpp +9 -0
  87. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +1 -0
  88. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +25 -1
  89. package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +6 -0
  90. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +12 -7
  91. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +1 -1
  92. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +8 -4
  93. package/src/duckdb/src/include/duckdb/storage/index.hpp +12 -2
  94. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -1
  95. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +8 -2
  96. package/src/duckdb/src/include/duckdb.h +0 -2
  97. package/src/duckdb/src/main/capi/helper-c.cpp +0 -4
  98. package/src/duckdb/src/main/relation/update_relation.cpp +5 -3
  99. package/src/duckdb/src/parser/column_list.cpp +18 -0
  100. package/src/duckdb/src/parser/parser.cpp +2 -2
  101. package/src/duckdb/src/parser/statement/insert_statement.cpp +87 -1
  102. package/src/duckdb/src/parser/statement/update_statement.cpp +21 -6
  103. package/src/duckdb/src/parser/transform/statement/transform_create_index.cpp +23 -16
  104. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +18 -3
  105. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +15 -7
  106. package/src/duckdb/src/parser/transform/statement/transform_upsert.cpp +95 -0
  107. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +1 -0
  108. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +343 -9
  109. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +52 -39
  110. package/src/duckdb/src/planner/binder.cpp +20 -0
  111. package/src/duckdb/src/planner/logical_operator_visitor.cpp +10 -0
  112. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +1 -0
  113. package/src/duckdb/src/planner/operator/logical_insert.cpp +3 -0
  114. package/src/duckdb/src/planner/table_binding.cpp +38 -17
  115. package/src/duckdb/src/storage/arena_allocator.cpp +30 -0
  116. package/src/duckdb/src/storage/data_table.cpp +201 -47
  117. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  118. package/src/duckdb/src/storage/table/row_group_collection.cpp +1 -1
  119. package/src/duckdb/src/storage/table_index_list.cpp +10 -8
  120. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +12 -0
  121. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
  122. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  123. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +8758 -8769
  124. package/src/duckdb/ub_extension_json_json_functions.cpp +2 -0
  125. package/src/duckdb/ub_src_common_types.cpp +4 -0
  126. package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
package/binding.gyp CHANGED
@@ -235,8 +235,11 @@
235
235
  "src/duckdb/ub_extension_icu_third_party_icu_common.cpp",
236
236
  "src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp",
237
237
  "src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp",
238
+ "src/duckdb/extension/json/buffered_json_reader.cpp",
238
239
  "src/duckdb/extension/json/json-extension.cpp",
239
240
  "src/duckdb/extension/json/json_common.cpp",
241
+ "src/duckdb/extension/json/json_functions.cpp",
242
+ "src/duckdb/extension/json/json_scan.cpp",
240
243
  "src/duckdb/ub_extension_json_json_functions.cpp",
241
244
  "src/duckdb/extension/json/yyjson/yyjson.cpp"
242
245
  ],
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.6.2-dev1376.0",
5
+ "version": "0.6.2-dev1568.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -0,0 +1,127 @@
1
+ #include "buffered_json_reader.hpp"
2
+
3
+ #include "duckdb/common/field_writer.hpp"
4
+ #include "duckdb/common/file_opener.hpp"
5
+ #include "duckdb/common/file_system.hpp"
6
+ #include "duckdb/common/printer.hpp"
7
+
8
+ namespace duckdb {
9
+
10
+ void BufferedJSONReaderOptions::Serialize(FieldWriter &writer) {
11
+ writer.WriteString(file_path);
12
+ writer.WriteField<JSONFormat>(format);
13
+ writer.WriteField<FileCompressionType>(compression);
14
+ }
15
+
16
+ void BufferedJSONReaderOptions::Deserialize(FieldReader &reader) {
17
+ file_path = reader.ReadRequired<string>();
18
+ format = reader.ReadRequired<JSONFormat>();
19
+ compression = reader.ReadRequired<FileCompressionType>();
20
+ }
21
+
22
+ JSONBufferHandle::JSONBufferHandle(idx_t buffer_index_p, idx_t readers_p, AllocatedData &&buffer_p, idx_t buffer_size_p)
23
+ : buffer_index(buffer_index_p), readers(readers_p), buffer(move(buffer_p)), buffer_size(buffer_size_p) {
24
+ }
25
+
26
+ JSONFileHandle::JSONFileHandle(unique_ptr<FileHandle> file_handle_p)
27
+ : file_handle(move(file_handle_p)), can_seek(file_handle->CanSeek()),
28
+ plain_file_source(file_handle->OnDiskFile() && can_seek), file_size(file_handle->GetFileSize()),
29
+ read_position(0) {
30
+ }
31
+
32
+ idx_t JSONFileHandle::FileSize() const {
33
+ return file_size;
34
+ }
35
+
36
+ idx_t JSONFileHandle::Remaining() const {
37
+ return file_size - read_position;
38
+ }
39
+
40
+ bool JSONFileHandle::CanSeek() const {
41
+ return can_seek;
42
+ }
43
+
44
+ bool JSONFileHandle::PlainFileSource() const {
45
+ return plain_file_source;
46
+ }
47
+
48
+ idx_t JSONFileHandle::GetPositionAndSize(idx_t &position, idx_t requested_size) {
49
+ D_ASSERT(requested_size != 0);
50
+ position = read_position;
51
+ auto actual_size = MinValue<idx_t>(requested_size, Remaining());
52
+ read_position += actual_size;
53
+ return actual_size;
54
+ }
55
+
56
+ void JSONFileHandle::ReadAtPosition(const char *pointer, idx_t size, idx_t position) {
57
+ D_ASSERT(size != 0);
58
+ file_handle->Read((void *)pointer, size, position);
59
+ }
60
+
61
+ idx_t JSONFileHandle::Read(const char *pointer, idx_t requested_size) {
62
+ D_ASSERT(requested_size != 0);
63
+ auto actual_size = file_handle->Read((void *)pointer, requested_size);
64
+ read_position += actual_size;
65
+ return actual_size;
66
+ }
67
+
68
+ BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options_p, idx_t file_index_p,
69
+ string file_path_p)
70
+ : file_index(file_index_p), file_path(std::move(file_path_p)), context(context), options(std::move(options_p)),
71
+ buffer_index(0) {
72
+ }
73
+
74
+ void BufferedJSONReader::OpenJSONFile() {
75
+ lock_guard<mutex> guard(lock);
76
+ auto &file_system = FileSystem::GetFileSystem(context);
77
+ auto file_opener = FileOpener::Get(context);
78
+ auto regular_file_handle = file_system.OpenFile(file_path.c_str(), FileFlags::FILE_FLAGS_READ,
79
+ FileLockType::NO_LOCK, options.compression, file_opener);
80
+ file_handle = make_unique<JSONFileHandle>(std::move(regular_file_handle));
81
+ }
82
+
83
+ bool BufferedJSONReader::IsOpen() {
84
+ return file_handle != nullptr;
85
+ }
86
+
87
+ BufferedJSONReaderOptions &BufferedJSONReader::GetOptions() {
88
+ return options;
89
+ }
90
+
91
+ JSONFileHandle &BufferedJSONReader::GetFileHandle() const {
92
+ return *file_handle;
93
+ }
94
+
95
+ void BufferedJSONReader::InsertBuffer(idx_t buffer_idx, unique_ptr<JSONBufferHandle> &&buffer) {
96
+ lock_guard<mutex> guard(lock);
97
+ buffer_map.insert(make_pair(buffer_idx, std::move(buffer)));
98
+ }
99
+
100
+ JSONBufferHandle *BufferedJSONReader::GetBuffer(idx_t buffer_idx) {
101
+ lock_guard<mutex> guard(lock);
102
+ auto it = buffer_map.find(buffer_idx);
103
+ return it == buffer_map.end() ? nullptr : it->second.get();
104
+ }
105
+
106
+ AllocatedData BufferedJSONReader::RemoveBuffer(idx_t buffer_idx) {
107
+ lock_guard<mutex> guard(lock);
108
+ auto it = buffer_map.find(buffer_idx);
109
+ D_ASSERT(it != buffer_map.end());
110
+ auto result = std::move(it->second->buffer);
111
+ buffer_map.erase(it);
112
+ return result;
113
+ }
114
+
115
+ idx_t BufferedJSONReader::GetBufferIndex() {
116
+ return buffer_index++;
117
+ }
118
+
119
+ double BufferedJSONReader::GetProgress() const {
120
+ if (file_handle) {
121
+ return 100.0 * double(file_handle->Remaining()) / double(file_handle->FileSize());
122
+ } else {
123
+ return 0;
124
+ }
125
+ }
126
+
127
+ } // namespace duckdb
@@ -0,0 +1,123 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // buffered_json_reader.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/atomic.hpp"
12
+ #include "duckdb/common/enums/file_compression_type.hpp"
13
+ #include "duckdb/common/mutex.hpp"
14
+ #include "json_common.hpp"
15
+
16
+ namespace duckdb {
17
+
18
+ struct FileHandle;
19
+
20
//! Layout of the JSON values within a file. Stored as a single byte.
enum class JSONFormat : uint8_t {
	//! Detect the format automatically (either UNSTRUCTURED or NEWLINE_DELIMITED)
	AUTO_DETECT = 0,
	//! Objects follow one another; newlines may appear anywhere, including inside objects
	UNSTRUCTURED = 1,
	//! NDJSON: objects are separated by newlines and never contain a newline themselves
	NEWLINE_DELIMITED = 2,
};
28
+
29
+ struct BufferedJSONReaderOptions {
30
+ public:
31
+ //! The file path of the JSON file to read
32
+ string file_path;
33
+ //! The format of the JSON
34
+ JSONFormat format = JSONFormat::AUTO_DETECT;
35
+ //! Whether file is compressed or not, and if so which compression type
36
+ FileCompressionType compression = FileCompressionType::AUTO_DETECT;
37
+
38
+ public:
39
+ void Serialize(FieldWriter &writer);
40
+ void Deserialize(FieldReader &reader);
41
+ };
42
+
43
+ struct JSONBufferHandle {
44
+ public:
45
+ JSONBufferHandle(idx_t buffer_index, idx_t readers, AllocatedData &&buffer, idx_t buffer_size);
46
+
47
+ public:
48
+ //! Buffer index (within same file)
49
+ const idx_t buffer_index;
50
+
51
+ //! Number of readers for this buffer
52
+ atomic<idx_t> readers;
53
+ //! The buffer
54
+ AllocatedData buffer;
55
+ //! The size of the data in the buffer (can be less than buffer.GetSize())
56
+ const idx_t buffer_size;
57
+ };
58
+
59
+ struct JSONFileHandle {
60
+ public:
61
+ explicit JSONFileHandle(unique_ptr<FileHandle> file_handle);
62
+
63
+ idx_t FileSize() const;
64
+ idx_t Remaining() const;
65
+
66
+ bool CanSeek() const;
67
+ bool PlainFileSource() const;
68
+
69
+ idx_t GetPositionAndSize(idx_t &position, idx_t requested_size);
70
+ void ReadAtPosition(const char *pointer, idx_t size, idx_t position);
71
+ idx_t Read(const char *pointer, idx_t requested_size);
72
+
73
+ private:
74
+ //! The JSON file handle
75
+ unique_ptr<FileHandle> file_handle;
76
+
77
+ //! File properties
78
+ const bool can_seek;
79
+ const bool plain_file_source;
80
+ const idx_t file_size;
81
+
82
+ //! Read properties
83
+ idx_t read_position;
84
+ };
85
+
86
+ class BufferedJSONReader {
87
+ public:
88
+ BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options, idx_t file_index, string file_path);
89
+
90
+ void OpenJSONFile();
91
+ bool IsOpen();
92
+
93
+ BufferedJSONReaderOptions &GetOptions();
94
+ JSONFileHandle &GetFileHandle() const;
95
+
96
+ void InsertBuffer(idx_t buffer_idx, unique_ptr<JSONBufferHandle> &&buffer);
97
+ JSONBufferHandle *GetBuffer(idx_t buffer_idx);
98
+ AllocatedData RemoveBuffer(idx_t buffer_idx);
99
+ idx_t GetBufferIndex();
100
+
101
+ double GetProgress() const;
102
+
103
+ public:
104
+ mutex lock;
105
+
106
+ //! File index / path
107
+ const idx_t file_index;
108
+ const string file_path;
109
+
110
+ private:
111
+ ClientContext &context;
112
+ BufferedJSONReaderOptions options;
113
+
114
+ //! File handle
115
+ unique_ptr<JSONFileHandle> file_handle;
116
+
117
+ //! Next buffer index within the file
118
+ idx_t buffer_index;
119
+ //! Mapping from batch index to currently held buffers
120
+ unordered_map<idx_t, unique_ptr<JSONBufferHandle>> buffer_map;
121
+ };
122
+
123
+ } // namespace duckdb
@@ -11,112 +11,75 @@
11
11
  #include "duckdb/common/operator/cast_operators.hpp"
12
12
  #include "duckdb/common/operator/decimal_cast_operators.hpp"
13
13
  #include "duckdb/common/operator/string_cast.hpp"
14
- #include "duckdb/execution/expression_executor.hpp"
15
14
  #include "duckdb/planner/expression/bound_function_expression.hpp"
16
15
  #include "yyjson.hpp"
17
16
 
18
17
  namespace duckdb {
19
18
 
20
- struct JSONReadFunctionData : public FunctionData {
19
+ class JSONAllocator {
21
20
  public:
22
- JSONReadFunctionData(bool constant, string path_p, idx_t len);
23
- unique_ptr<FunctionData> Copy() const override;
24
- bool Equals(const FunctionData &other_p) const override;
25
- static unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
26
- vector<unique_ptr<Expression>> &arguments);
27
-
28
- public:
29
- const bool constant;
30
- const string path;
31
- const char *ptr;
32
- const size_t len;
33
- };
34
-
35
- struct JSONReadManyFunctionData : public FunctionData {
36
- public:
37
- JSONReadManyFunctionData(vector<string> paths_p, vector<size_t> lens_p);
38
- unique_ptr<FunctionData> Copy() const override;
39
- bool Equals(const FunctionData &other_p) const override;
40
- static unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
41
- vector<unique_ptr<Expression>> &arguments);
42
-
43
- public:
44
- const vector<string> paths;
45
- vector<const char *> ptrs;
46
- const vector<size_t> lens;
47
- };
48
-
49
- template <class YYJSON_DOC_T>
50
- static inline void CleanupDoc(YYJSON_DOC_T *doc) {
51
- throw InternalException("Unknown yyjson document type");
52
- }
21
+ explicit JSONAllocator(Allocator &allocator)
22
+ : arena_allocator(allocator), yyjson_allocator({Allocate, Reallocate, Free, &arena_allocator}) {
23
+ }
53
24
 
54
- template <>
55
- inline void CleanupDoc(yyjson_doc *doc) {
56
- yyjson_doc_free(doc);
57
- }
25
+ inline yyjson_alc *GetYYJSONAllocator() {
26
+ return &yyjson_allocator;
27
+ }
58
28
 
59
- template <>
60
- inline void CleanupDoc(yyjson_mut_doc *doc) {
61
- yyjson_mut_doc_free(doc);
62
- }
29
+ void Reset() {
30
+ arena_allocator.Reset();
31
+ }
63
32
 
64
- template <class YYJSON_DOC_T>
65
- class DocPointer {
66
33
  private:
67
- YYJSON_DOC_T *doc;
68
-
69
- public:
70
- explicit DocPointer(YYJSON_DOC_T *doc) : doc(doc) {
34
+ static inline void *Allocate(void *ctx, size_t size) {
35
+ auto alloc = (ArenaAllocator *)ctx;
36
+ return alloc->AllocateAligned(size);
71
37
  }
72
38
 
73
- DocPointer(const DocPointer &obj) = delete;
74
- DocPointer &operator=(const DocPointer &obj) = delete;
75
-
76
- DocPointer(DocPointer &&other) noexcept {
77
- this->doc = other.doc;
78
- other.doc = nullptr;
39
+ static inline void *Reallocate(void *ctx, void *ptr, size_t old_size, size_t size) {
40
+ auto alloc = (ArenaAllocator *)ctx;
41
+ return alloc->ReallocateAligned((data_ptr_t)ptr, old_size, size);
79
42
  }
80
43
 
81
- void operator=(DocPointer &&other) noexcept {
82
- CleanupDoc<YYJSON_DOC_T>(doc);
83
- this->ptr = other.ptr;
84
- other.ptr = nullptr;
44
+ static inline void Free(void *ctx, void *ptr) {
45
+ // NOP because ArenaAllocator can't free
85
46
  }
86
47
 
87
- inline YYJSON_DOC_T *operator*() const {
88
- return doc;
89
- }
48
+ private:
49
+ ArenaAllocator arena_allocator;
50
+ yyjson_alc yyjson_allocator;
51
+ };
90
52
 
91
- inline YYJSON_DOC_T *operator->() const {
92
- return doc;
93
- }
53
+ struct JSONCommon {
54
+ public:
55
+ static constexpr auto JSON_TYPE_NAME = "JSON";
94
56
 
95
- inline bool IsNull() const {
96
- return doc == nullptr;
57
+ static const LogicalType JSONType() {
58
+ auto json_type = LogicalType(LogicalTypeId::VARCHAR);
59
+ json_type.SetAlias(JSON_TYPE_NAME);
60
+ return json_type;
97
61
  }
98
62
 
99
- ~DocPointer() {
100
- CleanupDoc<YYJSON_DOC_T>(doc);
63
+ static bool LogicalTypeIsJSON(const LogicalType &type) {
64
+ return type.id() == LogicalTypeId::VARCHAR && type.HasAlias() && type.GetAlias() == JSON_TYPE_NAME;
101
65
  }
102
- };
103
66
 
104
- struct JSONCommon {
105
- private:
106
- //! Read/Write flag that make sense for us
67
+ public:
68
+ //! Read/Write flags
107
69
  static constexpr auto READ_FLAG = YYJSON_READ_ALLOW_INF_AND_NAN | YYJSON_READ_ALLOW_TRAILING_COMMAS;
70
+ static constexpr auto STOP_READ_FLAG = READ_FLAG | YYJSON_READ_STOP_WHEN_DONE | YYJSON_READ_INSITU;
108
71
  static constexpr auto WRITE_FLAG = YYJSON_WRITE_ALLOW_INF_AND_NAN;
109
72
 
110
73
  public:
111
74
  //! Constant JSON type strings
112
- static constexpr auto TYPE_STRING_NULL = "NULL";
113
- static constexpr auto TYPE_STRING_BOOLEAN = "BOOLEAN";
114
- static constexpr auto TYPE_STRING_BIGINT = "BIGINT";
115
- static constexpr auto TYPE_STRING_UBIGINT = "UBIGINT";
116
- static constexpr auto TYPE_STRING_DOUBLE = "DOUBLE";
117
- static constexpr auto TYPE_STRING_VARCHAR = "VARCHAR";
118
- static constexpr auto TYPE_STRING_ARRAY = "ARRAY";
119
- static constexpr auto TYPE_STRING_OBJECT = "OBJECT";
75
+ static constexpr char const *TYPE_STRING_NULL = "NULL";
76
+ static constexpr char const *TYPE_STRING_BOOLEAN = "BOOLEAN";
77
+ static constexpr char const *TYPE_STRING_BIGINT = "BIGINT";
78
+ static constexpr char const *TYPE_STRING_UBIGINT = "UBIGINT";
79
+ static constexpr char const *TYPE_STRING_DOUBLE = "DOUBLE";
80
+ static constexpr char const *TYPE_STRING_VARCHAR = "VARCHAR";
81
+ static constexpr char const *TYPE_STRING_ARRAY = "ARRAY";
82
+ static constexpr char const *TYPE_STRING_OBJECT = "OBJECT";
120
83
 
121
84
  template <class YYJSON_VAL_T>
122
85
  static inline const char *const ValTypeToString(YYJSON_VAL_T *val) {
@@ -143,59 +106,62 @@ public:
143
106
  }
144
107
  }
145
108
 
146
- public:
147
- static inline DocPointer<yyjson_mut_doc> CreateDocument() {
148
- return DocPointer<yyjson_mut_doc>(yyjson_mut_doc_new(nullptr));
109
+ template <class YYJSON_VAL_T>
110
+ static inline constexpr string_t ValTypeToStringT(YYJSON_VAL_T *val) {
111
+ return string_t(ValTypeToString<YYJSON_VAL_T>(val));
149
112
  }
150
113
 
151
- //! Read JSON document (returns nullptr if invalid JSON)
152
- static inline DocPointer<yyjson_doc> ReadDocumentUnsafe(const string_t &input) {
153
- return DocPointer<yyjson_doc>(yyjson_read(input.GetDataUnsafe(), input.GetSize(), READ_FLAG));
154
- }
155
- //! Read JSON document (throws error if malformed JSON)
156
- static inline DocPointer<yyjson_doc> ReadDocument(const string_t &input) {
157
- auto result = ReadDocumentUnsafe(input);
158
- if (result.IsNull()) {
159
- throw InvalidInputException("malformed JSON");
114
+ public:
115
+ static inline yyjson_mut_doc *CreateDocument(yyjson_alc *alc) {
116
+ D_ASSERT(alc);
117
+ return yyjson_mut_doc_new(alc);
118
+ }
119
+ static inline yyjson_doc *ReadDocumentUnsafe(char *data, idx_t size, const yyjson_read_flag flg, yyjson_alc *alc,
120
+ yyjson_read_err *err = nullptr) {
121
+ D_ASSERT(alc);
122
+ return yyjson_read_opts(data, size, flg, alc, err);
123
+ }
124
+ static inline yyjson_doc *ReadDocumentUnsafe(const string_t &input, const yyjson_read_flag flg, yyjson_alc *alc,
125
+ yyjson_read_err *err = nullptr) {
126
+ return ReadDocumentUnsafe(input.GetDataWriteable(), input.GetSize(), flg, alc, err);
127
+ }
128
+ static inline yyjson_doc *ReadDocument(char *data, idx_t size, const yyjson_read_flag flg, yyjson_alc *alc) {
129
+ yyjson_read_err error;
130
+ auto result = ReadDocumentUnsafe(data, size, flg, alc, &error);
131
+ if (error.code != YYJSON_READ_SUCCESS) {
132
+ ThrowParseError(data, size, error);
160
133
  }
161
134
  return result;
162
135
  }
163
- //! Some wrappers around writes so we don't have to free the malloc'ed char[]
164
- static inline unique_ptr<char, void (*)(void *)> WriteVal(yyjson_val *val, idx_t &len) {
165
- return unique_ptr<char, decltype(free) *>(
166
- reinterpret_cast<char *>(yyjson_val_write(val, WRITE_FLAG, (size_t *)&len)), free);
167
- }
168
- static inline unique_ptr<char, void (*)(void *)> WriteVal(yyjson_mut_val *val, idx_t &len) {
169
- return unique_ptr<char, decltype(free) *>(
170
- reinterpret_cast<char *>(yyjson_mut_val_write(val, WRITE_FLAG, (size_t *)&len)), free);
136
+ static inline yyjson_doc *ReadDocument(const string_t &input, const yyjson_read_flag flg, yyjson_alc *alc) {
137
+ return ReadDocument(input.GetDataWriteable(), input.GetSize(), flg, alc);
171
138
  }
172
- static unique_ptr<char, void (*)(void *)> WriteMutDoc(yyjson_mut_doc *doc, idx_t &len) {
173
- return unique_ptr<char, decltype(free) *>(
174
- reinterpret_cast<char *>(yyjson_mut_write(doc, WRITE_FLAG, (size_t *)&len)), free);
139
+ static string FormatParseError(const char *data, idx_t length, yyjson_read_err &error, const string &extra = "") {
140
+ D_ASSERT(error.code != YYJSON_READ_SUCCESS);
141
+ // Truncate, so we don't print megabytes worth of JSON
142
+ string input = length > 50 ? string(data, 47) + "..." : string(data, length);
143
+ // Have to replace \r, otherwise output is unreadable
144
+ input = StringUtil::Replace(input, "\r", "\\r");
145
+ return StringUtil::Format("Malformed JSON at byte %lld of input: %s. %s Input: %s", error.pos, error.msg, extra,
146
+ input);
175
147
  }
176
- //! Vector writes
177
- static inline string_t WriteVal(yyjson_val *val, Vector &vector) {
178
- idx_t len;
179
- auto data = WriteVal(val, len);
180
- return StringVector::AddString(vector, data.get(), len);
148
+ static void ThrowParseError(const char *data, idx_t length, yyjson_read_err &error, const string &extra = "") {
149
+ throw InvalidInputException(FormatParseError(data, length, error, extra));
181
150
  }
182
- static inline string_t WriteVal(yyjson_mut_val *val, Vector &vector) {
183
- idx_t len;
184
- auto data = WriteVal(val, len);
185
- return StringVector::AddString(vector, data.get(), len);
151
+
152
+ template <class YYJSON_VAL_T>
153
+ static inline char *WriteVal(YYJSON_VAL_T *val, yyjson_alc *alc, idx_t &len) {
154
+ throw InternalException("Unknown yyjson val type");
186
155
  }
187
- static inline string_t WriteDoc(yyjson_mut_doc *doc, Vector &vector) {
156
+ template <class YYJSON_VAL_T>
157
+ static inline string_t WriteVal(YYJSON_VAL_T *val, yyjson_alc *alc) {
158
+ D_ASSERT(alc);
188
159
  idx_t len;
189
- auto data = WriteMutDoc(doc, len);
190
- return StringVector::AddString(vector, data.get(), len);
160
+ auto data = WriteVal<YYJSON_VAL_T>(val, alc, len);
161
+ return string_t(data, len);
191
162
  }
192
163
  //! Throw an error with the printed yyjson_val
193
- static void ThrowValFormatError(string error_string, yyjson_val *val) {
194
- idx_t len;
195
- auto data = WriteVal(val, len);
196
- error_string = StringUtil::Format(error_string, string(data.get(), len));
197
- throw InvalidInputException(error_string);
198
- }
164
+ static void ThrowValFormatError(string error_string, yyjson_val *val);
199
165
 
200
166
  public:
201
167
  //! Validate path with $ syntax
@@ -241,116 +207,6 @@ public:
241
207
  }
242
208
  }
243
209
 
244
- public:
245
- //! Single-argument JSON read function, i.e. json_type('[1, 2, 3]')
246
- template <class T>
247
- static void UnaryExecute(DataChunk &args, ExpressionState &state, Vector &result,
248
- std::function<T(yyjson_val *, Vector &)> fun) {
249
- auto &inputs = args.data[0];
250
- UnaryExecutor::Execute<string_t, T>(inputs, result, args.size(), [&](string_t input) {
251
- auto doc = JSONCommon::ReadDocument(input);
252
- return fun(doc->root, result);
253
- });
254
- }
255
-
256
- //! Two-argument JSON read function (with path query), i.e. json_type('[1, 2, 3]', '$[0]')
257
- template <class T>
258
- static void BinaryExecute(DataChunk &args, ExpressionState &state, Vector &result,
259
- std::function<T(yyjson_val *, Vector &)> fun) {
260
- auto &func_expr = (BoundFunctionExpression &)state.expr;
261
- const auto &info = (JSONReadFunctionData &)*func_expr.bind_info;
262
-
263
- auto &inputs = args.data[0];
264
- if (info.constant) {
265
- // Constant path
266
- const char *ptr = info.ptr;
267
- const idx_t &len = info.len;
268
- UnaryExecutor::ExecuteWithNulls<string_t, T>(
269
- inputs, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) {
270
- auto doc = ReadDocument(input);
271
- yyjson_val *val;
272
- if (!(val = GetPointerUnsafe<yyjson_val>(doc->root, ptr, len))) {
273
- mask.SetInvalid(idx);
274
- return T {};
275
- } else {
276
- return fun(val, result);
277
- }
278
- });
279
- } else {
280
- // Columnref path
281
- auto &paths = args.data[1];
282
- BinaryExecutor::ExecuteWithNulls<string_t, string_t, T>(
283
- inputs, paths, result, args.size(), [&](string_t input, string_t path, ValidityMask &mask, idx_t idx) {
284
- auto doc = ReadDocument(input);
285
- yyjson_val *val;
286
- if (!(val = GetPointer<yyjson_val>(doc->root, path))) {
287
- mask.SetInvalid(idx);
288
- return T {};
289
- } else {
290
- return fun(val, result);
291
- }
292
- });
293
- }
294
- if (args.AllConstant()) {
295
- result.SetVectorType(VectorType::CONSTANT_VECTOR);
296
- }
297
- }
298
-
299
- //! JSON read function with list of path queries, i.e. json_type('[1, 2, 3]', ['$[0]', '$[1]'])
300
- template <class T>
301
- static void ExecuteMany(DataChunk &args, ExpressionState &state, Vector &result,
302
- std::function<T(yyjson_val *, Vector &)> fun) {
303
- auto &func_expr = (BoundFunctionExpression &)state.expr;
304
- const auto &info = (JSONReadManyFunctionData &)*func_expr.bind_info;
305
- D_ASSERT(info.ptrs.size() == info.lens.size());
306
-
307
- const auto count = args.size();
308
- const idx_t num_paths = info.ptrs.size();
309
- const idx_t list_size = count * num_paths;
310
-
311
- UnifiedVectorFormat input_data;
312
- auto &input_vector = args.data[0];
313
- input_vector.ToUnifiedFormat(count, input_data);
314
- auto inputs = (string_t *)input_data.data;
315
-
316
- ListVector::Reserve(result, list_size);
317
- auto list_entries = FlatVector::GetData<list_entry_t>(result);
318
- auto &list_validity = FlatVector::Validity(result);
319
-
320
- auto &child = ListVector::GetEntry(result);
321
- auto child_data = FlatVector::GetData<T>(child);
322
- auto &child_validity = FlatVector::Validity(child);
323
-
324
- idx_t offset = 0;
325
- yyjson_val *val;
326
- for (idx_t i = 0; i < count; i++) {
327
- auto idx = input_data.sel->get_index(i);
328
- if (!input_data.validity.RowIsValid(idx)) {
329
- list_validity.SetInvalid(i);
330
- continue;
331
- }
332
-
333
- auto doc = ReadDocument(inputs[idx]);
334
- for (idx_t path_i = 0; path_i < num_paths; path_i++) {
335
- auto child_idx = offset + path_i;
336
- if (!(val = GetPointerUnsafe<yyjson_val>(doc->root, info.ptrs[path_i], info.lens[path_i]))) {
337
- child_validity.SetInvalid(child_idx);
338
- } else {
339
- child_data[child_idx] = fun(val, child);
340
- }
341
- }
342
-
343
- list_entries[i].offset = offset;
344
- list_entries[i].length = num_paths;
345
- offset += num_paths;
346
- }
347
- ListVector::SetListSize(result, offset);
348
-
349
- if (args.AllConstant()) {
350
- result.SetVectorType(VectorType::CONSTANT_VECTOR);
351
- }
352
- }
353
-
354
210
  private:
355
211
  //! Get JSON pointer using /field/index/... notation
356
212
  template <class YYJSON_VAL_T>
@@ -502,6 +358,15 @@ private:
502
358
  }
503
359
  };
504
360
 
361
+ template <>
362
+ inline char *JSONCommon::WriteVal(yyjson_val *val, yyjson_alc *alc, idx_t &len) {
363
+ return yyjson_val_write_opts(val, JSONCommon::WRITE_FLAG, alc, (size_t *)&len, nullptr);
364
+ }
365
+ template <>
366
+ inline char *JSONCommon::WriteVal(yyjson_mut_val *val, yyjson_alc *alc, idx_t &len) {
367
+ return yyjson_mut_val_write_opts(val, JSONCommon::WRITE_FLAG, alc, (size_t *)&len, nullptr);
368
+ }
369
+
505
370
  template <>
506
371
  inline yyjson_val *JSONCommon::TemplatedGetPointer(yyjson_val *root, const char *ptr, const idx_t &len) {
507
372
  return len == 1 ? root : unsafe_yyjson_get_pointer(root, ptr, len);