duckdb 0.7.2-dev3515.0 → 0.7.2-dev3666.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. package/configure.py +2 -0
  2. package/package.json +1 -1
  3. package/src/database.cpp +1 -0
  4. package/src/duckdb/extension/json/buffered_json_reader.cpp +56 -17
  5. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +56 -31
  6. package/src/duckdb/extension/json/include/json_common.hpp +5 -4
  7. package/src/duckdb/extension/json/include/json_executors.hpp +13 -18
  8. package/src/duckdb/extension/json/include/json_functions.hpp +3 -0
  9. package/src/duckdb/extension/json/include/json_scan.hpp +106 -153
  10. package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
  11. package/src/duckdb/extension/json/json_common.cpp +1 -1
  12. package/src/duckdb/extension/json/json_functions/copy_json.cpp +94 -38
  13. package/src/duckdb/extension/json/json_functions/json_contains.cpp +7 -8
  14. package/src/duckdb/extension/json/json_functions/json_create.cpp +7 -7
  15. package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +4 -4
  16. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +4 -4
  17. package/src/duckdb/extension/json/json_functions/json_structure.cpp +7 -5
  18. package/src/duckdb/extension/json/json_functions/json_transform.cpp +10 -8
  19. package/src/duckdb/extension/json/json_functions/json_valid.cpp +1 -1
  20. package/src/duckdb/extension/json/json_functions/read_json.cpp +167 -169
  21. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +37 -16
  22. package/src/duckdb/extension/json/json_functions.cpp +11 -4
  23. package/src/duckdb/extension/json/json_scan.cpp +593 -374
  24. package/src/duckdb/extension/parquet/parquet-extension.cpp +5 -0
  25. package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +42 -0
  26. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -0
  27. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  28. package/src/duckdb/src/common/constants.cpp +1 -0
  29. package/src/duckdb/src/common/file_system.cpp +26 -6
  30. package/src/duckdb/src/common/local_file_system.cpp +0 -13
  31. package/src/duckdb/src/common/types/vector.cpp +3 -3
  32. package/src/duckdb/src/common/types/vector_buffer.cpp +11 -3
  33. package/src/duckdb/src/common/types/vector_cache.cpp +5 -5
  34. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +12 -6
  35. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +10 -0
  36. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +2 -2
  37. package/src/duckdb/src/function/macro_function.cpp +43 -0
  38. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -3
  39. package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -0
  40. package/src/duckdb/src/function/scalar_macro_function.cpp +10 -0
  41. package/src/duckdb/src/function/table/copy_csv.cpp +68 -18
  42. package/src/duckdb/src/function/table/read_csv.cpp +30 -3
  43. package/src/duckdb/src/function/table/version/pragma_version.cpp +8 -2
  44. package/src/duckdb/src/function/table_macro_function.cpp +10 -0
  45. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/column_dependency_manager.hpp +1 -1
  46. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +3 -1
  47. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp +0 -6
  48. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp +0 -6
  49. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
  50. package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  52. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -3
  53. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +3 -3
  54. package/src/duckdb/src/include/duckdb/common/file_system.hpp +5 -0
  55. package/src/duckdb/src/include/duckdb/common/http_state.hpp +2 -1
  56. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +6 -6
  57. package/src/duckdb/src/include/duckdb/common/limits.hpp +46 -46
  58. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +8 -8
  59. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +6 -6
  60. package/src/duckdb/src/include/duckdb/common/operator/convert_to_string.hpp +1 -1
  61. package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +2 -4
  62. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +1 -1
  65. package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
  66. package/src/duckdb/src/include/duckdb/common/string_util.hpp +7 -7
  67. package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +10 -10
  68. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -12
  69. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_iterators.hpp +2 -2
  70. package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +12 -2
  72. package/src/duckdb/src/include/duckdb/common/types.hpp +2 -2
  73. package/src/duckdb/src/include/duckdb/common/winapi.hpp +1 -1
  74. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
  75. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +9 -5
  76. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_type.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +10 -14
  78. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +7 -1
  79. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +3 -4
  80. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +7 -2
  81. package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +5 -0
  83. package/src/duckdb/src/include/duckdb/function/udf_function.hpp +56 -50
  84. package/src/duckdb/src/include/duckdb/main/appender.hpp +2 -2
  85. package/src/duckdb/src/include/duckdb/main/client_context.hpp +2 -2
  86. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -1
  87. package/src/duckdb/src/include/duckdb/main/connection.hpp +8 -9
  88. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  89. package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -3
  90. package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -7
  91. package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +1 -1
  92. package/src/duckdb/src/include/duckdb/parser/column_list.hpp +7 -7
  93. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +4 -7
  94. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +8 -12
  95. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +6 -20
  96. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +6 -18
  97. package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +4 -8
  98. package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +4 -38
  99. package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +5 -2
  100. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +10 -10
  101. package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +2 -2
  102. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -1
  103. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +1 -1
  104. package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +2 -2
  105. package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +2 -2
  106. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +42 -0
  107. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +0 -7
  108. package/src/duckdb/src/parser/parsed_data/create_info.cpp +19 -8
  109. package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +46 -0
  110. package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +56 -0
  111. package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +47 -0
  112. package/src/duckdb/src/parser/parsed_data/detach_info.cpp +34 -0
  113. package/src/duckdb/src/parser/parsed_data/drop_info.cpp +46 -0
  114. package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +24 -0
  115. package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +37 -0
  116. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +27 -9
  117. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +9 -4
  118. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +2 -1
  119. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
  120. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +1 -1
  121. package/src/duckdb/src/planner/logical_operator.cpp +1 -2
  122. package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -25
  123. package/src/duckdb/src/planner/operator/logical_insert.cpp +30 -0
  124. package/src/duckdb/src/planner/operator/logical_simple.cpp +33 -5
  125. package/src/duckdb/src/planner/parsed_data/bound_create_table_info.cpp +6 -16
  126. package/src/duckdb/src/planner/planner.cpp +4 -13
  127. package/src/duckdb/src/storage/checkpoint_manager.cpp +12 -6
  128. package/src/duckdb/src/storage/single_file_block_manager.cpp +0 -4
  129. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  130. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5735 -5773
  131. package/src/duckdb/ub_src_catalog_catalog_entry.cpp +1 -1
  132. package/src/duckdb/ub_src_parser_parsed_data.cpp +16 -0
  133. package/src/duckdb/src/catalog/catalog_entry/scalar_macro_catalog_entry.cpp +0 -104
package/configure.py CHANGED
@@ -63,6 +63,8 @@ elif 'DUCKDB_NODE_BINDIR' in os.environ:
63
63
  cflags += ['-g']
64
64
  if '-O0' in os.environ['DUCKDB_NODE_CFLAGS']:
65
65
  cflags += ['-O0']
66
+ if '-DNDEBUG' in os.environ['DUCKDB_NODE_CFLAGS']:
67
+ defines += ['NDEBUG']
66
68
 
67
69
  if 'DUCKDB_NODE_BUILD_CACHE' in os.environ:
68
70
  cache = {
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev3515.0",
5
+ "version": "0.7.2-dev3666.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/database.cpp CHANGED
@@ -328,6 +328,7 @@ ScanReplacement(duckdb::ClientContext &context, const std::string &table_name, d
328
328
  children.push_back(duckdb::make_uniq<duckdb::ConstantExpression>(std::move(param)));
329
329
  }
330
330
  table_function->function = duckdb::make_uniq<duckdb::FunctionExpression>(jsargs.function, std::move(children));
331
+ table_function->alias = table_name;
331
332
  return std::move(table_function);
332
333
  }
333
334
  return nullptr;
package/src/duckdb/extension/json/buffered_json_reader.cpp CHANGED
@@ -6,30 +6,35 @@
6
6
 
7
7
  namespace duckdb {
8
8
 
9
- void BufferedJSONReaderOptions::Serialize(FieldWriter &writer) {
10
- writer.WriteString(file_path);
9
+ void BufferedJSONReaderOptions::Serialize(FieldWriter &writer) const {
11
10
  writer.WriteField<JSONFormat>(format);
11
+ writer.WriteField<JSONRecordType>(record_type);
12
12
  writer.WriteField<FileCompressionType>(compression);
13
+ writer.WriteSerializable(file_options);
13
14
  }
14
15
 
15
16
  void BufferedJSONReaderOptions::Deserialize(FieldReader &reader) {
16
- file_path = reader.ReadRequired<string>();
17
17
  format = reader.ReadRequired<JSONFormat>();
18
+ record_type = reader.ReadRequired<JSONRecordType>();
18
19
  compression = reader.ReadRequired<FileCompressionType>();
20
+ file_options = reader.ReadRequiredSerializable<MultiFileReaderOptions, MultiFileReaderOptions>();
19
21
  }
20
22
 
21
23
  JSONBufferHandle::JSONBufferHandle(idx_t buffer_index_p, idx_t readers_p, AllocatedData &&buffer_p, idx_t buffer_size_p)
22
24
  : buffer_index(buffer_index_p), readers(readers_p), buffer(std::move(buffer_p)), buffer_size(buffer_size_p) {
23
25
  }
24
26
 
25
- JSONFileHandle::JSONFileHandle(duckdb::unique_ptr<FileHandle> file_handle_p, Allocator &allocator_p)
27
+ JSONFileHandle::JSONFileHandle(unique_ptr<FileHandle> file_handle_p, Allocator &allocator_p)
26
28
  : file_handle(std::move(file_handle_p)), allocator(allocator_p), can_seek(file_handle->CanSeek()),
27
29
  plain_file_source(file_handle->OnDiskFile() && can_seek), file_size(file_handle->GetFileSize()), read_position(0),
28
30
  requested_reads(0), actual_reads(0), cached_size(0) {
29
31
  }
30
32
 
31
33
  void JSONFileHandle::Close() {
32
- file_handle->Close();
34
+ if (file_handle) {
35
+ file_handle->Close();
36
+ file_handle = nullptr;
37
+ }
33
38
  cached_buffers.clear();
34
39
  }
35
40
 
@@ -155,15 +160,15 @@ idx_t JSONFileHandle::ReadInternal(const char *pointer, const idx_t requested_si
155
160
  return total_read_size;
156
161
  }
157
162
 
158
- BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options_p, string file_path_p)
159
- : file_path(std::move(file_path_p)), context(context), options(std::move(options_p)), buffer_index(0) {
163
+ BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options_p, string file_name_p)
164
+ : context(context), options(options_p), file_name(std::move(file_name_p)), buffer_index(0) {
160
165
  }
161
166
 
162
167
  void BufferedJSONReader::OpenJSONFile() {
163
168
  lock_guard<mutex> guard(lock);
164
169
  auto &file_system = FileSystem::GetFileSystem(context);
165
170
  auto regular_file_handle =
166
- file_system.OpenFile(file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, options.compression);
171
+ file_system.OpenFile(file_name.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, options.compression);
167
172
  file_handle = make_uniq<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
168
173
  }
169
174
 
@@ -177,7 +182,7 @@ void BufferedJSONReader::CloseJSONFile() {
177
182
  }
178
183
  }
179
184
 
180
- bool BufferedJSONReader::IsOpen() {
185
+ bool BufferedJSONReader::IsOpen() const {
181
186
  return file_handle != nullptr;
182
187
  }
183
188
 
@@ -185,11 +190,41 @@ BufferedJSONReaderOptions &BufferedJSONReader::GetOptions() {
185
190
  return options;
186
191
  }
187
192
 
193
+ const BufferedJSONReaderOptions &BufferedJSONReader::GetOptions() const {
194
+ return options;
195
+ }
196
+
197
+ JSONFormat BufferedJSONReader::GetFormat() const {
198
+ return options.format;
199
+ }
200
+
201
+ void BufferedJSONReader::SetFormat(JSONFormat format) {
202
+ D_ASSERT(options.format == JSONFormat::AUTO_DETECT);
203
+ options.format = format;
204
+ }
205
+
206
+ JSONRecordType BufferedJSONReader::GetRecordType() const {
207
+ return options.record_type;
208
+ }
209
+
210
+ void BufferedJSONReader::SetRecordType(duckdb::JSONRecordType type) {
211
+ D_ASSERT(options.record_type == JSONRecordType::AUTO_DETECT);
212
+ options.record_type = type;
213
+ }
214
+
215
+ bool BufferedJSONReader::IsParallel() const {
216
+ return options.format == JSONFormat::NEWLINE_DELIMITED && file_handle->CanSeek();
217
+ }
218
+
219
+ const string &BufferedJSONReader::GetFileName() const {
220
+ return file_name;
221
+ }
222
+
188
223
  JSONFileHandle &BufferedJSONReader::GetFileHandle() const {
189
224
  return *file_handle;
190
225
  }
191
226
 
192
- void BufferedJSONReader::InsertBuffer(idx_t buffer_idx, duckdb::unique_ptr<JSONBufferHandle> &&buffer) {
227
+ void BufferedJSONReader::InsertBuffer(idx_t buffer_idx, unique_ptr<JSONBufferHandle> &&buffer) {
193
228
  lock_guard<mutex> guard(lock);
194
229
  buffer_map.insert(make_pair(buffer_idx, std::move(buffer)));
195
230
  }
@@ -220,7 +255,7 @@ void BufferedJSONReader::SetBufferLineOrObjectCount(idx_t index, idx_t count) {
220
255
  }
221
256
 
222
257
  idx_t BufferedJSONReader::GetLineNumber(idx_t buf_index, idx_t line_or_object_in_buf) {
223
- D_ASSERT(options.format == JSONFormat::UNSTRUCTURED || options.format == JSONFormat::NEWLINE_DELIMITED);
258
+ D_ASSERT(options.format != JSONFormat::AUTO_DETECT);
224
259
  while (true) {
225
260
  lock_guard<mutex> guard(lock);
226
261
  idx_t line = line_or_object_in_buf;
@@ -243,23 +278,23 @@ idx_t BufferedJSONReader::GetLineNumber(idx_t buf_index, idx_t line_or_object_in
243
278
 
244
279
  void BufferedJSONReader::ThrowParseError(idx_t buf_index, idx_t line_or_object_in_buf, yyjson_read_err &err,
245
280
  const string &extra) {
246
- string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "object";
281
+ string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "record/value";
247
282
  auto line = GetLineNumber(buf_index, line_or_object_in_buf);
248
- throw InvalidInputException("Malformed JSON in file \"%s\", at byte %llu in %s %llu: %s. %s", file_path,
283
+ throw InvalidInputException("Malformed JSON in file \"%s\", at byte %llu in %s %llu: %s. %s", file_name,
249
284
  err.pos + 1, unit, line + 1, err.msg, extra);
250
285
  }
251
286
 
252
287
  void BufferedJSONReader::ThrowTransformError(idx_t buf_index, idx_t line_or_object_in_buf,
253
288
  const string &error_message) {
254
- string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "object";
289
+ string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "record/value";
255
290
  auto line = GetLineNumber(buf_index, line_or_object_in_buf);
256
- throw InvalidInputException("JSON transform error in file \"%s\", in %s %llu: %s", file_path, unit, line,
291
+ throw InvalidInputException("JSON transform error in file \"%s\", in %s %llu: %s.", file_name, unit, line,
257
292
  error_message);
258
293
  }
259
294
 
260
295
  double BufferedJSONReader::GetProgress() const {
261
- if (file_handle) {
262
- return 100.0 * double(file_handle->Remaining()) / double(file_handle->FileSize());
296
+ if (IsOpen()) {
297
+ return 100.0 - 100.0 * double(file_handle->Remaining()) / double(file_handle->FileSize());
263
298
  } else {
264
299
  return 0;
265
300
  }
@@ -270,6 +305,10 @@ void BufferedJSONReader::Reset() {
270
305
  buffer_map.clear();
271
306
  buffer_line_or_object_counts.clear();
272
307
 
308
+ if (!file_handle) {
309
+ return;
310
+ }
311
+
273
312
  if (file_handle->CanSeek()) {
274
313
  file_handle->Seek(0);
275
314
  } else {
package/src/duckdb/extension/json/include/buffered_json_reader.hpp CHANGED
@@ -11,6 +11,7 @@
11
11
  #include "duckdb/common/atomic.hpp"
12
12
  #include "duckdb/common/enums/file_compression_type.hpp"
13
13
  #include "duckdb/common/file_system.hpp"
14
+ #include "duckdb/common/multi_file_reader.hpp"
14
15
  #include "duckdb/common/mutex.hpp"
15
16
  #include "json_common.hpp"
16
17
 
@@ -19,23 +20,35 @@ namespace duckdb {
19
20
  enum class JSONFormat : uint8_t {
20
21
  //! Auto-detect format (UNSTRUCTURED / NEWLINE_DELIMITED)
21
22
  AUTO_DETECT = 0,
22
- //! One object after another, newlines can be anywhere
23
+ //! One unit after another, newlines can be anywhere
23
24
  UNSTRUCTURED = 1,
24
- //! Objects are separated by newlines, newlines do not occur within values (NDJSON)
25
+ //! Units are separated by newlines, newlines do not occur within Units (NDJSON)
25
26
  NEWLINE_DELIMITED = 2,
27
+ //! File is one big array of units
28
+ ARRAY = 3,
29
+ };
30
+
31
+ enum class JSONRecordType : uint8_t {
32
+ AUTO_DETECT = 0,
33
+ //! Sequential objects that are unpacked
34
+ RECORDS = 1,
35
+ //! Any other JSON type, e.g., ARRAY
36
+ VALUES = 2,
26
37
  };
27
38
 
28
39
  struct BufferedJSONReaderOptions {
29
40
  public:
30
- //! The file path of the JSON file to read
31
- string file_path;
32
41
  //! The format of the JSON
33
42
  JSONFormat format = JSONFormat::AUTO_DETECT;
43
+ //! Whether record types in the JSON
44
+ JSONRecordType record_type = JSONRecordType::AUTO_DETECT;
34
45
  //! Whether file is compressed or not, and if so which compression type
35
46
  FileCompressionType compression = FileCompressionType::AUTO_DETECT;
47
+ //! Multi-file reader options
48
+ MultiFileReaderOptions file_options;
36
49
 
37
50
  public:
38
- void Serialize(FieldWriter &writer);
51
+ void Serialize(FieldWriter &writer) const;
39
52
  void Deserialize(FieldReader &reader);
40
53
  };
41
54
 
@@ -57,7 +70,7 @@ public:
57
70
 
58
71
  struct JSONFileHandle {
59
72
  public:
60
- JSONFileHandle(duckdb::unique_ptr<FileHandle> file_handle, Allocator &allocator);
73
+ JSONFileHandle(unique_ptr<FileHandle> file_handle, Allocator &allocator);
61
74
  void Close();
62
75
 
63
76
  idx_t FileSize() const;
@@ -79,7 +92,7 @@ private:
79
92
 
80
93
  private:
81
94
  //! The JSON file handle
82
- duckdb::unique_ptr<FileHandle> file_handle;
95
+ unique_ptr<FileHandle> file_handle;
83
96
  Allocator &allocator;
84
97
 
85
98
  //! File properties
@@ -99,17 +112,50 @@ private:
99
112
 
100
113
  class BufferedJSONReader {
101
114
  public:
102
- BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options, string file_path);
115
+ BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options, string file_name);
103
116
 
117
+ private:
118
+ ClientContext &context;
119
+ BufferedJSONReaderOptions options;
120
+
121
+ //! File name
122
+ const string file_name;
123
+ //! File handle
124
+ unique_ptr<JSONFileHandle> file_handle;
125
+
126
+ //! Next buffer index within the file
127
+ idx_t buffer_index;
128
+ //! Mapping from batch index to currently held buffers
129
+ unordered_map<idx_t, unique_ptr<JSONBufferHandle>> buffer_map;
130
+
131
+ //! Line count per buffer
132
+ vector<int64_t> buffer_line_or_object_counts;
133
+
134
+ public:
135
+ mutex lock;
136
+ MultiFileReaderData reader_data;
137
+
138
+ public:
104
139
  void OpenJSONFile();
105
140
  void CloseJSONFile();
106
- bool IsOpen();
141
+ bool IsOpen() const;
107
142
 
108
143
  BufferedJSONReaderOptions &GetOptions();
144
+ const BufferedJSONReaderOptions &GetOptions() const;
145
+
146
+ JSONFormat GetFormat() const;
147
+ void SetFormat(JSONFormat format);
148
+ JSONRecordType GetRecordType() const;
149
+ void SetRecordType(JSONRecordType type);
150
+
151
+ bool IsParallel() const;
152
+
153
+ const string &GetFileName() const;
109
154
  JSONFileHandle &GetFileHandle() const;
110
155
 
156
+ public:
111
157
  //! Insert/get/remove buffer (grabs the lock)
112
- void InsertBuffer(idx_t buffer_idx, duckdb::unique_ptr<JSONBufferHandle> &&buffer);
158
+ void InsertBuffer(idx_t buffer_idx, unique_ptr<JSONBufferHandle> &&buffer);
113
159
  JSONBufferHandle *GetBuffer(idx_t buffer_idx);
114
160
  AllocatedData RemoveBuffer(idx_t buffer_idx);
115
161
 
@@ -127,27 +173,6 @@ public:
127
173
 
128
174
  private:
129
175
  idx_t GetLineNumber(idx_t buf_index, idx_t line_or_object_in_buf);
130
-
131
- public:
132
- mutex lock;
133
-
134
- //! File path
135
- const string file_path;
136
-
137
- private:
138
- ClientContext &context;
139
- BufferedJSONReaderOptions options;
140
-
141
- //! File handle
142
- duckdb::unique_ptr<JSONFileHandle> file_handle;
143
-
144
- //! Next buffer index within the file
145
- idx_t buffer_index;
146
- //! Mapping from batch index to currently held buffers
147
- unordered_map<idx_t, duckdb::unique_ptr<JSONBufferHandle>> buffer_map;
148
-
149
- //! Line count per buffer
150
- vector<int64_t> buffer_line_or_object_counts;
151
176
  };
152
177
 
153
178
  } // namespace duckdb
package/src/duckdb/extension/json/include/json_common.hpp CHANGED
@@ -22,7 +22,7 @@ public:
22
22
  : arena_allocator(allocator), yyjson_allocator({Allocate, Reallocate, Free, &arena_allocator}) {
23
23
  }
24
24
 
25
- inline yyjson_alc *GetYYJSONAllocator() {
25
+ inline yyjson_alc *GetYYAlc() {
26
26
  return &yyjson_allocator;
27
27
  }
28
28
 
@@ -62,7 +62,7 @@ struct JSONKeyHash {
62
62
  memcpy(&result, k.ptr + k.len - sizeof(size_t), sizeof(size_t));
63
63
  } else {
64
64
  result = 0;
65
- duckdb::FastMemcpy(&result, k.ptr, k.len);
65
+ FastMemcpy(&result, k.ptr, k.len);
66
66
  }
67
67
  return result;
68
68
  }
@@ -73,7 +73,7 @@ struct JSONKeyEquality {
73
73
  if (a.len != b.len) {
74
74
  return false;
75
75
  }
76
- return duckdb::FastMemcmp(a.ptr, b.ptr, a.len) == 0;
76
+ return FastMemcmp(a.ptr, b.ptr, a.len) == 0;
77
77
  }
78
78
  };
79
79
 
@@ -98,7 +98,8 @@ public:
98
98
  public:
99
99
  //! Read/Write flags
100
100
  static constexpr auto READ_FLAG = YYJSON_READ_ALLOW_INF_AND_NAN | YYJSON_READ_ALLOW_TRAILING_COMMAS;
101
- static constexpr auto STOP_READ_FLAG = READ_FLAG | YYJSON_READ_STOP_WHEN_DONE | YYJSON_READ_INSITU;
101
+ static constexpr auto READ_STOP_FLAG = READ_FLAG | YYJSON_READ_STOP_WHEN_DONE;
102
+ static constexpr auto READ_INSITU_FLAG = READ_STOP_FLAG | YYJSON_READ_INSITU;
102
103
  static constexpr auto WRITE_FLAG = YYJSON_WRITE_ALLOW_INF_AND_NAN;
103
104
  static constexpr auto WRITE_PRETTY_FLAG = YYJSON_WRITE_ALLOW_INF_AND_NAN | YYJSON_WRITE_PRETTY;
104
105
 
package/src/duckdb/extension/json/include/json_executors.hpp CHANGED
@@ -20,7 +20,7 @@ public:
20
20
  static void UnaryExecute(DataChunk &args, ExpressionState &state, Vector &result,
21
21
  std::function<T(yyjson_val *, yyjson_alc *, Vector &)> fun) {
22
22
  auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
23
- auto alc = lstate.json_allocator.GetYYJSONAllocator();
23
+ auto alc = lstate.json_allocator.GetYYAlc();
24
24
 
25
25
  auto &inputs = args.data[0];
26
26
  UnaryExecutor::Execute<string_t, T>(inputs, result, args.size(), [&](string_t input) {
@@ -34,36 +34,32 @@ public:
34
34
  static void BinaryExecute(DataChunk &args, ExpressionState &state, Vector &result,
35
35
  std::function<T(yyjson_val *, yyjson_alc *, Vector &)> fun) {
36
36
  auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
37
- const auto &info = (JSONReadFunctionData &)*func_expr.bind_info;
37
+ const auto &info = func_expr.bind_info->Cast<JSONReadFunctionData>();
38
38
  auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
39
- auto alc = lstate.json_allocator.GetYYJSONAllocator();
39
+ auto alc = lstate.json_allocator.GetYYAlc();
40
40
 
41
41
  auto &inputs = args.data[0];
42
- if (info.constant) {
43
- // Constant path
42
+ if (info.constant) { // Constant path
44
43
  const char *ptr = info.ptr;
45
44
  const idx_t &len = info.len;
46
45
  UnaryExecutor::ExecuteWithNulls<string_t, T>(
47
46
  inputs, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) {
48
- auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG,
49
- lstate.json_allocator.GetYYJSONAllocator());
47
+ auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
50
48
  auto val = JSONCommon::GetPointerUnsafe<yyjson_val>(doc->root, ptr, len);
51
- if (!val) {
49
+ if (!val || unsafe_yyjson_is_null(val)) {
52
50
  mask.SetInvalid(idx);
53
51
  return T {};
54
52
  } else {
55
53
  return fun(val, alc, result);
56
54
  }
57
55
  });
58
- } else {
59
- // Columnref path
56
+ } else { // Columnref path
60
57
  auto &paths = args.data[1];
61
58
  BinaryExecutor::ExecuteWithNulls<string_t, string_t, T>(
62
59
  inputs, paths, result, args.size(), [&](string_t input, string_t path, ValidityMask &mask, idx_t idx) {
63
- auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG,
64
- lstate.json_allocator.GetYYJSONAllocator());
60
+ auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
65
61
  auto val = JSONCommon::GetPointer<yyjson_val>(doc->root, path);
66
- if (!val) {
62
+ if (!val || unsafe_yyjson_is_null(val)) {
67
63
  mask.SetInvalid(idx);
68
64
  return T {};
69
65
  } else {
@@ -81,9 +77,9 @@ public:
81
77
  static void ExecuteMany(DataChunk &args, ExpressionState &state, Vector &result,
82
78
  std::function<T(yyjson_val *, yyjson_alc *, Vector &)> fun) {
83
79
  auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
84
- const auto &info = (JSONReadManyFunctionData &)*func_expr.bind_info;
80
+ const auto &info = func_expr.bind_info->Cast<JSONReadManyFunctionData>();
85
81
  auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
86
- auto alc = lstate.json_allocator.GetYYJSONAllocator();
82
+ auto alc = lstate.json_allocator.GetYYAlc();
87
83
  D_ASSERT(info.ptrs.size() == info.lens.size());
88
84
 
89
85
  const auto count = args.size();
@@ -112,12 +108,11 @@ public:
112
108
  continue;
113
109
  }
114
110
 
115
- auto doc = JSONCommon::ReadDocument(inputs[idx], JSONCommon::READ_FLAG,
116
- lstate.json_allocator.GetYYJSONAllocator());
111
+ auto doc = JSONCommon::ReadDocument(inputs[idx], JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
117
112
  for (idx_t path_i = 0; path_i < num_paths; path_i++) {
118
113
  auto child_idx = offset + path_i;
119
114
  val = JSONCommon::GetPointerUnsafe<yyjson_val>(doc->root, info.ptrs[path_i], info.lens[path_i]);
120
- if (!val) {
115
+ if (!val || unsafe_yyjson_is_null(val)) {
121
116
  child_validity.SetInvalid(child_idx);
122
117
  } else {
123
118
  child_data[child_idx] = fun(val, alc, child);
package/src/duckdb/extension/json/include/json_functions.hpp CHANGED
@@ -115,10 +115,13 @@ private:
115
115
  // Table functions
116
116
  static TableFunctionSet GetReadJSONObjectsFunction();
117
117
  static TableFunctionSet GetReadNDJSONObjectsFunction();
118
+ static TableFunctionSet GetReadJSONObjectsAutoFunction();
119
+
118
120
  static TableFunctionSet GetReadJSONFunction();
119
121
  static TableFunctionSet GetReadNDJSONFunction();
120
122
  static TableFunctionSet GetReadJSONAutoFunction();
121
123
  static TableFunctionSet GetReadNDJSONAutoFunction();
124
+
122
125
  static TableFunctionSet GetExecuteJsonSerializedSqlFunction();
123
126
  };
124
127