duckdb 0.8.2-dev3989.0 → 0.8.2-dev4126.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/binding.gyp +8 -7
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
  4. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
  5. package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
  6. package/src/duckdb/extension/json/json_scan.cpp +124 -121
  7. package/src/duckdb/extension/parquet/parquet_extension.cpp +23 -13
  8. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
  9. package/src/duckdb/src/common/crypto/md5.cpp +2 -12
  10. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  11. package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
  12. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
  13. package/src/duckdb/src/core_functions/function_list.cpp +8 -0
  14. package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
  15. package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
  16. package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
  17. package/src/duckdb/src/core_functions/scalar/string/sha256.cpp +32 -0
  18. package/src/duckdb/src/execution/index/art/art.cpp +111 -92
  19. package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
  20. package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
  21. package/src/duckdb/src/execution/index/art/node.cpp +109 -203
  22. package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
  23. package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
  24. package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
  25. package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
  26. package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
  27. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
  28. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
  29. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  30. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
  31. package/src/duckdb/src/function/scalar/string/suffix.cpp +1 -1
  32. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
  33. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  34. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
  36. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
  37. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +9 -0
  38. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
  39. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
  40. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
  41. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
  42. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
  43. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
  44. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
  45. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
  46. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
  47. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
  48. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
  49. package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
  50. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
  53. package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
  54. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
  55. package/src/duckdb/src/main/extension/extension_helper.cpp +15 -1
  56. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
  57. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
  58. package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
  59. package/src/duckdb/src/storage/data_table.cpp +3 -3
  60. package/src/duckdb/src/storage/index.cpp +7 -1
  61. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
  62. package/src/duckdb/src/storage/standard_buffer_manager.cpp +10 -16
  63. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  64. package/src/duckdb/src/storage/table_index_list.cpp +1 -1
  65. package/src/duckdb/src/transaction/commit_state.cpp +5 -1
  66. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +4 -1
  67. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +24 -2
  68. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
  69. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
  70. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
  71. package/src/duckdb/ub_src_execution_index.cpp +4 -0
  72. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  73. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
  74. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
package/binding.gyp CHANGED
@@ -61,6 +61,7 @@
61
61
  "src/duckdb/ub_src_execution.cpp",
62
62
  "src/duckdb/ub_src_execution_expression_executor.cpp",
63
63
  "src/duckdb/ub_src_execution_index_art.cpp",
64
+ "src/duckdb/ub_src_execution_index.cpp",
64
65
  "src/duckdb/ub_src_execution_nested_loop_join.cpp",
65
66
  "src/duckdb/ub_src_execution_operator_aggregate.cpp",
66
67
  "src/duckdb/ub_src_execution_operator_csv_scanner.cpp",
@@ -249,18 +250,18 @@
249
250
  "src/duckdb/third_party/zstd/compress/zstd_lazy.cpp",
250
251
  "src/duckdb/third_party/zstd/compress/zstd_ldm.cpp",
251
252
  "src/duckdb/third_party/zstd/compress/zstd_opt.cpp",
252
- "src/duckdb/extension/icu/./icu-strptime.cpp",
253
+ "src/duckdb/extension/icu/./icu-timezone.cpp",
253
254
  "src/duckdb/extension/icu/./icu-datepart.cpp",
254
- "src/duckdb/extension/icu/./icu-datesub.cpp",
255
- "src/duckdb/extension/icu/./icu-table-range.cpp",
256
- "src/duckdb/extension/icu/./icu-datetrunc.cpp",
257
255
  "src/duckdb/extension/icu/./icu-timebucket.cpp",
258
- "src/duckdb/extension/icu/./icu-dateadd.cpp",
256
+ "src/duckdb/extension/icu/./icu-datesub.cpp",
259
257
  "src/duckdb/extension/icu/./icu-list-range.cpp",
260
- "src/duckdb/extension/icu/./icu-timezone.cpp",
258
+ "src/duckdb/extension/icu/./icu-makedate.cpp",
261
259
  "src/duckdb/extension/icu/./icu-datefunc.cpp",
260
+ "src/duckdb/extension/icu/./icu-datetrunc.cpp",
261
+ "src/duckdb/extension/icu/./icu-dateadd.cpp",
262
+ "src/duckdb/extension/icu/./icu-table-range.cpp",
262
263
  "src/duckdb/extension/icu/./icu_extension.cpp",
263
- "src/duckdb/extension/icu/./icu-makedate.cpp",
264
+ "src/duckdb/extension/icu/./icu-strptime.cpp",
264
265
  "src/duckdb/ub_extension_icu_third_party_icu_common.cpp",
265
266
  "src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp",
266
267
  "src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp",
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.8.2-dev3989.0",
5
+ "version": "0.8.2-dev4126.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb/extension/json/buffered_json_reader.cpp CHANGED
@@ -2,9 +2,10 @@
2
2
 
3
3
  #include "duckdb/common/field_writer.hpp"
4
4
  #include "duckdb/common/file_opener.hpp"
5
- #include "duckdb/common/printer.hpp"
6
- #include "duckdb/common/serializer/format_serializer.hpp"
7
5
  #include "duckdb/common/serializer/format_deserializer.hpp"
6
+ #include "duckdb/common/serializer/format_serializer.hpp"
7
+
8
+ #include <utility>
8
9
 
9
10
  namespace duckdb {
10
11
 
@@ -37,11 +38,24 @@ bool JSONFileHandle::IsOpen() const {
37
38
  }
38
39
 
39
40
  void JSONFileHandle::Close() {
40
- if (file_handle) {
41
+ if (IsOpen()) {
41
42
  file_handle->Close();
42
43
  file_handle = nullptr;
43
44
  }
44
- cached_buffers.clear();
45
+ }
46
+
47
+ void JSONFileHandle::Reset() {
48
+ D_ASSERT(RequestedReadsComplete());
49
+ read_position = 0;
50
+ requested_reads = 0;
51
+ actual_reads = 0;
52
+ if (IsOpen() && plain_file_source) {
53
+ file_handle->Reset();
54
+ }
55
+ }
56
+
57
+ bool JSONFileHandle::RequestedReadsComplete() {
58
+ return requested_reads == actual_reads;
45
59
  }
46
60
 
47
61
  idx_t JSONFileHandle::FileSize() const {
@@ -56,12 +70,9 @@ bool JSONFileHandle::CanSeek() const {
56
70
  return can_seek;
57
71
  }
58
72
 
59
- void JSONFileHandle::Seek(idx_t position) {
60
- file_handle->Seek(position);
61
- }
62
-
63
73
  idx_t JSONFileHandle::GetPositionAndSize(idx_t &position, idx_t requested_size) {
64
74
  D_ASSERT(requested_size != 0);
75
+
65
76
  position = read_position;
66
77
  auto actual_size = MinValue<idx_t>(requested_size, Remaining());
67
78
  read_position += actual_size;
@@ -77,15 +88,18 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
77
88
  if (plain_file_source) {
78
89
  file_handle->Read(pointer, size, position);
79
90
  actual_reads++;
91
+
80
92
  return;
81
93
  }
82
94
 
83
95
  if (sample_run) { // Cache the buffer
84
96
  file_handle->Read(pointer, size, position);
85
97
  actual_reads++;
98
+
86
99
  cached_buffers.emplace_back(allocator.Allocate(size));
87
100
  memcpy(cached_buffers.back().get(), pointer, size);
88
101
  cached_size += size;
102
+
89
103
  return;
90
104
  }
91
105
 
@@ -93,6 +107,7 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
93
107
  ReadFromCache(pointer, size, position);
94
108
  actual_reads++;
95
109
  }
110
+
96
111
  if (size != 0) {
97
112
  file_handle->Read(pointer, size, position);
98
113
  actual_reads++;
@@ -128,6 +143,19 @@ idx_t JSONFileHandle::Read(char *pointer, idx_t requested_size, bool sample_run)
128
143
  return actual_size;
129
144
  }
130
145
 
146
+ idx_t JSONFileHandle::ReadInternal(char *pointer, const idx_t requested_size) {
147
+ // Deal with reading from pipes
148
+ idx_t total_read_size = 0;
149
+ while (total_read_size < requested_size) {
150
+ auto read_size = file_handle->Read(pointer + total_read_size, requested_size - total_read_size);
151
+ if (read_size == 0) {
152
+ break;
153
+ }
154
+ total_read_size += read_size;
155
+ }
156
+ return total_read_size;
157
+ }
158
+
131
159
  idx_t JSONFileHandle::ReadFromCache(char *&pointer, idx_t &size, idx_t &position) {
132
160
  idx_t read_size = 0;
133
161
  idx_t total_offset = 0;
@@ -154,35 +182,27 @@ idx_t JSONFileHandle::ReadFromCache(char *&pointer, idx_t &size, idx_t &position
154
182
  return read_size;
155
183
  }
156
184
 
157
- idx_t JSONFileHandle::ReadInternal(char *pointer, const idx_t requested_size) {
158
- // Deal with reading from pipes
159
- idx_t total_read_size = 0;
160
- while (total_read_size < requested_size) {
161
- auto read_size = file_handle->Read(pointer + total_read_size, requested_size - total_read_size);
162
- if (read_size == 0) {
163
- break;
164
- }
165
- total_read_size += read_size;
166
- }
167
- return total_read_size;
168
- }
169
-
170
185
  BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options_p, string file_name_p)
171
- : context(context), options(options_p), file_name(std::move(file_name_p)), buffer_index(0) {
186
+ : context(context), options(std::move(options_p)), file_name(std::move(file_name_p)), buffer_index(0),
187
+ thrown(false) {
172
188
  }
173
189
 
174
190
  void BufferedJSONReader::OpenJSONFile() {
175
- D_ASSERT(!IsDone());
191
+ D_ASSERT(!IsOpen());
176
192
  lock_guard<mutex> guard(lock);
177
193
  auto &file_system = FileSystem::GetFileSystem(context);
178
194
  auto regular_file_handle =
179
195
  file_system.OpenFile(file_name.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, options.compression);
180
196
  file_handle = make_uniq<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
197
+ Reset();
181
198
  }
182
199
 
183
200
  void BufferedJSONReader::CloseJSONFile() {
184
201
  while (true) {
185
202
  lock_guard<mutex> guard(lock);
203
+ if (!file_handle->IsOpen()) {
204
+ return; // Already closed
205
+ }
186
206
  if (file_handle->RequestedReadsComplete()) {
187
207
  file_handle->Close();
188
208
  break;
@@ -190,13 +210,22 @@ void BufferedJSONReader::CloseJSONFile() {
190
210
  }
191
211
  }
192
212
 
193
- bool BufferedJSONReader::IsOpen() const {
213
+ void BufferedJSONReader::Reset() {
214
+ buffer_index = 0;
215
+ buffer_map.clear();
216
+ buffer_line_or_object_counts.clear();
217
+ if (HasFileHandle()) {
218
+ file_handle->Reset();
219
+ }
220
+ }
221
+
222
+ bool BufferedJSONReader::HasFileHandle() const {
194
223
  return file_handle != nullptr;
195
224
  }
196
225
 
197
- bool BufferedJSONReader::IsDone() const {
198
- if (file_handle) {
199
- return !file_handle->IsOpen();
226
+ bool BufferedJSONReader::IsOpen() const {
227
+ if (HasFileHandle()) {
228
+ return file_handle->IsOpen();
200
229
  }
201
230
  return false;
202
231
  }
@@ -205,10 +234,6 @@ BufferedJSONReaderOptions &BufferedJSONReader::GetOptions() {
205
234
  return options;
206
235
  }
207
236
 
208
- const BufferedJSONReaderOptions &BufferedJSONReader::GetOptions() const {
209
- return options;
210
- }
211
-
212
237
  JSONFormat BufferedJSONReader::GetFormat() const {
213
238
  return options.format;
214
239
  }
@@ -232,6 +257,7 @@ const string &BufferedJSONReader::GetFileName() const {
232
257
  }
233
258
 
234
259
  JSONFileHandle &BufferedJSONReader::GetFileHandle() const {
260
+ D_ASSERT(HasFileHandle());
235
261
  return *file_handle;
236
262
  }
237
263
 
@@ -240,7 +266,7 @@ void BufferedJSONReader::InsertBuffer(idx_t buffer_idx, unique_ptr<JSONBufferHan
240
266
  buffer_map.insert(make_pair(buffer_idx, std::move(buffer)));
241
267
  }
242
268
 
243
- JSONBufferHandle *BufferedJSONReader::GetBuffer(idx_t buffer_idx) {
269
+ optional_ptr<JSONBufferHandle> BufferedJSONReader::GetBuffer(idx_t buffer_idx) {
244
270
  lock_guard<mutex> guard(lock);
245
271
  auto it = buffer_map.find(buffer_idx);
246
272
  return it == buffer_map.end() ? nullptr : it->second.get();
@@ -268,22 +294,28 @@ void BufferedJSONReader::SetBufferLineOrObjectCount(idx_t index, idx_t count) {
268
294
  idx_t BufferedJSONReader::GetLineNumber(idx_t buf_index, idx_t line_or_object_in_buf) {
269
295
  D_ASSERT(options.format != JSONFormat::AUTO_DETECT);
270
296
  while (true) {
271
- lock_guard<mutex> guard(lock);
272
297
  idx_t line = line_or_object_in_buf;
273
298
  bool can_throw = true;
274
- for (idx_t b_idx = 0; b_idx < buf_index; b_idx++) {
275
- if (buffer_line_or_object_counts[b_idx] == -1) {
276
- can_throw = false;
277
- break;
278
- } else {
279
- line += buffer_line_or_object_counts[b_idx];
299
+ {
300
+ lock_guard<mutex> guard(lock);
301
+ if (thrown) {
302
+ return DConstants::INVALID_INDEX;
303
+ }
304
+ for (idx_t b_idx = 0; b_idx < buf_index; b_idx++) {
305
+ if (buffer_line_or_object_counts[b_idx] == -1) {
306
+ can_throw = false;
307
+ break;
308
+ } else {
309
+ line += buffer_line_or_object_counts[b_idx];
310
+ thrown = true;
311
+ }
280
312
  }
281
313
  }
282
- if (!can_throw) {
283
- continue;
314
+ if (can_throw) {
315
+ // SQL uses 1-based indexing so I guess we will do that in our exception here as well
316
+ return line + 1;
284
317
  }
285
- // SQL uses 1-based indexing so I guess we will do that in our exception here as well
286
- return line + 1;
318
+ TaskScheduler::YieldThread();
287
319
  }
288
320
  }
289
321
 
@@ -304,41 +336,11 @@ void BufferedJSONReader::ThrowTransformError(idx_t buf_index, idx_t line_or_obje
304
336
  }
305
337
 
306
338
  double BufferedJSONReader::GetProgress() const {
307
- if (IsOpen()) {
339
+ if (HasFileHandle()) {
308
340
  return 100.0 - 100.0 * double(file_handle->Remaining()) / double(file_handle->FileSize());
309
341
  } else {
310
342
  return 0;
311
343
  }
312
344
  }
313
345
 
314
- void BufferedJSONReader::Reset() {
315
- buffer_index = 0;
316
- buffer_map.clear();
317
- buffer_line_or_object_counts.clear();
318
-
319
- if (!file_handle) {
320
- return;
321
- }
322
-
323
- if (file_handle->CanSeek()) {
324
- file_handle->Seek(0);
325
- } else {
326
- file_handle->Reset();
327
- }
328
- file_handle->Reset();
329
- }
330
-
331
- void JSONFileHandle::Reset() {
332
- read_position = 0;
333
- requested_reads = 0;
334
- actual_reads = 0;
335
- if (plain_file_source) {
336
- file_handle->Reset();
337
- }
338
- }
339
-
340
- bool JSONFileHandle::RequestedReadsComplete() {
341
- return requested_reads == actual_reads;
342
- }
343
-
344
346
  } // namespace duckdb
package/src/duckdb/extension/json/include/buffered_json_reader.hpp CHANGED
@@ -9,13 +9,13 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/atomic.hpp"
12
+ #include "duckdb/common/enum_util.hpp"
12
13
  #include "duckdb/common/enums/file_compression_type.hpp"
13
14
  #include "duckdb/common/file_system.hpp"
14
15
  #include "duckdb/common/multi_file_reader.hpp"
15
16
  #include "duckdb/common/mutex.hpp"
16
17
  #include "json_common.hpp"
17
18
  #include "json_enums.hpp"
18
- #include "duckdb/common/enum_util.hpp"
19
19
 
20
20
  namespace duckdb {
21
21
 
@@ -57,25 +57,25 @@ public:
57
57
  struct JSONFileHandle {
58
58
  public:
59
59
  JSONFileHandle(unique_ptr<FileHandle> file_handle, Allocator &allocator);
60
+
60
61
  bool IsOpen() const;
61
62
  void Close();
62
63
 
64
+ void Reset();
65
+ bool RequestedReadsComplete();
66
+
63
67
  idx_t FileSize() const;
64
68
  idx_t Remaining() const;
65
69
 
66
70
  bool CanSeek() const;
67
- void Seek(idx_t position);
68
71
 
69
72
  idx_t GetPositionAndSize(idx_t &position, idx_t requested_size);
70
73
  void ReadAtPosition(char *pointer, idx_t size, idx_t position, bool sample_run);
71
74
  idx_t Read(char *pointer, idx_t requested_size, bool sample_run);
72
75
 
73
- void Reset();
74
- bool RequestedReadsComplete();
75
-
76
76
  private:
77
- idx_t ReadFromCache(char *&pointer, idx_t &size, idx_t &position);
78
77
  idx_t ReadInternal(char *pointer, const idx_t requested_size);
78
+ idx_t ReadFromCache(char *&pointer, idx_t &size, idx_t &position);
79
79
 
80
80
  private:
81
81
  //! The JSON file handle
@@ -101,38 +101,18 @@ class BufferedJSONReader {
101
101
  public:
102
102
  BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options, string file_name);
103
103
 
104
- private:
105
- ClientContext &context;
106
- BufferedJSONReaderOptions options;
107
-
108
- //! File name
109
- const string file_name;
110
- //! File handle
111
- unique_ptr<JSONFileHandle> file_handle;
112
-
113
- //! Next buffer index within the file
114
- idx_t buffer_index;
115
- //! Mapping from batch index to currently held buffers
116
- unordered_map<idx_t, unique_ptr<JSONBufferHandle>> buffer_map;
117
-
118
- //! Line count per buffer
119
- vector<int64_t> buffer_line_or_object_counts;
120
-
121
- public:
122
- mutex lock;
123
- MultiFileReaderData reader_data;
124
-
125
- public:
126
104
  void OpenJSONFile();
127
105
  void CloseJSONFile();
106
+ void Reset();
107
+
108
+ bool HasFileHandle() const;
128
109
  bool IsOpen() const;
129
- bool IsDone() const;
130
110
 
131
111
  BufferedJSONReaderOptions &GetOptions();
132
- const BufferedJSONReaderOptions &GetOptions() const;
133
112
 
134
113
  JSONFormat GetFormat() const;
135
114
  void SetFormat(JSONFormat format);
115
+
136
116
  JSONRecordType GetRecordType() const;
137
117
  void SetRecordType(JSONRecordType type);
138
118
 
@@ -142,7 +122,7 @@ public:
142
122
  public:
143
123
  //! Insert/get/remove buffer (grabs the lock)
144
124
  void InsertBuffer(idx_t buffer_idx, unique_ptr<JSONBufferHandle> &&buffer);
145
- JSONBufferHandle *GetBuffer(idx_t buffer_idx);
125
+ optional_ptr<JSONBufferHandle> GetBuffer(idx_t buffer_idx);
146
126
  AllocatedData RemoveBuffer(idx_t buffer_idx);
147
127
 
148
128
  //! Get a new buffer index (must hold the lock)
@@ -154,11 +134,34 @@ public:
154
134
  //! Throws a transform error that mentions the file name and line number
155
135
  void ThrowTransformError(idx_t buf_index, idx_t line_or_object_in_buf, const string &error_message);
156
136
 
137
+ //! Scan progress
157
138
  double GetProgress() const;
158
- void Reset();
159
139
 
160
140
  private:
161
141
  idx_t GetLineNumber(idx_t buf_index, idx_t line_or_object_in_buf);
142
+
143
+ private:
144
+ ClientContext &context;
145
+ BufferedJSONReaderOptions options;
146
+
147
+ //! File name
148
+ const string file_name;
149
+ //! File handle
150
+ unique_ptr<JSONFileHandle> file_handle;
151
+
152
+ //! Next buffer index within the file
153
+ idx_t buffer_index;
154
+ //! Mapping from batch index to currently held buffers
155
+ unordered_map<idx_t, unique_ptr<JSONBufferHandle>> buffer_map;
156
+
157
+ //! Line count per buffer
158
+ vector<int64_t> buffer_line_or_object_counts;
159
+ //! Whether any of the reading threads has thrown an error
160
+ bool thrown;
161
+
162
+ public:
163
+ mutex lock;
164
+ MultiFileReaderData reader_data;
162
165
  };
163
166
 
164
167
  } // namespace duckdb
package/src/duckdb/extension/json/include/json_scan.hpp CHANGED
@@ -182,11 +182,13 @@
182
182
  //! One JSON reader per file
183
183
  vector<optional_ptr<BufferedJSONReader>> json_readers;
184
184
  //! Current file/batch index
185
- idx_t file_index;
185
+ atomic<idx_t> file_index;
186
186
  atomic<idx_t> batch_index;
187
187
 
188
188
  //! Current number of threads active
189
189
  idx_t system_threads;
190
+ //! Whether we enable parallel scans (only if less files than threads)
191
+ bool enable_parallel_scans;
190
192
  };
191
193
 
192
194
  struct JSONScanLocalState {
@@ -219,19 +221,20 @@ public:
219
221
 
220
222
  private:
221
223
  bool ReadNextBuffer(JSONScanGlobalState &gstate);
222
- void ReadNextBufferInternal(JSONScanGlobalState &gstate, idx_t &buffer_index);
223
- void ReadNextBufferSeek(JSONScanGlobalState &gstate, idx_t &buffer_index);
224
- void ReadNextBufferNoSeek(JSONScanGlobalState &gstate, idx_t &buffer_index);
224
+ void ReadNextBufferInternal(JSONScanGlobalState &gstate, optional_idx &buffer_index);
225
+ void ReadNextBufferSeek(JSONScanGlobalState &gstate, optional_idx &buffer_index);
226
+ void ReadNextBufferNoSeek(JSONScanGlobalState &gstate, optional_idx &buffer_index);
225
227
  void SkipOverArrayStart();
226
228
 
227
- bool ReadAndAutoDetect(JSONScanGlobalState &gstate, idx_t &buffer_index, const bool already_incremented_file_idx);
228
- void ReconstructFirstObject(JSONScanGlobalState &gstate);
229
+ void ReadAndAutoDetect(JSONScanGlobalState &gstate, optional_idx &buffer_index);
230
+ void ReconstructFirstObject();
229
231
  void ParseNextChunk();
230
232
 
231
233
  void ParseJSON(char *const json_start, const idx_t json_size, const idx_t remaining);
232
234
  void ThrowObjectSizeError(const idx_t object_size);
233
235
  void ThrowInvalidAtEndError();
234
236
 
237
+ void TryIncrementFileIndex(JSONScanGlobalState &gstate) const;
235
238
  bool IsParallel(JSONScanGlobalState &gstate) const;
236
239
 
237
240
  private: