duckdb 0.8.2-dev3989.0 → 0.8.2-dev4126.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +8 -7
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
- package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
- package/src/duckdb/extension/json/json_scan.cpp +124 -121
- package/src/duckdb/extension/parquet/parquet_extension.cpp +23 -13
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
- package/src/duckdb/src/common/crypto/md5.cpp +2 -12
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
- package/src/duckdb/src/core_functions/function_list.cpp +8 -0
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
- package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
- package/src/duckdb/src/core_functions/scalar/string/sha256.cpp +32 -0
- package/src/duckdb/src/execution/index/art/art.cpp +111 -92
- package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
- package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
- package/src/duckdb/src/execution/index/art/node.cpp +109 -203
- package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
- package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
- package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
- package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
- package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/suffix.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +9 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +15 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
- package/src/duckdb/src/storage/data_table.cpp +3 -3
- package/src/duckdb/src/storage/index.cpp +7 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +10 -16
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +1 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +4 -1
- package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +24 -2
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_execution_index.cpp +4 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
package/binding.gyp
CHANGED
@@ -61,6 +61,7 @@
|
|
61
61
|
"src/duckdb/ub_src_execution.cpp",
|
62
62
|
"src/duckdb/ub_src_execution_expression_executor.cpp",
|
63
63
|
"src/duckdb/ub_src_execution_index_art.cpp",
|
64
|
+
"src/duckdb/ub_src_execution_index.cpp",
|
64
65
|
"src/duckdb/ub_src_execution_nested_loop_join.cpp",
|
65
66
|
"src/duckdb/ub_src_execution_operator_aggregate.cpp",
|
66
67
|
"src/duckdb/ub_src_execution_operator_csv_scanner.cpp",
|
@@ -249,18 +250,18 @@
|
|
249
250
|
"src/duckdb/third_party/zstd/compress/zstd_lazy.cpp",
|
250
251
|
"src/duckdb/third_party/zstd/compress/zstd_ldm.cpp",
|
251
252
|
"src/duckdb/third_party/zstd/compress/zstd_opt.cpp",
|
252
|
-
"src/duckdb/extension/icu/./icu-
|
253
|
+
"src/duckdb/extension/icu/./icu-timezone.cpp",
|
253
254
|
"src/duckdb/extension/icu/./icu-datepart.cpp",
|
254
|
-
"src/duckdb/extension/icu/./icu-datesub.cpp",
|
255
|
-
"src/duckdb/extension/icu/./icu-table-range.cpp",
|
256
|
-
"src/duckdb/extension/icu/./icu-datetrunc.cpp",
|
257
255
|
"src/duckdb/extension/icu/./icu-timebucket.cpp",
|
258
|
-
"src/duckdb/extension/icu/./icu-
|
256
|
+
"src/duckdb/extension/icu/./icu-datesub.cpp",
|
259
257
|
"src/duckdb/extension/icu/./icu-list-range.cpp",
|
260
|
-
"src/duckdb/extension/icu/./icu-
|
258
|
+
"src/duckdb/extension/icu/./icu-makedate.cpp",
|
261
259
|
"src/duckdb/extension/icu/./icu-datefunc.cpp",
|
260
|
+
"src/duckdb/extension/icu/./icu-datetrunc.cpp",
|
261
|
+
"src/duckdb/extension/icu/./icu-dateadd.cpp",
|
262
|
+
"src/duckdb/extension/icu/./icu-table-range.cpp",
|
262
263
|
"src/duckdb/extension/icu/./icu_extension.cpp",
|
263
|
-
"src/duckdb/extension/icu/./icu-
|
264
|
+
"src/duckdb/extension/icu/./icu-strptime.cpp",
|
264
265
|
"src/duckdb/ub_extension_icu_third_party_icu_common.cpp",
|
265
266
|
"src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp",
|
266
267
|
"src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp",
|
package/package.json
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
|
3
3
|
#include "duckdb/common/field_writer.hpp"
|
4
4
|
#include "duckdb/common/file_opener.hpp"
|
5
|
-
#include "duckdb/common/printer.hpp"
|
6
|
-
#include "duckdb/common/serializer/format_serializer.hpp"
|
7
5
|
#include "duckdb/common/serializer/format_deserializer.hpp"
|
6
|
+
#include "duckdb/common/serializer/format_serializer.hpp"
|
7
|
+
|
8
|
+
#include <utility>
|
8
9
|
|
9
10
|
namespace duckdb {
|
10
11
|
|
@@ -37,11 +38,24 @@ bool JSONFileHandle::IsOpen() const {
|
|
37
38
|
}
|
38
39
|
|
39
40
|
void JSONFileHandle::Close() {
|
40
|
-
if (
|
41
|
+
if (IsOpen()) {
|
41
42
|
file_handle->Close();
|
42
43
|
file_handle = nullptr;
|
43
44
|
}
|
44
|
-
|
45
|
+
}
|
46
|
+
|
47
|
+
void JSONFileHandle::Reset() {
|
48
|
+
D_ASSERT(RequestedReadsComplete());
|
49
|
+
read_position = 0;
|
50
|
+
requested_reads = 0;
|
51
|
+
actual_reads = 0;
|
52
|
+
if (IsOpen() && plain_file_source) {
|
53
|
+
file_handle->Reset();
|
54
|
+
}
|
55
|
+
}
|
56
|
+
|
57
|
+
bool JSONFileHandle::RequestedReadsComplete() {
|
58
|
+
return requested_reads == actual_reads;
|
45
59
|
}
|
46
60
|
|
47
61
|
idx_t JSONFileHandle::FileSize() const {
|
@@ -56,12 +70,9 @@ bool JSONFileHandle::CanSeek() const {
|
|
56
70
|
return can_seek;
|
57
71
|
}
|
58
72
|
|
59
|
-
void JSONFileHandle::Seek(idx_t position) {
|
60
|
-
file_handle->Seek(position);
|
61
|
-
}
|
62
|
-
|
63
73
|
idx_t JSONFileHandle::GetPositionAndSize(idx_t &position, idx_t requested_size) {
|
64
74
|
D_ASSERT(requested_size != 0);
|
75
|
+
|
65
76
|
position = read_position;
|
66
77
|
auto actual_size = MinValue<idx_t>(requested_size, Remaining());
|
67
78
|
read_position += actual_size;
|
@@ -77,15 +88,18 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
|
|
77
88
|
if (plain_file_source) {
|
78
89
|
file_handle->Read(pointer, size, position);
|
79
90
|
actual_reads++;
|
91
|
+
|
80
92
|
return;
|
81
93
|
}
|
82
94
|
|
83
95
|
if (sample_run) { // Cache the buffer
|
84
96
|
file_handle->Read(pointer, size, position);
|
85
97
|
actual_reads++;
|
98
|
+
|
86
99
|
cached_buffers.emplace_back(allocator.Allocate(size));
|
87
100
|
memcpy(cached_buffers.back().get(), pointer, size);
|
88
101
|
cached_size += size;
|
102
|
+
|
89
103
|
return;
|
90
104
|
}
|
91
105
|
|
@@ -93,6 +107,7 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
|
|
93
107
|
ReadFromCache(pointer, size, position);
|
94
108
|
actual_reads++;
|
95
109
|
}
|
110
|
+
|
96
111
|
if (size != 0) {
|
97
112
|
file_handle->Read(pointer, size, position);
|
98
113
|
actual_reads++;
|
@@ -128,6 +143,19 @@ idx_t JSONFileHandle::Read(char *pointer, idx_t requested_size, bool sample_run)
|
|
128
143
|
return actual_size;
|
129
144
|
}
|
130
145
|
|
146
|
+
idx_t JSONFileHandle::ReadInternal(char *pointer, const idx_t requested_size) {
|
147
|
+
// Deal with reading from pipes
|
148
|
+
idx_t total_read_size = 0;
|
149
|
+
while (total_read_size < requested_size) {
|
150
|
+
auto read_size = file_handle->Read(pointer + total_read_size, requested_size - total_read_size);
|
151
|
+
if (read_size == 0) {
|
152
|
+
break;
|
153
|
+
}
|
154
|
+
total_read_size += read_size;
|
155
|
+
}
|
156
|
+
return total_read_size;
|
157
|
+
}
|
158
|
+
|
131
159
|
idx_t JSONFileHandle::ReadFromCache(char *&pointer, idx_t &size, idx_t &position) {
|
132
160
|
idx_t read_size = 0;
|
133
161
|
idx_t total_offset = 0;
|
@@ -154,35 +182,27 @@ idx_t JSONFileHandle::ReadFromCache(char *&pointer, idx_t &size, idx_t &position
|
|
154
182
|
return read_size;
|
155
183
|
}
|
156
184
|
|
157
|
-
idx_t JSONFileHandle::ReadInternal(char *pointer, const idx_t requested_size) {
|
158
|
-
// Deal with reading from pipes
|
159
|
-
idx_t total_read_size = 0;
|
160
|
-
while (total_read_size < requested_size) {
|
161
|
-
auto read_size = file_handle->Read(pointer + total_read_size, requested_size - total_read_size);
|
162
|
-
if (read_size == 0) {
|
163
|
-
break;
|
164
|
-
}
|
165
|
-
total_read_size += read_size;
|
166
|
-
}
|
167
|
-
return total_read_size;
|
168
|
-
}
|
169
|
-
|
170
185
|
BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options_p, string file_name_p)
|
171
|
-
: context(context), options(options_p), file_name(std::move(file_name_p)), buffer_index(0)
|
186
|
+
: context(context), options(std::move(options_p)), file_name(std::move(file_name_p)), buffer_index(0),
|
187
|
+
thrown(false) {
|
172
188
|
}
|
173
189
|
|
174
190
|
void BufferedJSONReader::OpenJSONFile() {
|
175
|
-
D_ASSERT(!
|
191
|
+
D_ASSERT(!IsOpen());
|
176
192
|
lock_guard<mutex> guard(lock);
|
177
193
|
auto &file_system = FileSystem::GetFileSystem(context);
|
178
194
|
auto regular_file_handle =
|
179
195
|
file_system.OpenFile(file_name.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, options.compression);
|
180
196
|
file_handle = make_uniq<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
|
197
|
+
Reset();
|
181
198
|
}
|
182
199
|
|
183
200
|
void BufferedJSONReader::CloseJSONFile() {
|
184
201
|
while (true) {
|
185
202
|
lock_guard<mutex> guard(lock);
|
203
|
+
if (!file_handle->IsOpen()) {
|
204
|
+
return; // Already closed
|
205
|
+
}
|
186
206
|
if (file_handle->RequestedReadsComplete()) {
|
187
207
|
file_handle->Close();
|
188
208
|
break;
|
@@ -190,13 +210,22 @@ void BufferedJSONReader::CloseJSONFile() {
|
|
190
210
|
}
|
191
211
|
}
|
192
212
|
|
193
|
-
|
213
|
+
void BufferedJSONReader::Reset() {
|
214
|
+
buffer_index = 0;
|
215
|
+
buffer_map.clear();
|
216
|
+
buffer_line_or_object_counts.clear();
|
217
|
+
if (HasFileHandle()) {
|
218
|
+
file_handle->Reset();
|
219
|
+
}
|
220
|
+
}
|
221
|
+
|
222
|
+
bool BufferedJSONReader::HasFileHandle() const {
|
194
223
|
return file_handle != nullptr;
|
195
224
|
}
|
196
225
|
|
197
|
-
bool BufferedJSONReader::
|
198
|
-
if (
|
199
|
-
return
|
226
|
+
bool BufferedJSONReader::IsOpen() const {
|
227
|
+
if (HasFileHandle()) {
|
228
|
+
return file_handle->IsOpen();
|
200
229
|
}
|
201
230
|
return false;
|
202
231
|
}
|
@@ -205,10 +234,6 @@ BufferedJSONReaderOptions &BufferedJSONReader::GetOptions() {
|
|
205
234
|
return options;
|
206
235
|
}
|
207
236
|
|
208
|
-
const BufferedJSONReaderOptions &BufferedJSONReader::GetOptions() const {
|
209
|
-
return options;
|
210
|
-
}
|
211
|
-
|
212
237
|
JSONFormat BufferedJSONReader::GetFormat() const {
|
213
238
|
return options.format;
|
214
239
|
}
|
@@ -232,6 +257,7 @@ const string &BufferedJSONReader::GetFileName() const {
|
|
232
257
|
}
|
233
258
|
|
234
259
|
JSONFileHandle &BufferedJSONReader::GetFileHandle() const {
|
260
|
+
D_ASSERT(HasFileHandle());
|
235
261
|
return *file_handle;
|
236
262
|
}
|
237
263
|
|
@@ -240,7 +266,7 @@ void BufferedJSONReader::InsertBuffer(idx_t buffer_idx, unique_ptr<JSONBufferHan
|
|
240
266
|
buffer_map.insert(make_pair(buffer_idx, std::move(buffer)));
|
241
267
|
}
|
242
268
|
|
243
|
-
JSONBufferHandle
|
269
|
+
optional_ptr<JSONBufferHandle> BufferedJSONReader::GetBuffer(idx_t buffer_idx) {
|
244
270
|
lock_guard<mutex> guard(lock);
|
245
271
|
auto it = buffer_map.find(buffer_idx);
|
246
272
|
return it == buffer_map.end() ? nullptr : it->second.get();
|
@@ -268,22 +294,28 @@ void BufferedJSONReader::SetBufferLineOrObjectCount(idx_t index, idx_t count) {
|
|
268
294
|
idx_t BufferedJSONReader::GetLineNumber(idx_t buf_index, idx_t line_or_object_in_buf) {
|
269
295
|
D_ASSERT(options.format != JSONFormat::AUTO_DETECT);
|
270
296
|
while (true) {
|
271
|
-
lock_guard<mutex> guard(lock);
|
272
297
|
idx_t line = line_or_object_in_buf;
|
273
298
|
bool can_throw = true;
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
}
|
279
|
-
|
299
|
+
{
|
300
|
+
lock_guard<mutex> guard(lock);
|
301
|
+
if (thrown) {
|
302
|
+
return DConstants::INVALID_INDEX;
|
303
|
+
}
|
304
|
+
for (idx_t b_idx = 0; b_idx < buf_index; b_idx++) {
|
305
|
+
if (buffer_line_or_object_counts[b_idx] == -1) {
|
306
|
+
can_throw = false;
|
307
|
+
break;
|
308
|
+
} else {
|
309
|
+
line += buffer_line_or_object_counts[b_idx];
|
310
|
+
thrown = true;
|
311
|
+
}
|
280
312
|
}
|
281
313
|
}
|
282
|
-
if (
|
283
|
-
|
314
|
+
if (can_throw) {
|
315
|
+
// SQL uses 1-based indexing so I guess we will do that in our exception here as well
|
316
|
+
return line + 1;
|
284
317
|
}
|
285
|
-
|
286
|
-
return line + 1;
|
318
|
+
TaskScheduler::YieldThread();
|
287
319
|
}
|
288
320
|
}
|
289
321
|
|
@@ -304,41 +336,11 @@ void BufferedJSONReader::ThrowTransformError(idx_t buf_index, idx_t line_or_obje
|
|
304
336
|
}
|
305
337
|
|
306
338
|
double BufferedJSONReader::GetProgress() const {
|
307
|
-
if (
|
339
|
+
if (HasFileHandle()) {
|
308
340
|
return 100.0 - 100.0 * double(file_handle->Remaining()) / double(file_handle->FileSize());
|
309
341
|
} else {
|
310
342
|
return 0;
|
311
343
|
}
|
312
344
|
}
|
313
345
|
|
314
|
-
void BufferedJSONReader::Reset() {
|
315
|
-
buffer_index = 0;
|
316
|
-
buffer_map.clear();
|
317
|
-
buffer_line_or_object_counts.clear();
|
318
|
-
|
319
|
-
if (!file_handle) {
|
320
|
-
return;
|
321
|
-
}
|
322
|
-
|
323
|
-
if (file_handle->CanSeek()) {
|
324
|
-
file_handle->Seek(0);
|
325
|
-
} else {
|
326
|
-
file_handle->Reset();
|
327
|
-
}
|
328
|
-
file_handle->Reset();
|
329
|
-
}
|
330
|
-
|
331
|
-
void JSONFileHandle::Reset() {
|
332
|
-
read_position = 0;
|
333
|
-
requested_reads = 0;
|
334
|
-
actual_reads = 0;
|
335
|
-
if (plain_file_source) {
|
336
|
-
file_handle->Reset();
|
337
|
-
}
|
338
|
-
}
|
339
|
-
|
340
|
-
bool JSONFileHandle::RequestedReadsComplete() {
|
341
|
-
return requested_reads == actual_reads;
|
342
|
-
}
|
343
|
-
|
344
346
|
} // namespace duckdb
|
@@ -9,13 +9,13 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/atomic.hpp"
|
12
|
+
#include "duckdb/common/enum_util.hpp"
|
12
13
|
#include "duckdb/common/enums/file_compression_type.hpp"
|
13
14
|
#include "duckdb/common/file_system.hpp"
|
14
15
|
#include "duckdb/common/multi_file_reader.hpp"
|
15
16
|
#include "duckdb/common/mutex.hpp"
|
16
17
|
#include "json_common.hpp"
|
17
18
|
#include "json_enums.hpp"
|
18
|
-
#include "duckdb/common/enum_util.hpp"
|
19
19
|
|
20
20
|
namespace duckdb {
|
21
21
|
|
@@ -57,25 +57,25 @@ public:
|
|
57
57
|
struct JSONFileHandle {
|
58
58
|
public:
|
59
59
|
JSONFileHandle(unique_ptr<FileHandle> file_handle, Allocator &allocator);
|
60
|
+
|
60
61
|
bool IsOpen() const;
|
61
62
|
void Close();
|
62
63
|
|
64
|
+
void Reset();
|
65
|
+
bool RequestedReadsComplete();
|
66
|
+
|
63
67
|
idx_t FileSize() const;
|
64
68
|
idx_t Remaining() const;
|
65
69
|
|
66
70
|
bool CanSeek() const;
|
67
|
-
void Seek(idx_t position);
|
68
71
|
|
69
72
|
idx_t GetPositionAndSize(idx_t &position, idx_t requested_size);
|
70
73
|
void ReadAtPosition(char *pointer, idx_t size, idx_t position, bool sample_run);
|
71
74
|
idx_t Read(char *pointer, idx_t requested_size, bool sample_run);
|
72
75
|
|
73
|
-
void Reset();
|
74
|
-
bool RequestedReadsComplete();
|
75
|
-
|
76
76
|
private:
|
77
|
-
idx_t ReadFromCache(char *&pointer, idx_t &size, idx_t &position);
|
78
77
|
idx_t ReadInternal(char *pointer, const idx_t requested_size);
|
78
|
+
idx_t ReadFromCache(char *&pointer, idx_t &size, idx_t &position);
|
79
79
|
|
80
80
|
private:
|
81
81
|
//! The JSON file handle
|
@@ -101,38 +101,18 @@ class BufferedJSONReader {
|
|
101
101
|
public:
|
102
102
|
BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options, string file_name);
|
103
103
|
|
104
|
-
private:
|
105
|
-
ClientContext &context;
|
106
|
-
BufferedJSONReaderOptions options;
|
107
|
-
|
108
|
-
//! File name
|
109
|
-
const string file_name;
|
110
|
-
//! File handle
|
111
|
-
unique_ptr<JSONFileHandle> file_handle;
|
112
|
-
|
113
|
-
//! Next buffer index within the file
|
114
|
-
idx_t buffer_index;
|
115
|
-
//! Mapping from batch index to currently held buffers
|
116
|
-
unordered_map<idx_t, unique_ptr<JSONBufferHandle>> buffer_map;
|
117
|
-
|
118
|
-
//! Line count per buffer
|
119
|
-
vector<int64_t> buffer_line_or_object_counts;
|
120
|
-
|
121
|
-
public:
|
122
|
-
mutex lock;
|
123
|
-
MultiFileReaderData reader_data;
|
124
|
-
|
125
|
-
public:
|
126
104
|
void OpenJSONFile();
|
127
105
|
void CloseJSONFile();
|
106
|
+
void Reset();
|
107
|
+
|
108
|
+
bool HasFileHandle() const;
|
128
109
|
bool IsOpen() const;
|
129
|
-
bool IsDone() const;
|
130
110
|
|
131
111
|
BufferedJSONReaderOptions &GetOptions();
|
132
|
-
const BufferedJSONReaderOptions &GetOptions() const;
|
133
112
|
|
134
113
|
JSONFormat GetFormat() const;
|
135
114
|
void SetFormat(JSONFormat format);
|
115
|
+
|
136
116
|
JSONRecordType GetRecordType() const;
|
137
117
|
void SetRecordType(JSONRecordType type);
|
138
118
|
|
@@ -142,7 +122,7 @@ public:
|
|
142
122
|
public:
|
143
123
|
//! Insert/get/remove buffer (grabs the lock)
|
144
124
|
void InsertBuffer(idx_t buffer_idx, unique_ptr<JSONBufferHandle> &&buffer);
|
145
|
-
JSONBufferHandle
|
125
|
+
optional_ptr<JSONBufferHandle> GetBuffer(idx_t buffer_idx);
|
146
126
|
AllocatedData RemoveBuffer(idx_t buffer_idx);
|
147
127
|
|
148
128
|
//! Get a new buffer index (must hold the lock)
|
@@ -154,11 +134,34 @@ public:
|
|
154
134
|
//! Throws a transform error that mentions the file name and line number
|
155
135
|
void ThrowTransformError(idx_t buf_index, idx_t line_or_object_in_buf, const string &error_message);
|
156
136
|
|
137
|
+
//! Scan progress
|
157
138
|
double GetProgress() const;
|
158
|
-
void Reset();
|
159
139
|
|
160
140
|
private:
|
161
141
|
idx_t GetLineNumber(idx_t buf_index, idx_t line_or_object_in_buf);
|
142
|
+
|
143
|
+
private:
|
144
|
+
ClientContext &context;
|
145
|
+
BufferedJSONReaderOptions options;
|
146
|
+
|
147
|
+
//! File name
|
148
|
+
const string file_name;
|
149
|
+
//! File handle
|
150
|
+
unique_ptr<JSONFileHandle> file_handle;
|
151
|
+
|
152
|
+
//! Next buffer index within the file
|
153
|
+
idx_t buffer_index;
|
154
|
+
//! Mapping from batch index to currently held buffers
|
155
|
+
unordered_map<idx_t, unique_ptr<JSONBufferHandle>> buffer_map;
|
156
|
+
|
157
|
+
//! Line count per buffer
|
158
|
+
vector<int64_t> buffer_line_or_object_counts;
|
159
|
+
//! Whether any of the reading threads has thrown an error
|
160
|
+
bool thrown;
|
161
|
+
|
162
|
+
public:
|
163
|
+
mutex lock;
|
164
|
+
MultiFileReaderData reader_data;
|
162
165
|
};
|
163
166
|
|
164
167
|
} // namespace duckdb
|
@@ -182,11 +182,13 @@ public:
|
|
182
182
|
//! One JSON reader per file
|
183
183
|
vector<optional_ptr<BufferedJSONReader>> json_readers;
|
184
184
|
//! Current file/batch index
|
185
|
-
idx_t file_index;
|
185
|
+
atomic<idx_t> file_index;
|
186
186
|
atomic<idx_t> batch_index;
|
187
187
|
|
188
188
|
//! Current number of threads active
|
189
189
|
idx_t system_threads;
|
190
|
+
//! Whether we enable parallel scans (only if less files than threads)
|
191
|
+
bool enable_parallel_scans;
|
190
192
|
};
|
191
193
|
|
192
194
|
struct JSONScanLocalState {
|
@@ -219,19 +221,20 @@ public:
|
|
219
221
|
|
220
222
|
private:
|
221
223
|
bool ReadNextBuffer(JSONScanGlobalState &gstate);
|
222
|
-
void ReadNextBufferInternal(JSONScanGlobalState &gstate,
|
223
|
-
void ReadNextBufferSeek(JSONScanGlobalState &gstate,
|
224
|
-
void ReadNextBufferNoSeek(JSONScanGlobalState &gstate,
|
224
|
+
void ReadNextBufferInternal(JSONScanGlobalState &gstate, optional_idx &buffer_index);
|
225
|
+
void ReadNextBufferSeek(JSONScanGlobalState &gstate, optional_idx &buffer_index);
|
226
|
+
void ReadNextBufferNoSeek(JSONScanGlobalState &gstate, optional_idx &buffer_index);
|
225
227
|
void SkipOverArrayStart();
|
226
228
|
|
227
|
-
|
228
|
-
void ReconstructFirstObject(
|
229
|
+
void ReadAndAutoDetect(JSONScanGlobalState &gstate, optional_idx &buffer_index);
|
230
|
+
void ReconstructFirstObject();
|
229
231
|
void ParseNextChunk();
|
230
232
|
|
231
233
|
void ParseJSON(char *const json_start, const idx_t json_size, const idx_t remaining);
|
232
234
|
void ThrowObjectSizeError(const idx_t object_size);
|
233
235
|
void ThrowInvalidAtEndError();
|
234
236
|
|
237
|
+
void TryIncrementFileIndex(JSONScanGlobalState &gstate) const;
|
235
238
|
bool IsParallel(JSONScanGlobalState &gstate) const;
|
236
239
|
|
237
240
|
private:
|