duckdb 0.8.1-dev253.0 → 0.8.1-dev276.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +8 -8
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +23 -14
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +6 -6
- package/src/duckdb/extension/json/include/json_common.hpp +12 -2
- package/src/duckdb/extension/json/include/json_scan.hpp +3 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +10 -10
- package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +2 -2
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +2 -2
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +5 -3
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +11 -11
- package/src/duckdb/extension/json/json_functions/read_json.cpp +2 -1
- package/src/duckdb/extension/json/json_functions.cpp +3 -3
- package/src/duckdb/extension/json/json_scan.cpp +40 -25
- package/src/duckdb/src/function/table/system/test_vector_types.cpp +81 -25
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
package/binding.gyp
CHANGED
@@ -237,18 +237,18 @@
|
|
237
237
|
"src/duckdb/third_party/zstd/compress/zstd_lazy.cpp",
|
238
238
|
"src/duckdb/third_party/zstd/compress/zstd_ldm.cpp",
|
239
239
|
"src/duckdb/third_party/zstd/compress/zstd_opt.cpp",
|
240
|
-
"src/duckdb/extension/icu/./icu-dateadd.cpp",
|
241
|
-
"src/duckdb/extension/icu/./icu-datetrunc.cpp",
|
242
|
-
"src/duckdb/extension/icu/./icu-datesub.cpp",
|
243
|
-
"src/duckdb/extension/icu/./icu-table-range.cpp",
|
244
|
-
"src/duckdb/extension/icu/./icu-timebucket.cpp",
|
245
240
|
"src/duckdb/extension/icu/./icu-list-range.cpp",
|
241
|
+
"src/duckdb/extension/icu/./icu-datefunc.cpp",
|
246
242
|
"src/duckdb/extension/icu/./icu-datepart.cpp",
|
247
|
-
"src/duckdb/extension/icu/./icu-
|
243
|
+
"src/duckdb/extension/icu/./icu-datetrunc.cpp",
|
244
|
+
"src/duckdb/extension/icu/./icu-table-range.cpp",
|
245
|
+
"src/duckdb/extension/icu/./icu-dateadd.cpp",
|
248
246
|
"src/duckdb/extension/icu/./icu-extension.cpp",
|
249
|
-
"src/duckdb/extension/icu/./icu-makedate.cpp",
|
250
|
-
"src/duckdb/extension/icu/./icu-datefunc.cpp",
|
251
247
|
"src/duckdb/extension/icu/./icu-strptime.cpp",
|
248
|
+
"src/duckdb/extension/icu/./icu-datesub.cpp",
|
249
|
+
"src/duckdb/extension/icu/./icu-makedate.cpp",
|
250
|
+
"src/duckdb/extension/icu/./icu-timezone.cpp",
|
251
|
+
"src/duckdb/extension/icu/./icu-timebucket.cpp",
|
252
252
|
"src/duckdb/ub_extension_icu_third_party_icu_common.cpp",
|
253
253
|
"src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp",
|
254
254
|
"src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp",
|
package/package.json
CHANGED
@@ -30,6 +30,10 @@ JSONFileHandle::JSONFileHandle(unique_ptr<FileHandle> file_handle_p, Allocator &
|
|
30
30
|
requested_reads(0), actual_reads(0), cached_size(0) {
|
31
31
|
}
|
32
32
|
|
33
|
+
bool JSONFileHandle::IsOpen() const {
|
34
|
+
return file_handle != nullptr;
|
35
|
+
}
|
36
|
+
|
33
37
|
void JSONFileHandle::Close() {
|
34
38
|
if (file_handle) {
|
35
39
|
file_handle->Close();
|
@@ -62,19 +66,20 @@ idx_t JSONFileHandle::GetPositionAndSize(idx_t &position, idx_t requested_size)
|
|
62
66
|
if (actual_size != 0) {
|
63
67
|
requested_reads++;
|
64
68
|
}
|
69
|
+
|
65
70
|
return actual_size;
|
66
71
|
}
|
67
72
|
|
68
|
-
void JSONFileHandle::ReadAtPosition(
|
73
|
+
void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, bool sample_run) {
|
69
74
|
D_ASSERT(size != 0);
|
70
75
|
if (plain_file_source) {
|
71
|
-
file_handle->Read(
|
76
|
+
file_handle->Read(pointer, size, position);
|
72
77
|
actual_reads++;
|
73
78
|
return;
|
74
79
|
}
|
75
80
|
|
76
81
|
if (sample_run) { // Cache the buffer
|
77
|
-
file_handle->Read(
|
82
|
+
file_handle->Read(pointer, size, position);
|
78
83
|
actual_reads++;
|
79
84
|
cached_buffers.emplace_back(allocator.Allocate(size));
|
80
85
|
memcpy(cached_buffers.back().get(), pointer, size);
|
@@ -87,12 +92,12 @@ void JSONFileHandle::ReadAtPosition(const char *pointer, idx_t size, idx_t posit
|
|
87
92
|
actual_reads++;
|
88
93
|
}
|
89
94
|
if (size != 0) {
|
90
|
-
file_handle->Read(
|
95
|
+
file_handle->Read(pointer, size, position);
|
91
96
|
actual_reads++;
|
92
97
|
}
|
93
98
|
}
|
94
99
|
|
95
|
-
idx_t JSONFileHandle::Read(
|
100
|
+
idx_t JSONFileHandle::Read(char *pointer, idx_t requested_size, bool sample_run) {
|
96
101
|
D_ASSERT(requested_size != 0);
|
97
102
|
if (plain_file_source) {
|
98
103
|
auto actual_size = ReadInternal(pointer, requested_size);
|
@@ -121,7 +126,7 @@ idx_t JSONFileHandle::Read(const char *pointer, idx_t requested_size, bool sampl
|
|
121
126
|
return actual_size;
|
122
127
|
}
|
123
128
|
|
124
|
-
idx_t JSONFileHandle::ReadFromCache(
|
129
|
+
idx_t JSONFileHandle::ReadFromCache(char *&pointer, idx_t &size, idx_t &position) {
|
125
130
|
idx_t read_size = 0;
|
126
131
|
idx_t total_offset = 0;
|
127
132
|
|
@@ -134,7 +139,7 @@ idx_t JSONFileHandle::ReadFromCache(const char *&pointer, idx_t &size, idx_t &po
|
|
134
139
|
if (position < total_offset + cached_buffer.GetSize()) {
|
135
140
|
idx_t within_buffer_offset = position - total_offset;
|
136
141
|
idx_t copy_size = MinValue<idx_t>(size, cached_buffer.GetSize() - within_buffer_offset);
|
137
|
-
memcpy(
|
142
|
+
memcpy(pointer, cached_buffer.get() + within_buffer_offset, copy_size);
|
138
143
|
|
139
144
|
read_size += copy_size;
|
140
145
|
pointer += copy_size;
|
@@ -147,11 +152,11 @@ idx_t JSONFileHandle::ReadFromCache(const char *&pointer, idx_t &size, idx_t &po
|
|
147
152
|
return read_size;
|
148
153
|
}
|
149
154
|
|
150
|
-
idx_t JSONFileHandle::ReadInternal(
|
155
|
+
idx_t JSONFileHandle::ReadInternal(char *pointer, const idx_t requested_size) {
|
151
156
|
// Deal with reading from pipes
|
152
157
|
idx_t total_read_size = 0;
|
153
158
|
while (total_read_size < requested_size) {
|
154
|
-
auto read_size = file_handle->Read(
|
159
|
+
auto read_size = file_handle->Read(pointer + total_read_size, requested_size - total_read_size);
|
155
160
|
if (read_size == 0) {
|
156
161
|
break;
|
157
162
|
}
|
@@ -165,6 +170,7 @@ BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReade
|
|
165
170
|
}
|
166
171
|
|
167
172
|
void BufferedJSONReader::OpenJSONFile() {
|
173
|
+
D_ASSERT(!IsDone());
|
168
174
|
lock_guard<mutex> guard(lock);
|
169
175
|
auto &file_system = FileSystem::GetFileSystem(context);
|
170
176
|
auto regular_file_handle =
|
@@ -186,6 +192,13 @@ bool BufferedJSONReader::IsOpen() const {
|
|
186
192
|
return file_handle != nullptr;
|
187
193
|
}
|
188
194
|
|
195
|
+
bool BufferedJSONReader::IsDone() const {
|
196
|
+
if (file_handle) {
|
197
|
+
return !file_handle->IsOpen();
|
198
|
+
}
|
199
|
+
return false;
|
200
|
+
}
|
201
|
+
|
189
202
|
BufferedJSONReaderOptions &BufferedJSONReader::GetOptions() {
|
190
203
|
return options;
|
191
204
|
}
|
@@ -212,10 +225,6 @@ void BufferedJSONReader::SetRecordType(duckdb::JSONRecordType type) {
|
|
212
225
|
options.record_type = type;
|
213
226
|
}
|
214
227
|
|
215
|
-
bool BufferedJSONReader::IsParallel() const {
|
216
|
-
return options.format == JSONFormat::NEWLINE_DELIMITED && file_handle->CanSeek();
|
217
|
-
}
|
218
|
-
|
219
228
|
const string &BufferedJSONReader::GetFileName() const {
|
220
229
|
return file_name;
|
221
230
|
}
|
@@ -288,7 +297,7 @@ void BufferedJSONReader::ThrowTransformError(idx_t buf_index, idx_t line_or_obje
|
|
288
297
|
const string &error_message) {
|
289
298
|
string unit = options.format == JSONFormat::NEWLINE_DELIMITED ? "line" : "record/value";
|
290
299
|
auto line = GetLineNumber(buf_index, line_or_object_in_buf);
|
291
|
-
throw InvalidInputException("JSON transform error in file \"%s\", in %s %llu: %s
|
300
|
+
throw InvalidInputException("JSON transform error in file \"%s\", in %s %llu: %s", file_name, unit, line,
|
292
301
|
error_message);
|
293
302
|
}
|
294
303
|
|
@@ -71,6 +71,7 @@ public:
|
|
71
71
|
struct JSONFileHandle {
|
72
72
|
public:
|
73
73
|
JSONFileHandle(unique_ptr<FileHandle> file_handle, Allocator &allocator);
|
74
|
+
bool IsOpen() const;
|
74
75
|
void Close();
|
75
76
|
|
76
77
|
idx_t FileSize() const;
|
@@ -80,15 +81,15 @@ public:
|
|
80
81
|
void Seek(idx_t position);
|
81
82
|
|
82
83
|
idx_t GetPositionAndSize(idx_t &position, idx_t requested_size);
|
83
|
-
void ReadAtPosition(
|
84
|
-
idx_t Read(
|
84
|
+
void ReadAtPosition(char *pointer, idx_t size, idx_t position, bool sample_run);
|
85
|
+
idx_t Read(char *pointer, idx_t requested_size, bool sample_run);
|
85
86
|
|
86
87
|
void Reset();
|
87
88
|
bool RequestedReadsComplete();
|
88
89
|
|
89
90
|
private:
|
90
|
-
idx_t ReadFromCache(
|
91
|
-
idx_t ReadInternal(
|
91
|
+
idx_t ReadFromCache(char *&pointer, idx_t &size, idx_t &position);
|
92
|
+
idx_t ReadInternal(char *pointer, const idx_t requested_size);
|
92
93
|
|
93
94
|
private:
|
94
95
|
//! The JSON file handle
|
@@ -139,6 +140,7 @@ public:
|
|
139
140
|
void OpenJSONFile();
|
140
141
|
void CloseJSONFile();
|
141
142
|
bool IsOpen() const;
|
143
|
+
bool IsDone() const;
|
142
144
|
|
143
145
|
BufferedJSONReaderOptions &GetOptions();
|
144
146
|
const BufferedJSONReaderOptions &GetOptions() const;
|
@@ -148,8 +150,6 @@ public:
|
|
148
150
|
JSONRecordType GetRecordType() const;
|
149
151
|
void SetRecordType(JSONRecordType type);
|
150
152
|
|
151
|
-
bool IsParallel() const;
|
152
|
-
|
153
153
|
const string &GetFileName() const;
|
154
154
|
JSONFileHandle &GetFileHandle() const;
|
155
155
|
|
@@ -170,6 +170,16 @@ public:
|
|
170
170
|
}
|
171
171
|
|
172
172
|
public:
|
173
|
+
template <class T>
|
174
|
+
static T *AllocateArray(yyjson_alc *alc, idx_t count) {
|
175
|
+
return reinterpret_cast<T *>(alc->malloc(alc->ctx, sizeof(T) * count));
|
176
|
+
}
|
177
|
+
|
178
|
+
template <class T>
|
179
|
+
static T *AllocateArray(yyjson_mut_doc *doc, idx_t count) {
|
180
|
+
return AllocateArray<T>(&doc->alc, count);
|
181
|
+
}
|
182
|
+
|
173
183
|
static inline yyjson_mut_doc *CreateDocument(yyjson_alc *alc) {
|
174
184
|
D_ASSERT(alc);
|
175
185
|
return yyjson_mut_doc_new(alc);
|
@@ -419,11 +429,11 @@ private:
|
|
419
429
|
|
420
430
|
template <>
|
421
431
|
inline char *JSONCommon::WriteVal(yyjson_val *val, yyjson_alc *alc, idx_t &len) {
|
422
|
-
return yyjson_val_write_opts(val, JSONCommon::WRITE_FLAG, alc,
|
432
|
+
return yyjson_val_write_opts(val, JSONCommon::WRITE_FLAG, alc, reinterpret_cast<size_t *>(&len), nullptr);
|
423
433
|
}
|
424
434
|
template <>
|
425
435
|
inline char *JSONCommon::WriteVal(yyjson_mut_val *val, yyjson_alc *alc, idx_t &len) {
|
426
|
-
return yyjson_mut_val_write_opts(val, JSONCommon::WRITE_FLAG, alc,
|
436
|
+
return yyjson_mut_val_write_opts(val, JSONCommon::WRITE_FLAG, alc, reinterpret_cast<size_t *>(&len), nullptr);
|
427
437
|
}
|
428
438
|
|
429
439
|
template <>
|
@@ -232,6 +232,8 @@ private:
|
|
232
232
|
void ThrowObjectSizeError(const idx_t object_size);
|
233
233
|
void ThrowInvalidAtEndError();
|
234
234
|
|
235
|
+
bool IsParallel(JSONScanGlobalState &gstate) const;
|
236
|
+
|
235
237
|
private:
|
236
238
|
//! Bind data
|
237
239
|
const JSONScanData &bind_data;
|
@@ -245,7 +247,7 @@ private:
|
|
245
247
|
bool is_last;
|
246
248
|
|
247
249
|
//! Current buffer read info
|
248
|
-
|
250
|
+
char *buffer_ptr;
|
249
251
|
idx_t buffer_size;
|
250
252
|
idx_t buffer_offset;
|
251
253
|
idx_t prev_buffer_remainder;
|
@@ -276,7 +276,7 @@ static void CreateValuesStruct(const StructNames &names, yyjson_mut_doc *doc, yy
|
|
276
276
|
vals[i] = yyjson_mut_obj(doc);
|
277
277
|
}
|
278
278
|
// Initialize re-usable array for the nested values
|
279
|
-
auto nested_vals =
|
279
|
+
auto nested_vals = JSONCommon::AllocateArray<yyjson_mut_val *>(doc, count);
|
280
280
|
|
281
281
|
// Add the key/value pairs to the values
|
282
282
|
auto &entries = StructVector::GetEntries(value_v);
|
@@ -301,12 +301,12 @@ static void CreateValuesMap(const StructNames &names, yyjson_mut_doc *doc, yyjso
|
|
301
301
|
// Create nested keys
|
302
302
|
auto &map_key_v = MapVector::GetKeys(value_v);
|
303
303
|
auto map_key_count = ListVector::GetListSize(value_v);
|
304
|
-
auto nested_keys =
|
304
|
+
auto nested_keys = JSONCommon::AllocateArray<yyjson_mut_val *>(doc, map_key_count);
|
305
305
|
TemplatedCreateValues<string_t, string_t>(doc, nested_keys, map_key_v, map_key_count);
|
306
306
|
// Create nested values
|
307
307
|
auto &map_val_v = MapVector::GetValues(value_v);
|
308
308
|
auto map_val_count = ListVector::GetListSize(value_v);
|
309
|
-
auto nested_vals =
|
309
|
+
auto nested_vals = JSONCommon::AllocateArray<yyjson_mut_val *>(doc, map_val_count);
|
310
310
|
CreateValues(names, doc, nested_vals, map_val_v, map_val_count);
|
311
311
|
// Add the key/value pairs to the values
|
312
312
|
UnifiedVectorFormat map_data;
|
@@ -338,7 +338,7 @@ static void CreateValuesUnion(const StructNames &names, yyjson_mut_doc *doc, yyj
|
|
338
338
|
}
|
339
339
|
|
340
340
|
// Initialize re-usable array for the nested values
|
341
|
-
auto nested_vals =
|
341
|
+
auto nested_vals = JSONCommon::AllocateArray<yyjson_mut_val *>(doc, count);
|
342
342
|
|
343
343
|
auto &tag_v = UnionVector::GetTags(value_v);
|
344
344
|
UnifiedVectorFormat tag_data;
|
@@ -384,7 +384,7 @@ static void CreateValuesList(const StructNames &names, yyjson_mut_doc *doc, yyjs
|
|
384
384
|
// Initialize array for the nested values
|
385
385
|
auto &child_v = ListVector::GetEntry(value_v);
|
386
386
|
auto child_count = ListVector::GetListSize(value_v);
|
387
|
-
auto nested_vals =
|
387
|
+
auto nested_vals = JSONCommon::AllocateArray<yyjson_mut_val *>(doc, child_count);
|
388
388
|
// Fill nested_vals with list values
|
389
389
|
CreateValues(names, doc, nested_vals, child_v, child_count);
|
390
390
|
// Now we add the values to the appropriate JSON arrays
|
@@ -501,12 +501,12 @@ static void ObjectFunction(DataChunk &args, ExpressionState &state, Vector &resu
|
|
501
501
|
// Initialize values
|
502
502
|
const idx_t count = args.size();
|
503
503
|
auto doc = JSONCommon::CreateDocument(alc);
|
504
|
-
auto objs =
|
504
|
+
auto objs = JSONCommon::AllocateArray<yyjson_mut_val *>(doc, count);
|
505
505
|
for (idx_t i = 0; i < count; i++) {
|
506
506
|
objs[i] = yyjson_mut_obj(doc);
|
507
507
|
}
|
508
508
|
// Initialize a re-usable value array
|
509
|
-
auto vals =
|
509
|
+
auto vals = JSONCommon::AllocateArray<yyjson_mut_val *>(doc, count);
|
510
510
|
// Loop through key/value pairs
|
511
511
|
for (idx_t pair_idx = 0; pair_idx < args.data.size() / 2; pair_idx++) {
|
512
512
|
Vector &key_v = args.data[pair_idx * 2];
|
@@ -533,12 +533,12 @@ static void ArrayFunction(DataChunk &args, ExpressionState &state, Vector &resul
|
|
533
533
|
// Initialize arrays
|
534
534
|
const idx_t count = args.size();
|
535
535
|
auto doc = JSONCommon::CreateDocument(alc);
|
536
|
-
auto arrs =
|
536
|
+
auto arrs = JSONCommon::AllocateArray<yyjson_mut_val *>(doc, count);
|
537
537
|
for (idx_t i = 0; i < count; i++) {
|
538
538
|
arrs[i] = yyjson_mut_arr(doc);
|
539
539
|
}
|
540
540
|
// Initialize a re-usable value array
|
541
|
-
auto vals =
|
541
|
+
auto vals = JSONCommon::AllocateArray<yyjson_mut_val *>(doc, count);
|
542
542
|
// Loop through args
|
543
543
|
for (auto &v : args.data) {
|
544
544
|
CreateValues(info.const_struct_names, doc, vals, v, count);
|
@@ -561,7 +561,7 @@ static void ToJSONFunctionInternal(const StructNames &names, Vector &input, cons
|
|
561
561
|
yyjson_alc *alc) {
|
562
562
|
// Initialize array for values
|
563
563
|
auto doc = JSONCommon::CreateDocument(alc);
|
564
|
-
auto vals =
|
564
|
+
auto vals = JSONCommon::AllocateArray<yyjson_mut_val *>(doc, count);
|
565
565
|
CreateValues(names, doc, vals, input, count);
|
566
566
|
|
567
567
|
// Write JSON values to string
|
@@ -59,11 +59,11 @@ static void MergePatchFunction(DataChunk &args, ExpressionState &state, Vector &
|
|
59
59
|
const auto count = args.size();
|
60
60
|
|
61
61
|
// Read the first json arg
|
62
|
-
auto origs =
|
62
|
+
auto origs = JSONCommon::AllocateArray<yyjson_mut_val *>(alc, count);
|
63
63
|
ReadObjects(doc, args.data[0], origs, count);
|
64
64
|
|
65
65
|
// Read the next json args one by one and merge them into the first json arg
|
66
|
-
auto patches =
|
66
|
+
auto patches = JSONCommon::AllocateArray<yyjson_mut_val *>(alc, count);
|
67
67
|
for (idx_t arg_idx = 1; arg_idx < args.data.size(); arg_idx++) {
|
68
68
|
ReadObjects(doc, args.data[arg_idx], patches, count);
|
69
69
|
for (idx_t i = 0; i < count; i++) {
|
@@ -108,7 +108,7 @@ static void JsonSerializeFunction(DataChunk &args, ExpressionState &state, Vecto
|
|
108
108
|
idx_t len;
|
109
109
|
auto data = yyjson_mut_val_write_opts(result_obj,
|
110
110
|
info.format ? JSONCommon::WRITE_PRETTY_FLAG : JSONCommon::WRITE_FLAG,
|
111
|
-
alc,
|
111
|
+
alc, reinterpret_cast<size_t *>(&len), nullptr);
|
112
112
|
if (data == nullptr) {
|
113
113
|
throw SerializationException(
|
114
114
|
"Failed to serialize json, perhaps the query contains invalid utf8 characters?");
|
@@ -124,7 +124,7 @@ static void JsonSerializeFunction(DataChunk &args, ExpressionState &state, Vecto
|
|
124
124
|
idx_t len;
|
125
125
|
auto data = yyjson_mut_val_write_opts(result_obj,
|
126
126
|
info.format ? JSONCommon::WRITE_PRETTY_FLAG : JSONCommon::WRITE_FLAG,
|
127
|
-
alc,
|
127
|
+
alc, reinterpret_cast<size_t *>(&len), nullptr);
|
128
128
|
return StringVector::AddString(result, data, len);
|
129
129
|
}
|
130
130
|
});
|
@@ -150,7 +150,8 @@ void JSONStructureNode::RefineCandidateTypesArray(yyjson_val *vals[], idx_t coun
|
|
150
150
|
}
|
151
151
|
|
152
152
|
idx_t offset = 0;
|
153
|
-
auto child_vals =
|
153
|
+
auto child_vals =
|
154
|
+
reinterpret_cast<yyjson_val **>(allocator.AllocateAligned(total_list_size * sizeof(yyjson_val *)));
|
154
155
|
|
155
156
|
size_t idx, max;
|
156
157
|
yyjson_val *child_val;
|
@@ -173,11 +174,12 @@ void JSONStructureNode::RefineCandidateTypesObject(yyjson_val *vals[], idx_t cou
|
|
173
174
|
vector<yyjson_val **> child_vals;
|
174
175
|
child_vals.reserve(child_count);
|
175
176
|
for (idx_t child_idx = 0; child_idx < child_count; child_idx++) {
|
176
|
-
child_vals.emplace_back(
|
177
|
+
child_vals.emplace_back(
|
178
|
+
reinterpret_cast<yyjson_val **>(allocator.AllocateAligned(count * sizeof(yyjson_val *))));
|
177
179
|
}
|
178
180
|
|
179
181
|
idx_t found_key_count;
|
180
|
-
auto found_keys =
|
182
|
+
auto found_keys = reinterpret_cast<bool *>(allocator.AllocateAligned(sizeof(bool) * child_count));
|
181
183
|
|
182
184
|
const auto &key_map = desc.key_map;
|
183
185
|
size_t idx, max;
|
@@ -216,7 +216,7 @@ static inline bool GetValueString(yyjson_val *val, yyjson_alc *alc, string_t &re
|
|
216
216
|
|
217
217
|
template <class T>
|
218
218
|
static bool TransformNumerical(yyjson_val *vals[], Vector &result, const idx_t count, JSONTransformOptions &options) {
|
219
|
-
auto data =
|
219
|
+
auto data = FlatVector::GetData<T>(result);
|
220
220
|
auto &validity = FlatVector::Validity(result);
|
221
221
|
|
222
222
|
bool success = true;
|
@@ -238,7 +238,7 @@ static bool TransformNumerical(yyjson_val *vals[], Vector &result, const idx_t c
|
|
238
238
|
template <class T>
|
239
239
|
static bool TransformDecimal(yyjson_val *vals[], Vector &result, const idx_t count, uint8_t width, uint8_t scale,
|
240
240
|
JSONTransformOptions &options) {
|
241
|
-
auto data =
|
241
|
+
auto data = FlatVector::GetData<T>(result);
|
242
242
|
auto &validity = FlatVector::Validity(result);
|
243
243
|
|
244
244
|
bool success = true;
|
@@ -373,7 +373,7 @@ static bool TransformFromStringWithFormat(yyjson_val *vals[], Vector &result, co
|
|
373
373
|
}
|
374
374
|
|
375
375
|
static bool TransformToString(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count) {
|
376
|
-
auto data =
|
376
|
+
auto data = FlatVector::GetData<string_t>(result);
|
377
377
|
auto &validity = FlatVector::Validity(result);
|
378
378
|
for (idx_t i = 0; i < count; i++) {
|
379
379
|
const auto &val = vals[i];
|
@@ -400,11 +400,11 @@ bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
|
|
400
400
|
nested_vals.reserve(column_count);
|
401
401
|
for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
|
402
402
|
key_map.insert({{names[col_idx].c_str(), names[col_idx].length()}, col_idx});
|
403
|
-
nested_vals.push_back(
|
403
|
+
nested_vals.push_back(JSONCommon::AllocateArray<yyjson_val *>(alc, count));
|
404
404
|
}
|
405
405
|
|
406
406
|
idx_t found_key_count;
|
407
|
-
auto found_keys =
|
407
|
+
auto found_keys = JSONCommon::AllocateArray<bool>(alc, column_count);
|
408
408
|
|
409
409
|
bool success = true;
|
410
410
|
|
@@ -558,7 +558,7 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
|
|
558
558
|
ListVector::Reserve(result, offset);
|
559
559
|
|
560
560
|
// Initialize array for the nested values
|
561
|
-
auto nested_vals =
|
561
|
+
auto nested_vals = JSONCommon::AllocateArray<yyjson_val *>(alc, offset);
|
562
562
|
|
563
563
|
// Get array values
|
564
564
|
size_t idx, max;
|
@@ -617,8 +617,8 @@ static bool TransformObjectToMap(yyjson_val *objects[], yyjson_alc *alc, Vector
|
|
617
617
|
auto list_entries = FlatVector::GetData<list_entry_t>(result);
|
618
618
|
auto &list_validity = FlatVector::Validity(result);
|
619
619
|
|
620
|
-
auto keys =
|
621
|
-
auto vals =
|
620
|
+
auto keys = JSONCommon::AllocateArray<yyjson_val *>(alc, list_size);
|
621
|
+
auto vals = JSONCommon::AllocateArray<yyjson_val *>(alc, list_size);
|
622
622
|
|
623
623
|
bool success = true;
|
624
624
|
idx_t list_offset = 0;
|
@@ -675,7 +675,7 @@ static bool TransformObjectToMap(yyjson_val *objects[], yyjson_alc *alc, Vector
|
|
675
675
|
}
|
676
676
|
|
677
677
|
bool TransformToJSON(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count) {
|
678
|
-
auto data =
|
678
|
+
auto data = FlatVector::GetData<string_t>(result);
|
679
679
|
auto &validity = FlatVector::Validity(result);
|
680
680
|
for (idx_t i = 0; i < count; i++) {
|
681
681
|
const auto &val = vals[i];
|
@@ -779,8 +779,8 @@ static bool TransformFunctionInternal(Vector &input, const idx_t count, Vector &
|
|
779
779
|
auto inputs = UnifiedVectorFormat::GetData<string_t>(input_data);
|
780
780
|
|
781
781
|
// Read documents
|
782
|
-
auto docs =
|
783
|
-
auto vals =
|
782
|
+
auto docs = JSONCommon::AllocateArray<yyjson_doc *>(alc, count);
|
783
|
+
auto vals = JSONCommon::AllocateArray<yyjson_val *>(alc, count);
|
784
784
|
auto &result_validity = FlatVector::Validity(result);
|
785
785
|
for (idx_t i = 0; i < count; i++) {
|
786
786
|
auto idx = input_data.sel->get_index(i);
|
@@ -258,7 +258,8 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
|
|
258
258
|
string hint =
|
259
259
|
gstate.bind_data.auto_detect
|
260
260
|
? "\nTry increasing 'sample_size', reducing 'maximum_depth', specifying 'columns', 'format' or "
|
261
|
-
"'records' manually, or setting '
|
261
|
+
"'records' manually, setting 'ignore_errors' to true, or setting 'union_by_name' to true when "
|
262
|
+
"reading multiple files with a different structure."
|
262
263
|
: "\nTry setting 'auto_detect' to true, specifying 'format' or 'records' manually, or setting "
|
263
264
|
"'ignore_errors' to true.";
|
264
265
|
lstate.ThrowTransformError(lstate.transform_options.object_index,
|
@@ -223,8 +223,8 @@ static bool CastVarcharToJSON(Vector &source, Vector &result, idx_t count, CastP
|
|
223
223
|
bool success = true;
|
224
224
|
UnaryExecutor::ExecuteWithNulls<string_t, string_t>(
|
225
225
|
source, result, count, [&](string_t input, ValidityMask &mask, idx_t idx) {
|
226
|
-
auto data =
|
227
|
-
auto length = input.GetSize();
|
226
|
+
auto data = input.GetDataWriteable();
|
227
|
+
const auto length = input.GetSize();
|
228
228
|
|
229
229
|
yyjson_read_err error;
|
230
230
|
auto doc = JSONCommon::ReadDocumentUnsafe(data, length, JSONCommon::READ_FLAG, alc, &error);
|
@@ -239,7 +239,7 @@ static bool CastVarcharToJSON(Vector &source, Vector &result, idx_t count, CastP
|
|
239
239
|
}
|
240
240
|
return input;
|
241
241
|
});
|
242
|
-
result
|
242
|
+
StringVector::AddHeapReference(result, source);
|
243
243
|
return success;
|
244
244
|
}
|
245
245
|
|
@@ -244,14 +244,13 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
|
|
244
244
|
|
245
245
|
idx_t JSONGlobalTableFunctionState::MaxThreads() const {
|
246
246
|
auto &bind_data = state.bind_data;
|
247
|
-
if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED
|
248
|
-
bind_data.options.compression == FileCompressionType::UNCOMPRESSED) {
|
247
|
+
if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED) {
|
249
248
|
return state.system_threads;
|
250
249
|
}
|
251
250
|
|
252
251
|
if (!state.json_readers.empty() && state.json_readers[0]->IsOpen()) {
|
253
252
|
auto &reader = *state.json_readers[0];
|
254
|
-
if (reader.
|
253
|
+
if (reader.GetFormat() == JSONFormat::NEWLINE_DELIMITED) { // Auto-detected NDJSON
|
255
254
|
return state.system_threads;
|
256
255
|
}
|
257
256
|
}
|
@@ -298,6 +297,7 @@ idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) {
|
|
298
297
|
if (!ReadNextBuffer(gstate)) {
|
299
298
|
return scan_count;
|
300
299
|
}
|
300
|
+
D_ASSERT(buffer_size != 0);
|
301
301
|
if (current_buffer_handle->buffer_index != 0 && current_reader->GetFormat() == JSONFormat::NEWLINE_DELIMITED) {
|
302
302
|
ReconstructFirstObject(gstate);
|
303
303
|
scan_count++;
|
@@ -308,8 +308,8 @@ idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) {
|
|
308
308
|
return scan_count;
|
309
309
|
}
|
310
310
|
|
311
|
-
static inline const char *NextNewline(
|
312
|
-
return (
|
311
|
+
static inline const char *NextNewline(char *ptr, idx_t size) {
|
312
|
+
return char_ptr_cast(memchr(ptr, '\n', size));
|
313
313
|
}
|
314
314
|
|
315
315
|
static inline const char *PreviousNewline(const char *ptr) {
|
@@ -455,7 +455,21 @@ void JSONScanLocalState::ThrowInvalidAtEndError() {
|
|
455
455
|
throw InvalidInputException("Invalid JSON detected at the end of file \"%s\".", current_reader->GetFileName());
|
456
456
|
}
|
457
457
|
|
458
|
-
|
458
|
+
bool JSONScanLocalState::IsParallel(JSONScanGlobalState &gstate) const {
|
459
|
+
if (bind_data.files.size() >= gstate.system_threads) {
|
460
|
+
// More files than threads, just parallelize over the files
|
461
|
+
return false;
|
462
|
+
}
|
463
|
+
|
464
|
+
if (current_reader->GetFormat() == JSONFormat::NEWLINE_DELIMITED) {
|
465
|
+
// NDJSON can be read in parallel
|
466
|
+
return true;
|
467
|
+
}
|
468
|
+
|
469
|
+
return false;
|
470
|
+
}
|
471
|
+
|
472
|
+
static pair<JSONFormat, JSONRecordType> DetectFormatAndRecordType(char *const buffer_ptr, const idx_t buffer_size,
|
459
473
|
yyjson_alc *alc) {
|
460
474
|
// First we do the easy check whether it's NEWLINE_DELIMITED
|
461
475
|
auto line_end = NextNewline(buffer_ptr, buffer_size);
|
@@ -464,7 +478,7 @@ static pair<JSONFormat, JSONRecordType> DetectFormatAndRecordType(const char *co
|
|
464
478
|
SkipWhitespace(buffer_ptr, line_size, buffer_size);
|
465
479
|
|
466
480
|
yyjson_read_err error;
|
467
|
-
auto doc = JSONCommon::ReadDocumentUnsafe(
|
481
|
+
auto doc = JSONCommon::ReadDocumentUnsafe(buffer_ptr, line_size, JSONCommon::READ_FLAG, alc, &error);
|
468
482
|
if (error.code == YYJSON_READ_SUCCESS) { // We successfully read the line
|
469
483
|
if (yyjson_is_arr(doc->root) && line_size == buffer_size) {
|
470
484
|
// It's just one array, let's actually assume ARRAY, not NEWLINE_DELIMITED
|
@@ -500,8 +514,8 @@ static pair<JSONFormat, JSONRecordType> DetectFormatAndRecordType(const char *co
|
|
500
514
|
|
501
515
|
// It's definitely an ARRAY, but now we have to figure out if there's more than one top-level array
|
502
516
|
yyjson_read_err error;
|
503
|
-
auto doc =
|
504
|
-
|
517
|
+
auto doc =
|
518
|
+
JSONCommon::ReadDocumentUnsafe(buffer_ptr + buffer_offset, remaining, JSONCommon::READ_STOP_FLAG, alc, &error);
|
505
519
|
if (error.code == YYJSON_READ_SUCCESS) {
|
506
520
|
D_ASSERT(yyjson_is_arr(doc->root));
|
507
521
|
|
@@ -563,7 +577,7 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
|
|
563
577
|
} else {
|
564
578
|
buffer = gstate.allocator.Allocate(gstate.buffer_capacity);
|
565
579
|
}
|
566
|
-
buffer_ptr = (
|
580
|
+
buffer_ptr = char_ptr_cast(buffer.get());
|
567
581
|
|
568
582
|
idx_t buffer_index;
|
569
583
|
while (true) {
|
@@ -573,7 +587,7 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
|
|
573
587
|
if (is_last && gstate.bind_data.type != JSONScanType::SAMPLE) {
|
574
588
|
current_reader->CloseJSONFile();
|
575
589
|
}
|
576
|
-
if (
|
590
|
+
if (IsParallel(gstate)) {
|
577
591
|
// If this threads' current reader is still the one at gstate.file_index,
|
578
592
|
// this thread can end the parallel scan
|
579
593
|
lock_guard<mutex> guard(gstate.lock);
|
@@ -599,7 +613,7 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
|
|
599
613
|
current_reader = gstate.json_readers[gstate.file_index].get();
|
600
614
|
if (current_reader->IsOpen()) {
|
601
615
|
// Can only be open from auto detection, so these should be known
|
602
|
-
if (!
|
616
|
+
if (!IsParallel(gstate)) {
|
603
617
|
batch_index = gstate.batch_index++;
|
604
618
|
gstate.file_index++;
|
605
619
|
}
|
@@ -609,15 +623,15 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
|
|
609
623
|
current_reader->OpenJSONFile();
|
610
624
|
batch_index = gstate.batch_index++;
|
611
625
|
if (current_reader->GetFormat() != JSONFormat::AUTO_DETECT) {
|
612
|
-
if (!
|
626
|
+
if (!IsParallel(gstate)) {
|
613
627
|
gstate.file_index++;
|
614
628
|
}
|
615
629
|
continue;
|
616
630
|
}
|
617
631
|
|
618
|
-
// If we have
|
632
|
+
// If we have less files than threads, we auto-detect within the lock,
|
619
633
|
// so other threads may join a parallel NDJSON scan
|
620
|
-
if (gstate.json_readers.size() <
|
634
|
+
if (gstate.json_readers.size() < gstate.system_threads) {
|
621
635
|
if (ReadAndAutoDetect(gstate, buffer_index, false)) {
|
622
636
|
continue;
|
623
637
|
}
|
@@ -637,7 +651,7 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
|
|
637
651
|
D_ASSERT(buffer_size != 0); // We should have read something if we got here
|
638
652
|
|
639
653
|
idx_t readers = 1;
|
640
|
-
if (current_reader->
|
654
|
+
if (current_reader->GetFormat() == JSONFormat::NEWLINE_DELIMITED) {
|
641
655
|
readers = is_last ? 1 : 2;
|
642
656
|
}
|
643
657
|
|
@@ -650,7 +664,7 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
|
|
650
664
|
lines_or_objects_in_buffer = 0;
|
651
665
|
|
652
666
|
// YYJSON needs this
|
653
|
-
memset(
|
667
|
+
memset(buffer_ptr + buffer_size, 0, YYJSON_PADDING_SIZE);
|
654
668
|
|
655
669
|
return true;
|
656
670
|
}
|
@@ -680,7 +694,7 @@ bool JSONScanLocalState::ReadAndAutoDetect(JSONScanGlobalState &gstate, idx_t &b
|
|
680
694
|
throw InvalidInputException("Expected file \"%s\" to contain records, detected non-record JSON instead.",
|
681
695
|
current_reader->GetFileName());
|
682
696
|
}
|
683
|
-
if (!already_incremented_file_idx && !
|
697
|
+
if (!already_incremented_file_idx && !IsParallel(gstate)) {
|
684
698
|
gstate.file_index++;
|
685
699
|
}
|
686
700
|
return false;
|
@@ -739,13 +753,14 @@ void JSONScanLocalState::ReadNextBufferNoSeek(JSONScanGlobalState &gstate, idx_t
|
|
739
753
|
lock_guard<mutex> reader_guard(current_reader->lock);
|
740
754
|
buffer_index = current_reader->GetBufferIndex();
|
741
755
|
|
742
|
-
if (current_reader->IsOpen()) {
|
756
|
+
if (current_reader->IsOpen() && !current_reader->IsDone()) {
|
743
757
|
read_size = current_reader->GetFileHandle().Read(buffer_ptr + prev_buffer_remainder, request_size,
|
744
758
|
gstate.bind_data.type == JSONScanType::SAMPLE);
|
759
|
+
is_last = read_size < request_size;
|
745
760
|
} else {
|
746
761
|
read_size = 0;
|
762
|
+
is_last = false;
|
747
763
|
}
|
748
|
-
is_last = read_size < request_size;
|
749
764
|
|
750
765
|
if (!gstate.bind_data.ignore_errors && read_size == 0 && prev_buffer_remainder != 0) {
|
751
766
|
ThrowInvalidAtEndError();
|
@@ -796,13 +811,13 @@ void JSONScanLocalState::ReconstructFirstObject(JSONScanGlobalState &gstate) {
|
|
796
811
|
D_ASSERT(current_reader->GetFormat() == JSONFormat::NEWLINE_DELIMITED);
|
797
812
|
|
798
813
|
// Spinlock until the previous batch index has also read its buffer
|
799
|
-
JSONBufferHandle
|
814
|
+
optional_ptr<JSONBufferHandle> previous_buffer_handle;
|
800
815
|
while (!previous_buffer_handle) {
|
801
816
|
previous_buffer_handle = current_reader->GetBuffer(current_buffer_handle->buffer_index - 1);
|
802
817
|
}
|
803
818
|
|
804
819
|
// First we find the newline in the previous block
|
805
|
-
auto prev_buffer_ptr = (
|
820
|
+
auto prev_buffer_ptr = char_ptr_cast(previous_buffer_handle->buffer.get()) + previous_buffer_handle->buffer_size;
|
806
821
|
auto part1_ptr = PreviousNewline(prev_buffer_ptr);
|
807
822
|
auto part1_size = prev_buffer_ptr - part1_ptr;
|
808
823
|
|
@@ -825,7 +840,7 @@ void JSONScanLocalState::ReconstructFirstObject(JSONScanGlobalState &gstate) {
|
|
825
840
|
|
826
841
|
// And copy the remainder of the line to the reconstruct buffer
|
827
842
|
memcpy(reconstruct_ptr + part1_size, buffer_ptr, part2_size);
|
828
|
-
memset(
|
843
|
+
memset(reconstruct_ptr + line_size, 0, YYJSON_PADDING_SIZE);
|
829
844
|
buffer_offset += part2_size;
|
830
845
|
|
831
846
|
// We copied the object, so we are no longer reading the previous buffer
|
@@ -833,7 +848,7 @@ void JSONScanLocalState::ReconstructFirstObject(JSONScanGlobalState &gstate) {
|
|
833
848
|
current_reader->RemoveBuffer(current_buffer_handle->buffer_index - 1);
|
834
849
|
}
|
835
850
|
|
836
|
-
ParseJSON((
|
851
|
+
ParseJSON(char_ptr_cast(reconstruct_ptr), line_size, line_size);
|
837
852
|
}
|
838
853
|
|
839
854
|
void JSONScanLocalState::ParseNextChunk() {
|
@@ -867,7 +882,7 @@ void JSONScanLocalState::ParseNextChunk() {
|
|
867
882
|
}
|
868
883
|
|
869
884
|
idx_t json_size = json_end - json_start;
|
870
|
-
ParseJSON(
|
885
|
+
ParseJSON(json_start, json_size, remaining);
|
871
886
|
buffer_offset += json_size;
|
872
887
|
|
873
888
|
if (format == JSONFormat::ARRAY) {
|
@@ -6,8 +6,8 @@ namespace duckdb {
|
|
6
6
|
|
7
7
|
// FLAT, CONSTANT, DICTIONARY, SEQUENCE
|
8
8
|
struct TestVectorBindData : public TableFunctionData {
|
9
|
-
LogicalType
|
10
|
-
bool all_flat;
|
9
|
+
vector<LogicalType> types;
|
10
|
+
bool all_flat = false;
|
11
11
|
};
|
12
12
|
|
13
13
|
struct TestVectorTypesData : public GlobalTableFunctionState {
|
@@ -19,16 +19,41 @@ struct TestVectorTypesData : public GlobalTableFunctionState {
|
|
19
19
|
};
|
20
20
|
|
21
21
|
struct TestVectorInfo {
|
22
|
-
TestVectorInfo(const LogicalType &
|
22
|
+
TestVectorInfo(const vector<LogicalType> &types, const map<LogicalTypeId, TestType> &test_type_map,
|
23
23
|
vector<unique_ptr<DataChunk>> &entries)
|
24
|
-
:
|
24
|
+
: types(types), test_type_map(test_type_map), entries(entries) {
|
25
25
|
}
|
26
26
|
|
27
|
-
const LogicalType &
|
27
|
+
const vector<LogicalType> &types;
|
28
28
|
const map<LogicalTypeId, TestType> &test_type_map;
|
29
29
|
vector<unique_ptr<DataChunk>> &entries;
|
30
30
|
};
|
31
31
|
|
32
|
+
struct TestGeneratedValues {
|
33
|
+
public:
|
34
|
+
void AddColumn(vector<Value> values) {
|
35
|
+
if (!column_values.empty() && column_values[0].size() != values.size()) {
|
36
|
+
throw InternalException("Size mismatch when adding a column to TestGeneratedValues");
|
37
|
+
}
|
38
|
+
column_values.push_back(std::move(values));
|
39
|
+
}
|
40
|
+
|
41
|
+
const Value &GetValue(idx_t row, idx_t column) const {
|
42
|
+
return column_values[column][row];
|
43
|
+
}
|
44
|
+
|
45
|
+
idx_t Rows() const {
|
46
|
+
return column_values.empty() ? 0 : column_values[0].size();
|
47
|
+
}
|
48
|
+
|
49
|
+
idx_t Columns() const {
|
50
|
+
return column_values.size();
|
51
|
+
}
|
52
|
+
|
53
|
+
private:
|
54
|
+
vector<vector<Value>> column_values;
|
55
|
+
};
|
56
|
+
|
32
57
|
struct TestVectorFlat {
|
33
58
|
static constexpr const idx_t TEST_VECTOR_CARDINALITY = 3;
|
34
59
|
|
@@ -75,14 +100,25 @@ struct TestVectorFlat {
|
|
75
100
|
return result;
|
76
101
|
}
|
77
102
|
|
103
|
+
static TestGeneratedValues GenerateValues(TestVectorInfo &info) {
|
104
|
+
// generate the values for each column
|
105
|
+
TestGeneratedValues generated_values;
|
106
|
+
for (auto &type : info.types) {
|
107
|
+
generated_values.AddColumn(GenerateValues(info, type));
|
108
|
+
}
|
109
|
+
return generated_values;
|
110
|
+
}
|
111
|
+
|
78
112
|
static void Generate(TestVectorInfo &info) {
|
79
|
-
|
80
|
-
for (idx_t cur_row = 0; cur_row < result_values.
|
113
|
+
auto result_values = GenerateValues(info);
|
114
|
+
for (idx_t cur_row = 0; cur_row < result_values.Rows(); cur_row += STANDARD_VECTOR_SIZE) {
|
81
115
|
auto result = make_uniq<DataChunk>();
|
82
|
-
result->Initialize(Allocator::DefaultAllocator(),
|
83
|
-
auto cardinality = MinValue<idx_t>(STANDARD_VECTOR_SIZE, result_values.
|
84
|
-
for (idx_t
|
85
|
-
|
116
|
+
result->Initialize(Allocator::DefaultAllocator(), info.types);
|
117
|
+
auto cardinality = MinValue<idx_t>(STANDARD_VECTOR_SIZE, result_values.Rows() - cur_row);
|
118
|
+
for (idx_t c = 0; c < info.types.size(); c++) {
|
119
|
+
for (idx_t i = 0; i < cardinality; i++) {
|
120
|
+
result->data[c].SetValue(i, result_values.GetValue(cur_row + i, c));
|
121
|
+
}
|
86
122
|
}
|
87
123
|
result->SetCardinality(cardinality);
|
88
124
|
info.entries.push_back(std::move(result));
|
@@ -92,13 +128,15 @@ struct TestVectorFlat {
|
|
92
128
|
|
93
129
|
struct TestVectorConstant {
|
94
130
|
static void Generate(TestVectorInfo &info) {
|
95
|
-
auto values = TestVectorFlat::GenerateValues(info
|
131
|
+
auto values = TestVectorFlat::GenerateValues(info);
|
96
132
|
for (idx_t cur_row = 0; cur_row < TestVectorFlat::TEST_VECTOR_CARDINALITY; cur_row += STANDARD_VECTOR_SIZE) {
|
97
133
|
auto result = make_uniq<DataChunk>();
|
98
|
-
result->Initialize(Allocator::DefaultAllocator(),
|
134
|
+
result->Initialize(Allocator::DefaultAllocator(), info.types);
|
99
135
|
auto cardinality = MinValue<idx_t>(STANDARD_VECTOR_SIZE, TestVectorFlat::TEST_VECTOR_CARDINALITY - cur_row);
|
100
|
-
|
101
|
-
|
136
|
+
for (idx_t c = 0; c < info.types.size(); c++) {
|
137
|
+
result->data[c].SetValue(0, values.GetValue(0, c));
|
138
|
+
result->data[c].SetVectorType(VectorType::CONSTANT_VECTOR);
|
139
|
+
}
|
102
140
|
result->SetCardinality(cardinality);
|
103
141
|
|
104
142
|
info.entries.push_back(std::move(result));
|
@@ -160,9 +198,11 @@ struct TestVectorSequence {
|
|
160
198
|
static void Generate(TestVectorInfo &info) {
|
161
199
|
#if STANDARD_VECTOR_SIZE > 2
|
162
200
|
auto result = make_uniq<DataChunk>();
|
163
|
-
result->Initialize(Allocator::DefaultAllocator(),
|
201
|
+
result->Initialize(Allocator::DefaultAllocator(), info.types);
|
164
202
|
|
165
|
-
|
203
|
+
for (idx_t c = 0; c < info.types.size(); c++) {
|
204
|
+
GenerateVector(info, info.types[c], result->data[c]);
|
205
|
+
}
|
166
206
|
result->SetCardinality(3);
|
167
207
|
info.entries.push_back(std::move(result));
|
168
208
|
#endif
|
@@ -195,11 +235,23 @@ struct TestVectorDictionary {
|
|
195
235
|
static unique_ptr<FunctionData> TestVectorTypesBind(ClientContext &context, TableFunctionBindInput &input,
|
196
236
|
vector<LogicalType> &return_types, vector<string> &names) {
|
197
237
|
auto result = make_uniq<TestVectorBindData>();
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
238
|
+
for (idx_t i = 0; i < input.inputs.size(); i++) {
|
239
|
+
string name = "test_vector";
|
240
|
+
if (i > 0) {
|
241
|
+
name += to_string(i + 1);
|
242
|
+
}
|
243
|
+
auto &input_val = input.inputs[i];
|
244
|
+
names.emplace_back(name);
|
245
|
+
return_types.push_back(input_val.type());
|
246
|
+
result->types.push_back(input_val.type());
|
247
|
+
}
|
248
|
+
for (auto &entry : input.named_parameters) {
|
249
|
+
if (entry.first == "all_flat") {
|
250
|
+
result->all_flat = BooleanValue::Get(entry.second);
|
251
|
+
} else {
|
252
|
+
throw InternalException("Unrecognized named parameter for test_vector_types");
|
253
|
+
}
|
254
|
+
}
|
203
255
|
return std::move(result);
|
204
256
|
}
|
205
257
|
|
@@ -215,7 +267,7 @@ unique_ptr<GlobalTableFunctionState> TestVectorTypesInit(ClientContext &context,
|
|
215
267
|
test_type_map.insert(make_pair(test_type.type.id(), std::move(test_type)));
|
216
268
|
}
|
217
269
|
|
218
|
-
TestVectorInfo info(bind_data.
|
270
|
+
TestVectorInfo info(bind_data.types, test_type_map, result->entries);
|
219
271
|
TestVectorFlat::Generate(info);
|
220
272
|
TestVectorConstant::Generate(info);
|
221
273
|
TestVectorDictionary::Generate(info);
|
@@ -243,8 +295,12 @@ void TestVectorTypesFunction(ClientContext &context, TableFunctionInput &data_p,
|
|
243
295
|
}
|
244
296
|
|
245
297
|
void TestVectorTypesFun::RegisterFunction(BuiltinFunctions &set) {
|
246
|
-
|
247
|
-
|
298
|
+
TableFunction test_vector_types("test_vector_types", {LogicalType::ANY}, TestVectorTypesFunction,
|
299
|
+
TestVectorTypesBind, TestVectorTypesInit);
|
300
|
+
test_vector_types.varargs = LogicalType::ANY;
|
301
|
+
test_vector_types.named_parameters["all_flat"] = LogicalType::BOOLEAN;
|
302
|
+
|
303
|
+
set.AddFunction(std::move(test_vector_types));
|
248
304
|
}
|
249
305
|
|
250
306
|
} // namespace duckdb
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.8.1-
|
2
|
+
#define DUCKDB_VERSION "0.8.1-dev276"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "98475f4555"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -348,17 +348,17 @@
|
|
348
348
|
|
349
349
|
#include "extension/icu/third_party/icu/i18n/wintzimpl.cpp"
|
350
350
|
|
351
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
|
352
|
-
|
353
351
|
#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp"
|
354
352
|
|
355
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-
|
353
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
|
356
354
|
|
357
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-
|
355
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp"
|
358
356
|
|
359
357
|
#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp"
|
360
358
|
|
359
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp"
|
360
|
+
|
361
361
|
#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp"
|
362
362
|
|
363
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-
|
363
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp"
|
364
364
|
|