duckdb 0.7.2-dev3353.0 → 0.7.2-dev3441.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -3
- package/src/duckdb/extension/json/include/json_functions.hpp +5 -1
- package/src/duckdb/extension/json/include/json_scan.hpp +1 -0
- package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
- package/src/duckdb/extension/json/json-extension.cpp +7 -3
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +16 -5
- package/src/duckdb/extension/json/json_functions/json_create.cpp +220 -93
- package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +2 -2
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +283 -117
- package/src/duckdb/extension/json/json_functions/read_json.cpp +8 -6
- package/src/duckdb/extension/json/json_functions.cpp +17 -15
- package/src/duckdb/extension/json/json_scan.cpp +8 -4
- package/src/duckdb/extension/parquet/column_reader.cpp +6 -2
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -2
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +2 -2
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -0
- package/src/duckdb/extension/parquet/include/thrift_tools.hpp +3 -5
- package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -4
- package/src/duckdb/extension/parquet/parquet_reader.cpp +11 -22
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +5 -0
- package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -4
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
- package/src/duckdb/src/common/file_system.cpp +13 -20
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +10 -7
- package/src/duckdb/src/execution/expression_executor/execute_between.cpp +3 -0
- package/src/duckdb/src/execution/index/art/art.cpp +3 -1
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +1 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +4 -5
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +1 -1
- package/src/duckdb/src/function/cast/cast_function_set.cpp +89 -25
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +20 -15
- package/src/duckdb/src/function/table/copy_csv.cpp +4 -5
- package/src/duckdb/src/function/table/read_csv.cpp +6 -5
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +118 -0
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -2
- package/src/duckdb/src/include/duckdb/common/types/type_map.hpp +19 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +2 -0
- package/src/duckdb/src/main/client_context.cpp +1 -4
- package/src/duckdb/src/main/client_data.cpp +19 -0
- package/src/duckdb/src/main/database.cpp +4 -1
- package/src/duckdb/src/main/extension/extension_install.cpp +5 -6
- package/src/duckdb/src/main/extension/extension_load.cpp +11 -16
- package/src/duckdb/src/main/settings/settings.cpp +2 -3
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -1
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +25 -1
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +32 -35
- package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -25
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +7998 -7955
@@ -1,8 +1,10 @@
|
|
1
1
|
#include "json_transform.hpp"
|
2
2
|
|
3
|
-
#include "duckdb/common/types.hpp"
|
4
3
|
#include "duckdb/common/enum_util.hpp"
|
4
|
+
#include "duckdb/common/types.hpp"
|
5
5
|
#include "duckdb/execution/expression_executor.hpp"
|
6
|
+
#include "duckdb/function/cast/cast_function_set.hpp"
|
7
|
+
#include "duckdb/function/cast/default_casts.hpp"
|
6
8
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
7
9
|
#include "json_functions.hpp"
|
8
10
|
#include "json_scan.hpp"
|
@@ -23,7 +25,7 @@ void JSONTransformOptions::Serialize(FieldWriter &writer) {
|
|
23
25
|
writer.WriteField(error_duplicate_key);
|
24
26
|
writer.WriteField(error_missing_key);
|
25
27
|
writer.WriteField(error_unknown_key);
|
26
|
-
writer.WriteField(
|
28
|
+
writer.WriteField(delay_error);
|
27
29
|
}
|
28
30
|
|
29
31
|
void JSONTransformOptions::Deserialize(FieldReader &reader) {
|
@@ -31,7 +33,7 @@ void JSONTransformOptions::Deserialize(FieldReader &reader) {
|
|
31
33
|
error_duplicate_key = reader.ReadRequired<bool>();
|
32
34
|
error_missing_key = reader.ReadRequired<bool>();
|
33
35
|
error_unknown_key = reader.ReadRequired<bool>();
|
34
|
-
|
36
|
+
delay_error = reader.ReadRequired<bool>();
|
35
37
|
}
|
36
38
|
|
37
39
|
//! Forward declaration for recursion
|
@@ -78,8 +80,8 @@ static LogicalType StructureStringToType(yyjson_val *val, ClientContext &context
|
|
78
80
|
}
|
79
81
|
}
|
80
82
|
|
81
|
-
static
|
82
|
-
|
83
|
+
static unique_ptr<FunctionData> JSONTransformBind(ClientContext &context, ScalarFunction &bound_function,
|
84
|
+
vector<unique_ptr<Expression>> &arguments) {
|
83
85
|
D_ASSERT(bound_function.arguments.size() == 2);
|
84
86
|
if (arguments[1]->HasParameter()) {
|
85
87
|
throw ParameterNotResolvedException();
|
@@ -217,19 +219,20 @@ static bool TransformNumerical(yyjson_val *vals[], Vector &result, const idx_t c
|
|
217
219
|
auto data = (T *)FlatVector::GetData(result);
|
218
220
|
auto &validity = FlatVector::Validity(result);
|
219
221
|
|
222
|
+
bool success = true;
|
220
223
|
for (idx_t i = 0; i < count; i++) {
|
221
224
|
const auto &val = vals[i];
|
222
225
|
if (!val || unsafe_yyjson_is_null(val)) {
|
223
226
|
validity.SetInvalid(i);
|
224
227
|
} else if (!GetValueNumerical<T>(val, data[i], options)) {
|
225
228
|
validity.SetInvalid(i);
|
226
|
-
if (options.strict_cast) {
|
229
|
+
if (success && options.strict_cast) {
|
227
230
|
options.object_index = i;
|
228
|
-
|
231
|
+
success = false;
|
229
232
|
}
|
230
233
|
}
|
231
234
|
}
|
232
|
-
return
|
235
|
+
return success;
|
233
236
|
}
|
234
237
|
|
235
238
|
template <class T>
|
@@ -238,19 +241,20 @@ static bool TransformDecimal(yyjson_val *vals[], Vector &result, const idx_t cou
|
|
238
241
|
auto data = (T *)FlatVector::GetData(result);
|
239
242
|
auto &validity = FlatVector::Validity(result);
|
240
243
|
|
244
|
+
bool success = true;
|
241
245
|
for (idx_t i = 0; i < count; i++) {
|
242
246
|
const auto &val = vals[i];
|
243
247
|
if (!val || unsafe_yyjson_is_null(val)) {
|
244
248
|
validity.SetInvalid(i);
|
245
249
|
} else if (!GetValueDecimal<T>(val, data[i], width, scale, options)) {
|
246
250
|
validity.SetInvalid(i);
|
247
|
-
if (options.strict_cast) {
|
251
|
+
if (success && options.strict_cast) {
|
248
252
|
options.object_index = i;
|
249
|
-
|
253
|
+
success = false;
|
250
254
|
}
|
251
255
|
}
|
252
256
|
}
|
253
|
-
return
|
257
|
+
return success;
|
254
258
|
}
|
255
259
|
|
256
260
|
bool JSONTransform::GetStringVector(yyjson_val *vals[], const idx_t count, const LogicalType &target,
|
@@ -262,35 +266,46 @@ bool JSONTransform::GetStringVector(yyjson_val *vals[], const idx_t count, const
|
|
262
266
|
auto &validity = FlatVector::Validity(string_vector);
|
263
267
|
validity.SetAllValid(count);
|
264
268
|
|
269
|
+
bool success = true;
|
265
270
|
for (idx_t i = 0; i < count; i++) {
|
266
271
|
const auto &val = vals[i];
|
267
272
|
if (!val || unsafe_yyjson_is_null(val)) {
|
268
273
|
validity.SetInvalid(i);
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
274
|
+
continue;
|
275
|
+
}
|
276
|
+
|
277
|
+
if (!unsafe_yyjson_is_str(val)) {
|
278
|
+
validity.SetInvalid(i);
|
279
|
+
if (success && options.strict_cast && !unsafe_yyjson_is_str(val)) {
|
280
|
+
options.error_message = StringUtil::Format("Unable to cast '%s' to " + EnumUtil::ToString(target.id()),
|
281
|
+
JSONCommon::ValToString(val, 50));
|
282
|
+
options.object_index = i;
|
283
|
+
success = false;
|
284
|
+
}
|
285
|
+
continue;
|
276
286
|
}
|
287
|
+
|
288
|
+
data[i] = GetString(val);
|
277
289
|
}
|
278
|
-
return
|
290
|
+
return success;
|
279
291
|
}
|
280
292
|
|
281
293
|
static bool TransformFromString(yyjson_val *vals[], Vector &result, const idx_t count, JSONTransformOptions &options) {
|
282
294
|
Vector string_vector(LogicalTypeId::VARCHAR, count);
|
295
|
+
|
296
|
+
bool success = true;
|
283
297
|
if (!JSONTransform::GetStringVector(vals, count, result.GetType(), string_vector, options)) {
|
284
|
-
|
298
|
+
success = false;
|
285
299
|
}
|
286
300
|
|
287
301
|
if (!VectorOperations::DefaultTryCast(string_vector, result, count, &options.error_message) &&
|
288
302
|
options.strict_cast) {
|
289
303
|
options.object_index = 0; // Can't get line number information here
|
290
|
-
options.error_message +=
|
291
|
-
|
304
|
+
options.error_message +=
|
305
|
+
"\n If this error occurred during read_json, line/object number information is approximate";
|
306
|
+
success = false;
|
292
307
|
}
|
293
|
-
return
|
308
|
+
return success;
|
294
309
|
}
|
295
310
|
|
296
311
|
template <class OP, class T>
|
@@ -302,13 +317,14 @@ static bool TransformStringWithFormat(Vector &string_vector, StrpTimeFormat &for
|
|
302
317
|
auto target_vals = FlatVector::GetData<T>(result);
|
303
318
|
auto &target_validity = FlatVector::Validity(result);
|
304
319
|
|
320
|
+
bool success = true;
|
305
321
|
if (source_validity.AllValid()) {
|
306
322
|
for (idx_t i = 0; i < count; i++) {
|
307
323
|
if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], options.error_message)) {
|
308
324
|
target_validity.SetInvalid(i);
|
309
|
-
if (options.strict_cast) {
|
325
|
+
if (success && options.strict_cast) {
|
310
326
|
options.object_index = i;
|
311
|
-
|
327
|
+
success = false;
|
312
328
|
}
|
313
329
|
}
|
314
330
|
}
|
@@ -318,21 +334,22 @@ static bool TransformStringWithFormat(Vector &string_vector, StrpTimeFormat &for
|
|
318
334
|
target_validity.SetInvalid(i);
|
319
335
|
} else if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], options.error_message)) {
|
320
336
|
target_validity.SetInvalid(i);
|
321
|
-
if (options.strict_cast) {
|
337
|
+
if (success && options.strict_cast) {
|
322
338
|
options.object_index = i;
|
323
|
-
|
339
|
+
success = false;
|
324
340
|
}
|
325
341
|
}
|
326
342
|
}
|
327
343
|
}
|
328
|
-
return
|
344
|
+
return success;
|
329
345
|
}
|
330
346
|
|
331
347
|
static bool TransformFromStringWithFormat(yyjson_val *vals[], Vector &result, const idx_t count,
|
332
348
|
JSONTransformOptions &options) {
|
333
349
|
Vector string_vector(LogicalTypeId::VARCHAR, count);
|
350
|
+
bool success = true;
|
334
351
|
if (!JSONTransform::GetStringVector(vals, count, result.GetType(), string_vector, options)) {
|
335
|
-
|
352
|
+
success = false;
|
336
353
|
}
|
337
354
|
|
338
355
|
const auto &result_type = result.GetType().id();
|
@@ -340,12 +357,19 @@ static bool TransformFromStringWithFormat(yyjson_val *vals[], Vector &result, co
|
|
340
357
|
|
341
358
|
switch (result_type) {
|
342
359
|
case LogicalTypeId::DATE:
|
343
|
-
|
360
|
+
if (!TransformStringWithFormat<TryParseDate, date_t>(string_vector, format, count, result, options)) {
|
361
|
+
success = false;
|
362
|
+
}
|
363
|
+
break;
|
344
364
|
case LogicalTypeId::TIMESTAMP:
|
345
|
-
|
365
|
+
if (!TransformStringWithFormat<TryParseTimeStamp, timestamp_t>(string_vector, format, count, result, options)) {
|
366
|
+
success = false;
|
367
|
+
}
|
368
|
+
break;
|
346
369
|
default:
|
347
370
|
throw InternalException("No date/timestamp formats for %s", EnumUtil::ToString(result.GetType().id()));
|
348
371
|
}
|
372
|
+
return success;
|
349
373
|
}
|
350
374
|
|
351
375
|
static bool TransformToString(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count) {
|
@@ -387,89 +411,103 @@ bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
|
|
387
411
|
size_t idx, max;
|
388
412
|
yyjson_val *key, *val;
|
389
413
|
for (idx_t i = 0; i < count; i++) {
|
390
|
-
|
391
|
-
|
414
|
+
const auto &obj = objects[i];
|
415
|
+
if (!obj || unsafe_yyjson_is_null(obj)) {
|
416
|
+
// Set nested val to null so the recursion doesn't break
|
417
|
+
for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
|
418
|
+
nested_vals[col_idx][i] = nullptr;
|
419
|
+
}
|
420
|
+
continue;
|
421
|
+
}
|
422
|
+
|
423
|
+
if (!unsafe_yyjson_is_obj(obj)) {
|
424
|
+
// Set nested val to null so the recursion doesn't break
|
425
|
+
for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
|
426
|
+
nested_vals[col_idx][i] = nullptr;
|
427
|
+
}
|
428
|
+
if (success && options.strict_cast && obj) {
|
392
429
|
options.error_message =
|
393
|
-
StringUtil::Format("Expected OBJECT, but got %s: %s", JSONCommon::ValTypeToString(
|
394
|
-
JSONCommon::ValToString(
|
430
|
+
StringUtil::Format("Expected OBJECT, but got %s: %s", JSONCommon::ValTypeToString(obj),
|
431
|
+
JSONCommon::ValToString(obj, 50));
|
395
432
|
options.object_index = i;
|
396
433
|
success = false;
|
397
|
-
break;
|
398
434
|
}
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
435
|
+
continue;
|
436
|
+
}
|
437
|
+
|
438
|
+
found_key_count = 0;
|
439
|
+
memset(found_keys, false, column_count);
|
440
|
+
yyjson_obj_foreach(objects[i], idx, max, key, val) {
|
441
|
+
auto key_ptr = unsafe_yyjson_get_str(key);
|
442
|
+
auto key_len = unsafe_yyjson_get_len(key);
|
443
|
+
auto it = key_map.find({key_ptr, key_len});
|
444
|
+
if (it != key_map.end()) {
|
445
|
+
const auto &col_idx = it->second;
|
446
|
+
if (found_keys[col_idx]) {
|
447
|
+
if (success && options.error_duplicate_key) {
|
408
448
|
options.error_message =
|
409
449
|
StringUtil::Format("Duplicate key \"" + string(key_ptr, key_len) + "\" in object %s",
|
410
450
|
JSONCommon::ValToString(objects[i], 50));
|
411
451
|
options.object_index = i;
|
412
452
|
success = false;
|
413
|
-
break;
|
414
453
|
}
|
454
|
+
} else {
|
415
455
|
nested_vals[col_idx][i] = val;
|
416
456
|
found_keys[col_idx] = true;
|
417
457
|
found_key_count++;
|
418
|
-
} else if (options.error_unknown_key) {
|
419
|
-
options.error_message =
|
420
|
-
StringUtil::Format("Object %s has unknown key \"" + string(key_ptr, key_len) + "\"",
|
421
|
-
JSONCommon::ValToString(objects[i], 50));
|
422
|
-
options.object_index = i;
|
423
|
-
success = false;
|
424
|
-
}
|
425
|
-
}
|
426
|
-
if (found_key_count != column_count) {
|
427
|
-
// If 'error_missing_key, we throw an error if one of the keys was not found.
|
428
|
-
// If not, we set the nested val to null so the recursion doesn't break
|
429
|
-
for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
|
430
|
-
if (!found_keys[col_idx]) {
|
431
|
-
if (options.error_missing_key) {
|
432
|
-
options.error_message =
|
433
|
-
StringUtil::Format("Object %s does not have key \"" + names[col_idx] + "\"",
|
434
|
-
JSONCommon::ValToString(objects[i], 50));
|
435
|
-
options.object_index = i;
|
436
|
-
success = false;
|
437
|
-
} else {
|
438
|
-
nested_vals[col_idx][i] = nullptr;
|
439
|
-
}
|
440
|
-
}
|
441
458
|
}
|
459
|
+
} else if (success && options.error_unknown_key) {
|
460
|
+
options.error_message =
|
461
|
+
StringUtil::Format("Object %s has unknown key \"" + string(key_ptr, key_len) + "\"",
|
462
|
+
JSONCommon::ValToString(objects[i], 50));
|
463
|
+
options.object_index = i;
|
464
|
+
success = false;
|
442
465
|
}
|
443
|
-
}
|
444
|
-
|
466
|
+
}
|
467
|
+
|
468
|
+
if (found_key_count != column_count) {
|
469
|
+
// If 'error_missing_key, we throw an error if one of the keys was not found.
|
470
|
+
// If not, we set the nested val to null so the recursion doesn't break
|
445
471
|
for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
|
472
|
+
if (found_keys[col_idx]) {
|
473
|
+
continue;
|
474
|
+
}
|
446
475
|
nested_vals[col_idx][i] = nullptr;
|
476
|
+
|
477
|
+
if (success && options.error_missing_key) {
|
478
|
+
options.error_message = StringUtil::Format("Object %s does not have key \"" + names[col_idx] + "\"",
|
479
|
+
JSONCommon::ValToString(objects[i], 50));
|
480
|
+
options.object_index = i;
|
481
|
+
success = false;
|
482
|
+
}
|
447
483
|
}
|
448
484
|
}
|
449
485
|
}
|
450
486
|
|
451
|
-
|
452
|
-
if (!options
|
453
|
-
|
487
|
+
for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
|
488
|
+
if (!JSONTransform::Transform(nested_vals[col_idx], alc, *result_vectors[col_idx], count, options)) {
|
489
|
+
success = false;
|
454
490
|
}
|
455
|
-
return false;
|
456
491
|
}
|
457
492
|
|
458
|
-
|
459
|
-
|
460
|
-
continue;
|
461
|
-
}
|
462
|
-
if (!options.from_file) {
|
463
|
-
throw InvalidInputException(options.error_message);
|
464
|
-
}
|
465
|
-
return false;
|
493
|
+
if (!options.delay_error && !success) {
|
494
|
+
throw InvalidInputException(options.error_message);
|
466
495
|
}
|
467
496
|
|
468
497
|
return success;
|
469
498
|
}
|
470
499
|
|
471
500
|
static bool TransformObjectInternal(yyjson_val *objects[], yyjson_alc *alc, Vector &result, const idx_t count,
|
472
|
-
|
501
|
+
JSONTransformOptions &options) {
|
502
|
+
// Set validity first
|
503
|
+
auto &result_validity = FlatVector::Validity(result);
|
504
|
+
for (idx_t i = 0; i < count; i++) {
|
505
|
+
const auto &obj = objects[i];
|
506
|
+
if (!obj || unsafe_yyjson_is_null(obj)) {
|
507
|
+
result_validity.SetInvalid(i);
|
508
|
+
}
|
509
|
+
}
|
510
|
+
|
473
511
|
// Get child vectors and names
|
474
512
|
auto &child_vs = StructVector::GetEntries(result);
|
475
513
|
vector<string> child_names;
|
@@ -477,7 +515,7 @@ static bool TransformObjectInternal(yyjson_val *objects[], yyjson_alc *alc, Vect
|
|
477
515
|
child_names.reserve(child_vs.size());
|
478
516
|
child_vectors.reserve(child_vs.size());
|
479
517
|
for (idx_t child_i = 0; child_i < child_vs.size(); child_i++) {
|
480
|
-
child_names.push_back(StructType::GetChildName(
|
518
|
+
child_names.push_back(StructType::GetChildName(result.GetType(), child_i));
|
481
519
|
child_vectors.push_back(child_vs[child_i].get());
|
482
520
|
}
|
483
521
|
|
@@ -486,29 +524,35 @@ static bool TransformObjectInternal(yyjson_val *objects[], yyjson_alc *alc, Vect
|
|
486
524
|
|
487
525
|
static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result, const idx_t count,
|
488
526
|
JSONTransformOptions &options) {
|
527
|
+
bool success = true;
|
528
|
+
|
489
529
|
// Initialize list vector
|
490
530
|
auto list_entries = FlatVector::GetData<list_entry_t>(result);
|
491
531
|
auto &list_validity = FlatVector::Validity(result);
|
492
532
|
idx_t offset = 0;
|
493
533
|
for (idx_t i = 0; i < count; i++) {
|
494
|
-
|
534
|
+
const auto &arr = arrays[i];
|
535
|
+
if (!arr || unsafe_yyjson_is_null(arr)) {
|
536
|
+
list_validity.SetInvalid(i);
|
537
|
+
continue;
|
538
|
+
}
|
539
|
+
|
540
|
+
if (!unsafe_yyjson_is_arr(arr)) {
|
495
541
|
list_validity.SetInvalid(i);
|
496
|
-
|
497
|
-
if (options.strict_cast) {
|
542
|
+
if (success && options.strict_cast) {
|
498
543
|
options.error_message =
|
499
544
|
StringUtil::Format("Expected ARRAY, but got %s: %s", JSONCommon::ValTypeToString(arrays[i]),
|
500
545
|
JSONCommon::ValToString(arrays[i], 50));
|
501
546
|
options.object_index = i;
|
502
|
-
|
503
|
-
} else {
|
504
|
-
list_validity.SetInvalid(i);
|
547
|
+
success = false;
|
505
548
|
}
|
506
|
-
|
507
|
-
auto &entry = list_entries[i];
|
508
|
-
entry.offset = offset;
|
509
|
-
entry.length = unsafe_yyjson_get_len(arrays[i]);
|
510
|
-
offset += entry.length;
|
549
|
+
continue;
|
511
550
|
}
|
551
|
+
|
552
|
+
auto &entry = list_entries[i];
|
553
|
+
entry.offset = offset;
|
554
|
+
entry.length = unsafe_yyjson_get_len(arr);
|
555
|
+
offset += entry.length;
|
512
556
|
}
|
513
557
|
ListVector::SetListSize(result, offset);
|
514
558
|
ListVector::Reserve(result, offset);
|
@@ -522,8 +566,7 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
|
|
522
566
|
idx_t list_i = 0;
|
523
567
|
for (idx_t i = 0; i < count; i++) {
|
524
568
|
if (!list_validity.RowIsValid(i)) {
|
525
|
-
// We already marked this as invalid
|
526
|
-
continue;
|
569
|
+
continue; // We already marked this as invalid
|
527
570
|
}
|
528
571
|
yyjson_arr_foreach(arrays[i], idx, max, val) {
|
529
572
|
nested_vals[list_i] = val;
|
@@ -532,9 +575,7 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
|
|
532
575
|
}
|
533
576
|
D_ASSERT(list_i == offset);
|
534
577
|
|
535
|
-
|
536
|
-
auto success = JSONTransform::Transform(nested_vals, alc, ListVector::GetEntry(result), offset, options);
|
537
|
-
if (!success && options.from_file) {
|
578
|
+
if (!success) {
|
538
579
|
// Set object index in case of error in nested list so we can get accurate line number information
|
539
580
|
for (idx_t i = 0; i < count; i++) {
|
540
581
|
if (!list_validity.RowIsValid(i)) {
|
@@ -546,6 +587,89 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
|
|
546
587
|
}
|
547
588
|
}
|
548
589
|
}
|
590
|
+
|
591
|
+
// Transform array values
|
592
|
+
if (!JSONTransform::Transform(nested_vals, alc, ListVector::GetEntry(result), offset, options)) {
|
593
|
+
success = false;
|
594
|
+
}
|
595
|
+
|
596
|
+
if (!options.delay_error && !success) {
|
597
|
+
throw InvalidInputException(options.error_message);
|
598
|
+
}
|
599
|
+
|
600
|
+
return success;
|
601
|
+
}
|
602
|
+
|
603
|
+
//! Transforms JSON objects into a MAP vector: each key/value pair of objects[i] becomes one entry of row i.
//! Rows whose value is missing, JSON null, or not a JSON object are marked NULL in "result".
//! \param objects Array of "count" JSON values (entries may be nullptr)
//! \param alc     yyjson allocator used for the temporary key/value pointer arrays
//! \param result  Vector that receives the list entries, keys and values
//! \param count   Number of rows in "objects"
//! \param options Transform options; error_message/object_index are filled in for the first error found here
//! \return false if a non-object row was rejected under strict_cast or a value failed to transform
//! Throws ConversionException when the keys fail to transform, and InvalidInputException on any other
//! failure unless options.delay_error is set.
static bool TransformObjectToMap(yyjson_val *objects[], yyjson_alc *alc, Vector &result, const idx_t count,
                                 JSONTransformOptions &options) {
	// Pre-allocate list vector: one child slot per key/value pair over all rows that are objects
	idx_t list_size = 0;
	for (idx_t i = 0; i < count; i++) {
		const auto &obj = objects[i];
		if (!obj || !unsafe_yyjson_is_obj(obj)) {
			continue;
		}
		list_size += unsafe_yyjson_get_len(obj);
	}
	ListVector::Reserve(result, list_size);
	ListVector::SetListSize(result, list_size);

	auto list_entries = FlatVector::GetData<list_entry_t>(result);
	// Bind by reference: plain 'auto' would deduce a value, so SetInvalid would act on a copy of the mask
	auto &list_validity = FlatVector::Validity(result);

	// Flattened key/value pointers of all rows, in row order
	auto keys = (yyjson_val **)alc->malloc(alc->ctx, sizeof(yyjson_val *) * list_size);
	auto vals = (yyjson_val **)alc->malloc(alc->ctx, sizeof(yyjson_val *) * list_size);

	bool success = true;
	idx_t list_offset = 0;

	size_t idx, max;
	yyjson_val *key, *val;
	for (idx_t i = 0; i < count; i++) {
		const auto &obj = objects[i];
		if (!obj || unsafe_yyjson_is_null(obj)) {
			list_validity.SetInvalid(i);
			continue;
		}

		if (!unsafe_yyjson_is_obj(obj)) {
			list_validity.SetInvalid(i);
			// Only the first error is recorded; later ones must not overwrite message/index
			if (success && options.strict_cast) {
				options.error_message =
				    StringUtil::Format("Expected OBJECT, but got %s: %s", JSONCommon::ValTypeToString(obj),
				                       JSONCommon::ValToString(obj, 50));
				options.object_index = i;
				success = false;
			}
			continue;
		}

		auto &list_entry = list_entries[i];
		list_entry.offset = list_offset;
		list_entry.length = unsafe_yyjson_get_len(obj);

		yyjson_obj_foreach(obj, idx, max, key, val) {
			keys[list_offset] = key;
			vals[list_offset] = val;
			list_offset++;
		}
	}

	// Transform keys
	if (!JSONTransform::Transform(keys, alc, MapVector::GetKeys(result), list_size, options)) {
		// Append by concatenation: the message is data, not a format string
		throw ConversionException(options.error_message +
		                          ". Cannot default to NULL, because map keys cannot be NULL");
	}

	// Transform values
	if (!JSONTransform::Transform(vals, alc, MapVector::GetValues(result), list_size, options)) {
		success = false;
	}

	if (!options.delay_error && !success) {
		throw InvalidInputException(options.error_message);
	}

	return success;
}
|
551
675
|
|
@@ -636,27 +760,25 @@ bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &resul
|
|
636
760
|
case LogicalTypeId::BLOB:
|
637
761
|
return TransformToString(vals, alc, result, count);
|
638
762
|
case LogicalTypeId::STRUCT:
|
639
|
-
return TransformObjectInternal(vals, alc, result, count,
|
763
|
+
return TransformObjectInternal(vals, alc, result, count, options);
|
640
764
|
case LogicalTypeId::LIST:
|
641
765
|
return TransformArray(vals, alc, result, count, options);
|
766
|
+
case LogicalTypeId::MAP:
|
767
|
+
return TransformObjectToMap(vals, alc, result, count, options);
|
642
768
|
default:
|
643
769
|
throw InternalException("Unexpected type at JSON Transform %s", result_type.ToString());
|
644
770
|
}
|
645
771
|
}
|
646
772
|
|
647
|
-
|
648
|
-
|
649
|
-
auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
|
650
|
-
auto alc = lstate.json_allocator.GetYYJSONAllocator();
|
651
|
-
|
652
|
-
const auto count = args.size();
|
653
|
-
auto &input = args.data[0];
|
773
|
+
static bool TransformFunctionInternal(Vector &input, const idx_t count, Vector &result, yyjson_alc *alc,
|
774
|
+
JSONTransformOptions &options) {
|
654
775
|
UnifiedVectorFormat input_data;
|
655
776
|
input.ToUnifiedFormat(count, input_data);
|
656
777
|
auto inputs = (string_t *)input_data.data;
|
778
|
+
|
657
779
|
// Read documents
|
658
|
-
yyjson_doc *
|
659
|
-
yyjson_val *
|
780
|
+
auto docs = (yyjson_doc **)alc->malloc(alc->ctx, sizeof(yyjson_doc *) * count);
|
781
|
+
auto vals = (yyjson_val **)alc->malloc(alc->ctx, sizeof(yyjson_val *) * count);
|
660
782
|
auto &result_validity = FlatVector::Validity(result);
|
661
783
|
for (idx_t i = 0; i < count; i++) {
|
662
784
|
auto idx = input_data.sel->get_index(i);
|
@@ -670,14 +792,22 @@ static void TransformFunction(DataChunk &args, ExpressionState &state, Vector &r
|
|
670
792
|
}
|
671
793
|
}
|
672
794
|
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
throw InvalidInputException(options.error_message);
|
795
|
+
auto success = JSONTransform::Transform(vals, alc, result, count, options);
|
796
|
+
if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
797
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
677
798
|
}
|
678
799
|
|
679
|
-
|
680
|
-
|
800
|
+
return success;
|
801
|
+
}
|
802
|
+
|
803
|
+
template <bool strict>
|
804
|
+
static void TransformFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
805
|
+
auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
|
806
|
+
auto alc = lstate.json_allocator.GetYYJSONAllocator();
|
807
|
+
|
808
|
+
JSONTransformOptions options(strict, strict, strict, false);
|
809
|
+
if (!TransformFunctionInternal(args.data[0], args.size(), result, alc, options)) {
|
810
|
+
throw InvalidInputException(options.error_message);
|
681
811
|
}
|
682
812
|
}
|
683
813
|
|
@@ -705,4 +835,40 @@ ScalarFunctionSet JSONFunctions::GetTransformStrictFunction() {
|
|
705
835
|
return set;
|
706
836
|
}
|
707
837
|
|
838
|
+
static bool JSONToAnyCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) {
|
839
|
+
auto &lstate = parameters.local_state->Cast<JSONFunctionLocalState>();
|
840
|
+
lstate.json_allocator.Reset();
|
841
|
+
auto alc = lstate.json_allocator.GetYYJSONAllocator();
|
842
|
+
|
843
|
+
JSONTransformOptions options(true, true, true, true);
|
844
|
+
options.delay_error = true;
|
845
|
+
|
846
|
+
auto success = TransformFunctionInternal(source, count, result, alc, options);
|
847
|
+
if (!success) {
|
848
|
+
HandleCastError::AssignError(options.error_message, parameters.error_message);
|
849
|
+
}
|
850
|
+
return success;
|
851
|
+
}
|
852
|
+
|
853
|
+
//! Bind callback for the JSON casts: always yields JSONToAnyCast with no cast data and
//! JSONFunctionLocalState::InitCastLocalState as local-state initializer. The arguments are not inspected.
BoundCastInfo JSONToAnyCastBind(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
	BoundCastInfo cast_info(JSONToAnyCast, nullptr, JSONFunctionLocalState::InitCastLocalState);
	return cast_info;
}
|
856
|
+
|
857
|
+
//! Registers JSONToAnyCastBind as the cast from the JSON type to ANY and to the generic nested types
//! STRUCT(any ANY), LIST(ANY) and MAP(ANY, ANY).
void JSONFunctions::RegisterJSONTransformCastFunctions(CastFunctionSet &casts) {
	// JSON -> ANY keeps the implicit cast cost reported by the cast set
	const auto any_cost = casts.ImplicitCastCost(JSONCommon::JSONType(), LogicalType::ANY);
	casts.RegisterCastFunction(JSONCommon::JSONType(), LogicalType::ANY, JSONToAnyCastBind, any_cost);

	// Nested targets are registered at the VARCHAR -> target implicit cost minus 2
	// NOTE(review): presumably so the JSON cast is preferred over the VARCHAR cast - confirm
	auto register_nested = [&casts](const LogicalType &target_type) {
		auto nested_cost = casts.ImplicitCastCost(LogicalType::VARCHAR, target_type) - 2;
		casts.RegisterCastFunction(JSONCommon::JSONType(), target_type, JSONToAnyCastBind, nested_cost);
	};

	register_nested(LogicalType::STRUCT({{"any", LogicalType::ANY}}));
	register_nested(LogicalType::LIST(LogicalType::ANY));
	register_nested(LogicalType::MAP(LogicalType::ANY, LogicalType::ANY));
}
|
873
|
+
|
708
874
|
} // namespace duckdb
|
@@ -1,8 +1,8 @@
|
|
1
|
+
#include "duckdb/common/multi_file_reader.hpp"
|
1
2
|
#include "json_functions.hpp"
|
2
3
|
#include "json_scan.hpp"
|
3
4
|
#include "json_structure.hpp"
|
4
5
|
#include "json_transform.hpp"
|
5
|
-
#include "duckdb/common/multi_file_reader.hpp"
|
6
6
|
|
7
7
|
namespace duckdb {
|
8
8
|
|
@@ -223,7 +223,7 @@ unique_ptr<FunctionData> ReadJSONBind(ClientContext &context, TableFunctionBindI
|
|
223
223
|
transform_options.error_duplicate_key = !bind_data.ignore_errors;
|
224
224
|
transform_options.error_missing_key = false;
|
225
225
|
transform_options.error_unknown_key = bind_data.auto_detect && !bind_data.ignore_errors;
|
226
|
-
transform_options.
|
226
|
+
transform_options.delay_error = true;
|
227
227
|
|
228
228
|
return result;
|
229
229
|
}
|
@@ -262,10 +262,12 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
|
|
262
262
|
}
|
263
263
|
|
264
264
|
if (!success) {
|
265
|
-
string hint =
|
266
|
-
|
267
|
-
|
268
|
-
|
265
|
+
string hint =
|
266
|
+
gstate.bind_data.auto_detect
|
267
|
+
? "\nTry increasing 'sample_size', reducing 'maximum_depth', specifying 'columns', 'lines' or "
|
268
|
+
"'json_format' manually, or setting 'ignore_errors' to true."
|
269
|
+
: "\nTry setting 'auto_detect' to true, specifying 'lines' or 'json_format' manually, or setting "
|
270
|
+
"'ignore_errors' to true.";
|
269
271
|
lstate.ThrowTransformError(lstate.transform_options.object_index,
|
270
272
|
lstate.transform_options.error_message + hint);
|
271
273
|
}
|