duckdb 0.7.2-dev3353.0 → 0.7.2-dev3441.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -3
  3. package/src/duckdb/extension/json/include/json_functions.hpp +5 -1
  4. package/src/duckdb/extension/json/include/json_scan.hpp +1 -0
  5. package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
  6. package/src/duckdb/extension/json/json-extension.cpp +7 -3
  7. package/src/duckdb/extension/json/json_functions/copy_json.cpp +16 -5
  8. package/src/duckdb/extension/json/json_functions/json_create.cpp +220 -93
  9. package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +2 -2
  10. package/src/duckdb/extension/json/json_functions/json_transform.cpp +283 -117
  11. package/src/duckdb/extension/json/json_functions/read_json.cpp +8 -6
  12. package/src/duckdb/extension/json/json_functions.cpp +17 -15
  13. package/src/duckdb/extension/json/json_scan.cpp +8 -4
  14. package/src/duckdb/extension/parquet/column_reader.cpp +6 -2
  15. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -2
  16. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +2 -2
  17. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -0
  18. package/src/duckdb/extension/parquet/include/thrift_tools.hpp +3 -5
  19. package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -4
  20. package/src/duckdb/extension/parquet/parquet_reader.cpp +11 -22
  21. package/src/duckdb/extension/parquet/parquet_statistics.cpp +5 -0
  22. package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -4
  23. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  24. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  25. package/src/duckdb/src/common/file_system.cpp +13 -20
  26. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +2 -2
  27. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +10 -7
  28. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +3 -0
  29. package/src/duckdb/src/execution/index/art/art.cpp +3 -1
  30. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
  31. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
  32. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +1 -1
  33. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +1 -2
  34. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +4 -5
  35. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +1 -1
  36. package/src/duckdb/src/function/cast/cast_function_set.cpp +89 -25
  37. package/src/duckdb/src/function/pragma/pragma_queries.cpp +20 -15
  38. package/src/duckdb/src/function/table/copy_csv.cpp +4 -5
  39. package/src/duckdb/src/function/table/read_csv.cpp +6 -5
  40. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  41. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +0 -1
  42. package/src/duckdb/src/include/duckdb/common/file_system.hpp +7 -6
  43. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +118 -0
  44. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -2
  45. package/src/duckdb/src/include/duckdb/common/types/type_map.hpp +19 -1
  46. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +3 -2
  47. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/main/client_data.hpp +4 -0
  49. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +5 -5
  50. package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -2
  51. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +2 -0
  52. package/src/duckdb/src/main/client_context.cpp +1 -4
  53. package/src/duckdb/src/main/client_data.cpp +19 -0
  54. package/src/duckdb/src/main/database.cpp +4 -1
  55. package/src/duckdb/src/main/extension/extension_install.cpp +5 -6
  56. package/src/duckdb/src/main/extension/extension_load.cpp +11 -16
  57. package/src/duckdb/src/main/settings/settings.cpp +2 -3
  58. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -1
  59. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +25 -1
  60. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +32 -35
  61. package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -25
  62. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +7998 -7955
@@ -1,8 +1,10 @@
1
1
  #include "json_transform.hpp"
2
2
 
3
- #include "duckdb/common/types.hpp"
4
3
  #include "duckdb/common/enum_util.hpp"
4
+ #include "duckdb/common/types.hpp"
5
5
  #include "duckdb/execution/expression_executor.hpp"
6
+ #include "duckdb/function/cast/cast_function_set.hpp"
7
+ #include "duckdb/function/cast/default_casts.hpp"
6
8
  #include "duckdb/function/scalar/nested_functions.hpp"
7
9
  #include "json_functions.hpp"
8
10
  #include "json_scan.hpp"
@@ -23,7 +25,7 @@ void JSONTransformOptions::Serialize(FieldWriter &writer) {
23
25
  writer.WriteField(error_duplicate_key);
24
26
  writer.WriteField(error_missing_key);
25
27
  writer.WriteField(error_unknown_key);
26
- writer.WriteField(from_file);
28
+ writer.WriteField(delay_error);
27
29
  }
28
30
 
29
31
  void JSONTransformOptions::Deserialize(FieldReader &reader) {
@@ -31,7 +33,7 @@ void JSONTransformOptions::Deserialize(FieldReader &reader) {
31
33
  error_duplicate_key = reader.ReadRequired<bool>();
32
34
  error_missing_key = reader.ReadRequired<bool>();
33
35
  error_unknown_key = reader.ReadRequired<bool>();
34
- from_file = reader.ReadRequired<bool>();
36
+ delay_error = reader.ReadRequired<bool>();
35
37
  }
36
38
 
37
39
  //! Forward declaration for recursion
@@ -78,8 +80,8 @@ static LogicalType StructureStringToType(yyjson_val *val, ClientContext &context
78
80
  }
79
81
  }
80
82
 
81
- static duckdb::unique_ptr<FunctionData> JSONTransformBind(ClientContext &context, ScalarFunction &bound_function,
82
- vector<duckdb::unique_ptr<Expression>> &arguments) {
83
+ static unique_ptr<FunctionData> JSONTransformBind(ClientContext &context, ScalarFunction &bound_function,
84
+ vector<unique_ptr<Expression>> &arguments) {
83
85
  D_ASSERT(bound_function.arguments.size() == 2);
84
86
  if (arguments[1]->HasParameter()) {
85
87
  throw ParameterNotResolvedException();
@@ -217,19 +219,20 @@ static bool TransformNumerical(yyjson_val *vals[], Vector &result, const idx_t c
217
219
  auto data = (T *)FlatVector::GetData(result);
218
220
  auto &validity = FlatVector::Validity(result);
219
221
 
222
+ bool success = true;
220
223
  for (idx_t i = 0; i < count; i++) {
221
224
  const auto &val = vals[i];
222
225
  if (!val || unsafe_yyjson_is_null(val)) {
223
226
  validity.SetInvalid(i);
224
227
  } else if (!GetValueNumerical<T>(val, data[i], options)) {
225
228
  validity.SetInvalid(i);
226
- if (options.strict_cast) {
229
+ if (success && options.strict_cast) {
227
230
  options.object_index = i;
228
- return false;
231
+ success = false;
229
232
  }
230
233
  }
231
234
  }
232
- return true;
235
+ return success;
233
236
  }
234
237
 
235
238
  template <class T>
@@ -238,19 +241,20 @@ static bool TransformDecimal(yyjson_val *vals[], Vector &result, const idx_t cou
238
241
  auto data = (T *)FlatVector::GetData(result);
239
242
  auto &validity = FlatVector::Validity(result);
240
243
 
244
+ bool success = true;
241
245
  for (idx_t i = 0; i < count; i++) {
242
246
  const auto &val = vals[i];
243
247
  if (!val || unsafe_yyjson_is_null(val)) {
244
248
  validity.SetInvalid(i);
245
249
  } else if (!GetValueDecimal<T>(val, data[i], width, scale, options)) {
246
250
  validity.SetInvalid(i);
247
- if (options.strict_cast) {
251
+ if (success && options.strict_cast) {
248
252
  options.object_index = i;
249
- return false;
253
+ success = false;
250
254
  }
251
255
  }
252
256
  }
253
- return true;
257
+ return success;
254
258
  }
255
259
 
256
260
  bool JSONTransform::GetStringVector(yyjson_val *vals[], const idx_t count, const LogicalType &target,
@@ -262,35 +266,46 @@ bool JSONTransform::GetStringVector(yyjson_val *vals[], const idx_t count, const
262
266
  auto &validity = FlatVector::Validity(string_vector);
263
267
  validity.SetAllValid(count);
264
268
 
269
+ bool success = true;
265
270
  for (idx_t i = 0; i < count; i++) {
266
271
  const auto &val = vals[i];
267
272
  if (!val || unsafe_yyjson_is_null(val)) {
268
273
  validity.SetInvalid(i);
269
- } else if (options.strict_cast && !unsafe_yyjson_is_str(val)) {
270
- options.error_message = StringUtil::Format("Unable to cast '%s' to " + EnumUtil::ToString(target.id()),
271
- JSONCommon::ValToString(val, 50));
272
- options.object_index = i;
273
- return false;
274
- } else {
275
- data[i] = GetString(val);
274
+ continue;
275
+ }
276
+
277
+ if (!unsafe_yyjson_is_str(val)) {
278
+ validity.SetInvalid(i);
279
+ if (success && options.strict_cast && !unsafe_yyjson_is_str(val)) {
280
+ options.error_message = StringUtil::Format("Unable to cast '%s' to " + EnumUtil::ToString(target.id()),
281
+ JSONCommon::ValToString(val, 50));
282
+ options.object_index = i;
283
+ success = false;
284
+ }
285
+ continue;
276
286
  }
287
+
288
+ data[i] = GetString(val);
277
289
  }
278
- return true;
290
+ return success;
279
291
  }
280
292
 
281
293
  static bool TransformFromString(yyjson_val *vals[], Vector &result, const idx_t count, JSONTransformOptions &options) {
282
294
  Vector string_vector(LogicalTypeId::VARCHAR, count);
295
+
296
+ bool success = true;
283
297
  if (!JSONTransform::GetStringVector(vals, count, result.GetType(), string_vector, options)) {
284
- return false;
298
+ success = false;
285
299
  }
286
300
 
287
301
  if (!VectorOperations::DefaultTryCast(string_vector, result, count, &options.error_message) &&
288
302
  options.strict_cast) {
289
303
  options.object_index = 0; // Can't get line number information here
290
- options.error_message += " (line/object number information is approximate)";
291
- return false;
304
+ options.error_message +=
305
+ "\n If this error occurred during read_json, line/object number information is approximate";
306
+ success = false;
292
307
  }
293
- return true;
308
+ return success;
294
309
  }
295
310
 
296
311
  template <class OP, class T>
@@ -302,13 +317,14 @@ static bool TransformStringWithFormat(Vector &string_vector, StrpTimeFormat &for
302
317
  auto target_vals = FlatVector::GetData<T>(result);
303
318
  auto &target_validity = FlatVector::Validity(result);
304
319
 
320
+ bool success = true;
305
321
  if (source_validity.AllValid()) {
306
322
  for (idx_t i = 0; i < count; i++) {
307
323
  if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], options.error_message)) {
308
324
  target_validity.SetInvalid(i);
309
- if (options.strict_cast) {
325
+ if (success && options.strict_cast) {
310
326
  options.object_index = i;
311
- return false;
327
+ success = false;
312
328
  }
313
329
  }
314
330
  }
@@ -318,21 +334,22 @@ static bool TransformStringWithFormat(Vector &string_vector, StrpTimeFormat &for
318
334
  target_validity.SetInvalid(i);
319
335
  } else if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], options.error_message)) {
320
336
  target_validity.SetInvalid(i);
321
- if (options.strict_cast) {
337
+ if (success && options.strict_cast) {
322
338
  options.object_index = i;
323
- return false;
339
+ success = false;
324
340
  }
325
341
  }
326
342
  }
327
343
  }
328
- return true;
344
+ return success;
329
345
  }
330
346
 
331
347
  static bool TransformFromStringWithFormat(yyjson_val *vals[], Vector &result, const idx_t count,
332
348
  JSONTransformOptions &options) {
333
349
  Vector string_vector(LogicalTypeId::VARCHAR, count);
350
+ bool success = true;
334
351
  if (!JSONTransform::GetStringVector(vals, count, result.GetType(), string_vector, options)) {
335
- return false;
352
+ success = false;
336
353
  }
337
354
 
338
355
  const auto &result_type = result.GetType().id();
@@ -340,12 +357,19 @@ static bool TransformFromStringWithFormat(yyjson_val *vals[], Vector &result, co
340
357
 
341
358
  switch (result_type) {
342
359
  case LogicalTypeId::DATE:
343
- return TransformStringWithFormat<TryParseDate, date_t>(string_vector, format, count, result, options);
360
+ if (!TransformStringWithFormat<TryParseDate, date_t>(string_vector, format, count, result, options)) {
361
+ success = false;
362
+ }
363
+ break;
344
364
  case LogicalTypeId::TIMESTAMP:
345
- return TransformStringWithFormat<TryParseTimeStamp, timestamp_t>(string_vector, format, count, result, options);
365
+ if (!TransformStringWithFormat<TryParseTimeStamp, timestamp_t>(string_vector, format, count, result, options)) {
366
+ success = false;
367
+ }
368
+ break;
346
369
  default:
347
370
  throw InternalException("No date/timestamp formats for %s", EnumUtil::ToString(result.GetType().id()));
348
371
  }
372
+ return success;
349
373
  }
350
374
 
351
375
  static bool TransformToString(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count) {
@@ -387,89 +411,103 @@ bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
387
411
  size_t idx, max;
388
412
  yyjson_val *key, *val;
389
413
  for (idx_t i = 0; i < count; i++) {
390
- if (objects[i] && !unsafe_yyjson_is_null(objects[i])) {
391
- if (!unsafe_yyjson_is_obj(objects[i]) && options.strict_cast) {
414
+ const auto &obj = objects[i];
415
+ if (!obj || unsafe_yyjson_is_null(obj)) {
416
+ // Set nested val to null so the recursion doesn't break
417
+ for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
418
+ nested_vals[col_idx][i] = nullptr;
419
+ }
420
+ continue;
421
+ }
422
+
423
+ if (!unsafe_yyjson_is_obj(obj)) {
424
+ // Set nested val to null so the recursion doesn't break
425
+ for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
426
+ nested_vals[col_idx][i] = nullptr;
427
+ }
428
+ if (success && options.strict_cast && obj) {
392
429
  options.error_message =
393
- StringUtil::Format("Expected OBJECT, but got %s: %s", JSONCommon::ValTypeToString(objects[i]),
394
- JSONCommon::ValToString(objects[i], 50));
430
+ StringUtil::Format("Expected OBJECT, but got %s: %s", JSONCommon::ValTypeToString(obj),
431
+ JSONCommon::ValToString(obj, 50));
395
432
  options.object_index = i;
396
433
  success = false;
397
- break;
398
434
  }
399
- found_key_count = 0;
400
- memset(found_keys, false, column_count);
401
- yyjson_obj_foreach(objects[i], idx, max, key, val) {
402
- auto key_ptr = unsafe_yyjson_get_str(key);
403
- auto key_len = unsafe_yyjson_get_len(key);
404
- auto it = key_map.find({key_ptr, key_len});
405
- if (it != key_map.end()) {
406
- const auto &col_idx = it->second;
407
- if (options.error_duplicate_key && found_keys[col_idx]) {
435
+ continue;
436
+ }
437
+
438
+ found_key_count = 0;
439
+ memset(found_keys, false, column_count);
440
+ yyjson_obj_foreach(objects[i], idx, max, key, val) {
441
+ auto key_ptr = unsafe_yyjson_get_str(key);
442
+ auto key_len = unsafe_yyjson_get_len(key);
443
+ auto it = key_map.find({key_ptr, key_len});
444
+ if (it != key_map.end()) {
445
+ const auto &col_idx = it->second;
446
+ if (found_keys[col_idx]) {
447
+ if (success && options.error_duplicate_key) {
408
448
  options.error_message =
409
449
  StringUtil::Format("Duplicate key \"" + string(key_ptr, key_len) + "\" in object %s",
410
450
  JSONCommon::ValToString(objects[i], 50));
411
451
  options.object_index = i;
412
452
  success = false;
413
- break;
414
453
  }
454
+ } else {
415
455
  nested_vals[col_idx][i] = val;
416
456
  found_keys[col_idx] = true;
417
457
  found_key_count++;
418
- } else if (options.error_unknown_key) {
419
- options.error_message =
420
- StringUtil::Format("Object %s has unknown key \"" + string(key_ptr, key_len) + "\"",
421
- JSONCommon::ValToString(objects[i], 50));
422
- options.object_index = i;
423
- success = false;
424
- }
425
- }
426
- if (found_key_count != column_count) {
427
- // If 'error_missing_key, we throw an error if one of the keys was not found.
428
- // If not, we set the nested val to null so the recursion doesn't break
429
- for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
430
- if (!found_keys[col_idx]) {
431
- if (options.error_missing_key) {
432
- options.error_message =
433
- StringUtil::Format("Object %s does not have key \"" + names[col_idx] + "\"",
434
- JSONCommon::ValToString(objects[i], 50));
435
- options.object_index = i;
436
- success = false;
437
- } else {
438
- nested_vals[col_idx][i] = nullptr;
439
- }
440
- }
441
458
  }
459
+ } else if (success && options.error_unknown_key) {
460
+ options.error_message =
461
+ StringUtil::Format("Object %s has unknown key \"" + string(key_ptr, key_len) + "\"",
462
+ JSONCommon::ValToString(objects[i], 50));
463
+ options.object_index = i;
464
+ success = false;
442
465
  }
443
- } else {
444
- // Set nested val to null so the recursion doesn't break
466
+ }
467
+
468
+ if (found_key_count != column_count) {
469
+ // If 'error_missing_key, we throw an error if one of the keys was not found.
470
+ // If not, we set the nested val to null so the recursion doesn't break
445
471
  for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
472
+ if (found_keys[col_idx]) {
473
+ continue;
474
+ }
446
475
  nested_vals[col_idx][i] = nullptr;
476
+
477
+ if (success && options.error_missing_key) {
478
+ options.error_message = StringUtil::Format("Object %s does not have key \"" + names[col_idx] + "\"",
479
+ JSONCommon::ValToString(objects[i], 50));
480
+ options.object_index = i;
481
+ success = false;
482
+ }
447
483
  }
448
484
  }
449
485
  }
450
486
 
451
- if (!success) {
452
- if (!options.from_file) {
453
- throw InvalidInputException(options.error_message);
487
+ for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
488
+ if (!JSONTransform::Transform(nested_vals[col_idx], alc, *result_vectors[col_idx], count, options)) {
489
+ success = false;
454
490
  }
455
- return false;
456
491
  }
457
492
 
458
- for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
459
- if (JSONTransform::Transform(nested_vals[col_idx], alc, *result_vectors[col_idx], count, options)) {
460
- continue;
461
- }
462
- if (!options.from_file) {
463
- throw InvalidInputException(options.error_message);
464
- }
465
- return false;
493
+ if (!options.delay_error && !success) {
494
+ throw InvalidInputException(options.error_message);
466
495
  }
467
496
 
468
497
  return success;
469
498
  }
470
499
 
471
500
  static bool TransformObjectInternal(yyjson_val *objects[], yyjson_alc *alc, Vector &result, const idx_t count,
472
- const LogicalType &type, JSONTransformOptions &options) {
501
+ JSONTransformOptions &options) {
502
+ // Set validity first
503
+ auto &result_validity = FlatVector::Validity(result);
504
+ for (idx_t i = 0; i < count; i++) {
505
+ const auto &obj = objects[i];
506
+ if (!obj || unsafe_yyjson_is_null(obj)) {
507
+ result_validity.SetInvalid(i);
508
+ }
509
+ }
510
+
473
511
  // Get child vectors and names
474
512
  auto &child_vs = StructVector::GetEntries(result);
475
513
  vector<string> child_names;
@@ -477,7 +515,7 @@ static bool TransformObjectInternal(yyjson_val *objects[], yyjson_alc *alc, Vect
477
515
  child_names.reserve(child_vs.size());
478
516
  child_vectors.reserve(child_vs.size());
479
517
  for (idx_t child_i = 0; child_i < child_vs.size(); child_i++) {
480
- child_names.push_back(StructType::GetChildName(type, child_i));
518
+ child_names.push_back(StructType::GetChildName(result.GetType(), child_i));
481
519
  child_vectors.push_back(child_vs[child_i].get());
482
520
  }
483
521
 
@@ -486,29 +524,35 @@ static bool TransformObjectInternal(yyjson_val *objects[], yyjson_alc *alc, Vect
486
524
 
487
525
  static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result, const idx_t count,
488
526
  JSONTransformOptions &options) {
527
+ bool success = true;
528
+
489
529
  // Initialize list vector
490
530
  auto list_entries = FlatVector::GetData<list_entry_t>(result);
491
531
  auto &list_validity = FlatVector::Validity(result);
492
532
  idx_t offset = 0;
493
533
  for (idx_t i = 0; i < count; i++) {
494
- if (!arrays[i] || unsafe_yyjson_is_null(arrays[i])) {
534
+ const auto &arr = arrays[i];
535
+ if (!arr || unsafe_yyjson_is_null(arr)) {
536
+ list_validity.SetInvalid(i);
537
+ continue;
538
+ }
539
+
540
+ if (!unsafe_yyjson_is_arr(arr)) {
495
541
  list_validity.SetInvalid(i);
496
- } else if (!unsafe_yyjson_is_arr(arrays[i])) {
497
- if (options.strict_cast) {
542
+ if (success && options.strict_cast) {
498
543
  options.error_message =
499
544
  StringUtil::Format("Expected ARRAY, but got %s: %s", JSONCommon::ValTypeToString(arrays[i]),
500
545
  JSONCommon::ValToString(arrays[i], 50));
501
546
  options.object_index = i;
502
- return false;
503
- } else {
504
- list_validity.SetInvalid(i);
547
+ success = false;
505
548
  }
506
- } else {
507
- auto &entry = list_entries[i];
508
- entry.offset = offset;
509
- entry.length = unsafe_yyjson_get_len(arrays[i]);
510
- offset += entry.length;
549
+ continue;
511
550
  }
551
+
552
+ auto &entry = list_entries[i];
553
+ entry.offset = offset;
554
+ entry.length = unsafe_yyjson_get_len(arr);
555
+ offset += entry.length;
512
556
  }
513
557
  ListVector::SetListSize(result, offset);
514
558
  ListVector::Reserve(result, offset);
@@ -522,8 +566,7 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
522
566
  idx_t list_i = 0;
523
567
  for (idx_t i = 0; i < count; i++) {
524
568
  if (!list_validity.RowIsValid(i)) {
525
- // We already marked this as invalid
526
- continue;
569
+ continue; // We already marked this as invalid
527
570
  }
528
571
  yyjson_arr_foreach(arrays[i], idx, max, val) {
529
572
  nested_vals[list_i] = val;
@@ -532,9 +575,7 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
532
575
  }
533
576
  D_ASSERT(list_i == offset);
534
577
 
535
- // Transform array values
536
- auto success = JSONTransform::Transform(nested_vals, alc, ListVector::GetEntry(result), offset, options);
537
- if (!success && options.from_file) {
578
+ if (!success) {
538
579
  // Set object index in case of error in nested list so we can get accurate line number information
539
580
  for (idx_t i = 0; i < count; i++) {
540
581
  if (!list_validity.RowIsValid(i)) {
@@ -546,6 +587,89 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
546
587
  }
547
588
  }
548
589
  }
590
+
591
+ // Transform array values
592
+ if (!JSONTransform::Transform(nested_vals, alc, ListVector::GetEntry(result), offset, options)) {
593
+ success = false;
594
+ }
595
+
596
+ if (!options.delay_error && !success) {
597
+ throw InvalidInputException(options.error_message);
598
+ }
599
+
600
+ return success;
601
+ }
602
+
603
+ static bool TransformObjectToMap(yyjson_val *objects[], yyjson_alc *alc, Vector &result, const idx_t count,
604
+ JSONTransformOptions &options) {
605
+ // Pre-allocate list vector
606
+ idx_t list_size = 0;
607
+ for (idx_t i = 0; i < count; i++) {
608
+ const auto &obj = objects[i];
609
+ if (!obj || !unsafe_yyjson_is_obj(obj)) {
610
+ continue;
611
+ }
612
+ list_size += unsafe_yyjson_get_len(obj);
613
+ }
614
+ ListVector::Reserve(result, list_size);
615
+ ListVector::SetListSize(result, list_size);
616
+
617
+ auto list_entries = FlatVector::GetData<list_entry_t>(result);
618
+ auto list_validity = FlatVector::Validity(result);
619
+
620
+ auto keys = (yyjson_val **)alc->malloc(alc->ctx, sizeof(yyjson_val *) * list_size);
621
+ auto vals = (yyjson_val **)alc->malloc(alc->ctx, sizeof(yyjson_val *) * list_size);
622
+
623
+ bool success = true;
624
+ idx_t list_offset = 0;
625
+
626
+ size_t idx, max;
627
+ yyjson_val *key, *val;
628
+ for (idx_t i = 0; i < count; i++) {
629
+ const auto &obj = objects[i];
630
+ if (!obj || unsafe_yyjson_is_null(obj)) {
631
+ list_validity.SetInvalid(i);
632
+ continue;
633
+ }
634
+
635
+ if (!unsafe_yyjson_is_obj(obj)) {
636
+ list_validity.SetInvalid(i);
637
+ if (success && options.strict_cast && !unsafe_yyjson_is_obj(obj)) {
638
+ options.error_message =
639
+ StringUtil::Format("Expected OBJECT, but got %s: %s", JSONCommon::ValTypeToString(obj),
640
+ JSONCommon::ValToString(obj, 50));
641
+ options.object_index = i;
642
+ success = false;
643
+ }
644
+ continue;
645
+ }
646
+
647
+ auto &list_entry = list_entries[i];
648
+ list_entry.offset = list_offset;
649
+ list_entry.length = unsafe_yyjson_get_len(obj);
650
+
651
+ yyjson_obj_foreach(obj, idx, max, key, val) {
652
+ keys[list_offset] = key;
653
+ vals[list_offset] = val;
654
+ list_offset++;
655
+ }
656
+ }
657
+
658
+ // Transform keys
659
+ if (!JSONTransform::Transform(keys, alc, MapVector::GetKeys(result), list_size, options)) {
660
+ throw ConversionException(
661
+ StringUtil::Format(options.error_message, ". Cannot default to NULL, because map keys cannot be NULL"));
662
+ }
663
+
664
+ // Transform values
665
+ if (!JSONTransform::Transform(vals, alc, MapVector::GetValues(result), list_size, options)) {
666
+ success = false;
667
+ }
668
+
669
+ if (!options.delay_error && !success) {
670
+ throw InvalidInputException(options.error_message);
671
+ }
672
+
549
673
  return success;
550
674
  }
551
675
 
@@ -636,27 +760,25 @@ bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &resul
636
760
  case LogicalTypeId::BLOB:
637
761
  return TransformToString(vals, alc, result, count);
638
762
  case LogicalTypeId::STRUCT:
639
- return TransformObjectInternal(vals, alc, result, count, result_type, options);
763
+ return TransformObjectInternal(vals, alc, result, count, options);
640
764
  case LogicalTypeId::LIST:
641
765
  return TransformArray(vals, alc, result, count, options);
766
+ case LogicalTypeId::MAP:
767
+ return TransformObjectToMap(vals, alc, result, count, options);
642
768
  default:
643
769
  throw InternalException("Unexpected type at JSON Transform %s", result_type.ToString());
644
770
  }
645
771
  }
646
772
 
647
- template <bool strict>
648
- static void TransformFunction(DataChunk &args, ExpressionState &state, Vector &result) {
649
- auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
650
- auto alc = lstate.json_allocator.GetYYJSONAllocator();
651
-
652
- const auto count = args.size();
653
- auto &input = args.data[0];
773
+ static bool TransformFunctionInternal(Vector &input, const idx_t count, Vector &result, yyjson_alc *alc,
774
+ JSONTransformOptions &options) {
654
775
  UnifiedVectorFormat input_data;
655
776
  input.ToUnifiedFormat(count, input_data);
656
777
  auto inputs = (string_t *)input_data.data;
778
+
657
779
  // Read documents
658
- yyjson_doc *docs[STANDARD_VECTOR_SIZE];
659
- yyjson_val *vals[STANDARD_VECTOR_SIZE];
780
+ auto docs = (yyjson_doc **)alc->malloc(alc->ctx, sizeof(yyjson_doc *) * count);
781
+ auto vals = (yyjson_val **)alc->malloc(alc->ctx, sizeof(yyjson_val *) * count);
660
782
  auto &result_validity = FlatVector::Validity(result);
661
783
  for (idx_t i = 0; i < count; i++) {
662
784
  auto idx = input_data.sel->get_index(i);
@@ -670,14 +792,22 @@ static void TransformFunction(DataChunk &args, ExpressionState &state, Vector &r
670
792
  }
671
793
  }
672
794
 
673
- JSONTransformOptions options(strict, strict, strict, false);
674
-
675
- if (!JSONTransform::Transform(vals, alc, result, count, options)) {
676
- throw InvalidInputException(options.error_message);
795
+ auto success = JSONTransform::Transform(vals, alc, result, count, options);
796
+ if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) {
797
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
677
798
  }
678
799
 
679
- if (args.AllConstant()) {
680
- result.SetVectorType(VectorType::CONSTANT_VECTOR);
800
+ return success;
801
+ }
802
+
803
+ template <bool strict>
804
+ static void TransformFunction(DataChunk &args, ExpressionState &state, Vector &result) {
805
+ auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
806
+ auto alc = lstate.json_allocator.GetYYJSONAllocator();
807
+
808
+ JSONTransformOptions options(strict, strict, strict, false);
809
+ if (!TransformFunctionInternal(args.data[0], args.size(), result, alc, options)) {
810
+ throw InvalidInputException(options.error_message);
681
811
  }
682
812
  }
683
813
 
@@ -705,4 +835,40 @@ ScalarFunctionSet JSONFunctions::GetTransformStrictFunction() {
705
835
  return set;
706
836
  }
707
837
 
838
+ static bool JSONToAnyCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
839
+ auto &lstate = parameters.local_state->Cast<JSONFunctionLocalState>();
840
+ lstate.json_allocator.Reset();
841
+ auto alc = lstate.json_allocator.GetYYJSONAllocator();
842
+
843
+ JSONTransformOptions options(true, true, true, true);
844
+ options.delay_error = true;
845
+
846
+ auto success = TransformFunctionInternal(source, count, result, alc, options);
847
+ if (!success) {
848
+ HandleCastError::AssignError(options.error_message, parameters.error_message);
849
+ }
850
+ return success;
851
+ }
852
+
853
+ BoundCastInfo JSONToAnyCastBind(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
854
+ return BoundCastInfo(JSONToAnyCast, nullptr, JSONFunctionLocalState::InitCastLocalState);
855
+ }
856
+
857
+ void JSONFunctions::RegisterJSONTransformCastFunctions(CastFunctionSet &casts) {
858
+ auto json_to_any_cost = casts.ImplicitCastCost(JSONCommon::JSONType(), LogicalType::ANY);
859
+ casts.RegisterCastFunction(JSONCommon::JSONType(), LogicalType::ANY, JSONToAnyCastBind, json_to_any_cost);
860
+
861
+ const auto struct_type = LogicalType::STRUCT({{"any", LogicalType::ANY}});
862
+ auto json_to_struct_cost = casts.ImplicitCastCost(LogicalType::VARCHAR, struct_type) - 2;
863
+ casts.RegisterCastFunction(JSONCommon::JSONType(), struct_type, JSONToAnyCastBind, json_to_struct_cost);
864
+
865
+ const auto list_type = LogicalType::LIST(LogicalType::ANY);
866
+ auto json_to_list_cost = casts.ImplicitCastCost(LogicalType::VARCHAR, list_type) - 2;
867
+ casts.RegisterCastFunction(JSONCommon::JSONType(), list_type, JSONToAnyCastBind, json_to_list_cost);
868
+
869
+ const auto map_type = LogicalType::MAP(LogicalType::ANY, LogicalType::ANY);
870
+ auto json_to_map_cost = casts.ImplicitCastCost(LogicalType::VARCHAR, map_type) - 2;
871
+ casts.RegisterCastFunction(JSONCommon::JSONType(), map_type, JSONToAnyCastBind, json_to_map_cost);
872
+ }
873
+
708
874
  } // namespace duckdb
@@ -1,8 +1,8 @@
1
+ #include "duckdb/common/multi_file_reader.hpp"
1
2
  #include "json_functions.hpp"
2
3
  #include "json_scan.hpp"
3
4
  #include "json_structure.hpp"
4
5
  #include "json_transform.hpp"
5
- #include "duckdb/common/multi_file_reader.hpp"
6
6
 
7
7
  namespace duckdb {
8
8
 
@@ -223,7 +223,7 @@ unique_ptr<FunctionData> ReadJSONBind(ClientContext &context, TableFunctionBindI
223
223
  transform_options.error_duplicate_key = !bind_data.ignore_errors;
224
224
  transform_options.error_missing_key = false;
225
225
  transform_options.error_unknown_key = bind_data.auto_detect && !bind_data.ignore_errors;
226
- transform_options.from_file = true;
226
+ transform_options.delay_error = true;
227
227
 
228
228
  return result;
229
229
  }
@@ -262,10 +262,12 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
262
262
  }
263
263
 
264
264
  if (!success) {
265
- string hint = gstate.bind_data.auto_detect
266
- ? "\nTry increasing 'sample_size', reducing 'maximum_depth', specifying 'columns' manually, "
267
- "specifying 'lines' or 'json_format' manually, or setting 'ignore_errors' to true."
268
- : "\n Try specifying 'lines' or 'json_format' manually, or setting 'ignore_errors' to true.";
265
+ string hint =
266
+ gstate.bind_data.auto_detect
267
+ ? "\nTry increasing 'sample_size', reducing 'maximum_depth', specifying 'columns', 'lines' or "
268
+ "'json_format' manually, or setting 'ignore_errors' to true."
269
+ : "\nTry setting 'auto_detect' to true, specifying 'lines' or 'json_format' manually, or setting "
270
+ "'ignore_errors' to true.";
269
271
  lstate.ThrowTransformError(lstate.transform_options.object_index,
270
272
  lstate.transform_options.error_message + hint);
271
273
  }