duckdb 0.7.1-dev90.0 → 0.7.2-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/README.md +1 -1
  2. package/binding.gyp +7 -7
  3. package/package.json +3 -3
  4. package/src/duckdb/extension/json/buffered_json_reader.cpp +50 -9
  5. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +7 -2
  6. package/src/duckdb/extension/json/include/json_scan.hpp +45 -10
  7. package/src/duckdb/extension/json/json_functions/copy_json.cpp +35 -22
  8. package/src/duckdb/extension/json/json_functions/json_create.cpp +8 -8
  9. package/src/duckdb/extension/json/json_functions/json_structure.cpp +8 -3
  10. package/src/duckdb/extension/json/json_functions/json_transform.cpp +54 -10
  11. package/src/duckdb/extension/json/json_functions/read_json.cpp +104 -49
  12. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +5 -3
  13. package/src/duckdb/extension/json/json_functions.cpp +7 -0
  14. package/src/duckdb/extension/json/json_scan.cpp +144 -38
  15. package/src/duckdb/extension/parquet/column_reader.cpp +7 -0
  16. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -0
  17. package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -10
  18. package/src/duckdb/src/catalog/catalog.cpp +62 -13
  19. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +8 -7
  20. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +1 -1
  21. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  22. package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
  23. package/src/duckdb/src/catalog/default/default_views.cpp +1 -1
  24. package/src/duckdb/src/common/bind_helpers.cpp +55 -0
  25. package/src/duckdb/src/common/file_system.cpp +23 -9
  26. package/src/duckdb/src/common/hive_partitioning.cpp +1 -0
  27. package/src/duckdb/src/common/local_file_system.cpp +4 -4
  28. package/src/duckdb/src/common/string_util.cpp +8 -4
  29. package/src/duckdb/src/common/types/partitioned_column_data.cpp +1 -0
  30. package/src/duckdb/src/common/types.cpp +37 -11
  31. package/src/duckdb/src/execution/column_binding_resolver.cpp +5 -2
  32. package/src/duckdb/src/execution/index/art/art.cpp +117 -67
  33. package/src/duckdb/src/execution/index/art/art_key.cpp +24 -12
  34. package/src/duckdb/src/execution/index/art/leaf.cpp +7 -8
  35. package/src/duckdb/src/execution/index/art/node.cpp +13 -27
  36. package/src/duckdb/src/execution/index/art/node16.cpp +5 -8
  37. package/src/duckdb/src/execution/index/art/node256.cpp +3 -5
  38. package/src/duckdb/src/execution/index/art/node4.cpp +4 -7
  39. package/src/duckdb/src/execution/index/art/node48.cpp +5 -8
  40. package/src/duckdb/src/execution/index/art/prefix.cpp +2 -3
  41. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +6 -27
  42. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +1 -9
  43. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -9
  44. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +7 -9
  45. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +9 -0
  46. package/src/duckdb/src/execution/physical_operator.cpp +6 -6
  47. package/src/duckdb/src/function/pragma/pragma_queries.cpp +38 -11
  48. package/src/duckdb/src/function/scalar/generic/current_setting.cpp +2 -2
  49. package/src/duckdb/src/function/scalar/list/array_slice.cpp +2 -3
  50. package/src/duckdb/src/function/scalar/map/map.cpp +69 -21
  51. package/src/duckdb/src/function/scalar/string/like.cpp +6 -3
  52. package/src/duckdb/src/function/table/read_csv.cpp +16 -5
  53. package/src/duckdb/src/function/table/system/duckdb_temporary_files.cpp +59 -0
  54. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  55. package/src/duckdb/src/function/table/table_scan.cpp +3 -0
  56. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  57. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +7 -1
  58. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
  59. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  60. package/src/duckdb/src/include/duckdb/common/bind_helpers.hpp +2 -0
  61. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +3 -0
  63. package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +9 -1
  65. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +4 -4
  66. package/src/duckdb/src/include/duckdb/common/string_util.hpp +9 -2
  67. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +37 -41
  68. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +8 -11
  69. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  70. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -1
  71. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  72. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -2
  73. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  74. package/src/duckdb/src/include/duckdb/main/{extension_functions.hpp → extension_entries.hpp} +27 -5
  75. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +11 -1
  76. package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
  77. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +0 -7
  78. package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +1 -1
  79. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -2
  80. package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
  81. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +3 -3
  82. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -0
  84. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +10 -3
  85. package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -5
  86. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +1 -2
  87. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +8 -0
  88. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +7 -1
  89. package/src/duckdb/src/include/duckdb/storage/index.hpp +47 -38
  90. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +7 -0
  91. package/src/duckdb/src/main/client_context.cpp +2 -0
  92. package/src/duckdb/src/main/config.cpp +1 -0
  93. package/src/duckdb/src/main/database.cpp +14 -5
  94. package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
  95. package/src/duckdb/src/main/extension/extension_helper.cpp +15 -0
  96. package/src/duckdb/src/main/extension/extension_install.cpp +60 -16
  97. package/src/duckdb/src/main/extension/extension_load.cpp +62 -13
  98. package/src/duckdb/src/main/settings/settings.cpp +16 -0
  99. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +2 -6
  100. package/src/duckdb/src/parallel/pipeline_executor.cpp +1 -55
  101. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +3 -0
  102. package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -13
  103. package/src/duckdb/src/parser/statement/delete_statement.cpp +3 -0
  104. package/src/duckdb/src/parser/statement/insert_statement.cpp +9 -0
  105. package/src/duckdb/src/parser/statement/update_statement.cpp +3 -0
  106. package/src/duckdb/src/parser/transform/expression/transform_case.cpp +3 -3
  107. package/src/duckdb/src/planner/bind_context.cpp +1 -1
  108. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +3 -0
  109. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +7 -14
  110. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +13 -0
  111. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +2 -2
  112. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +22 -1
  113. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +32 -1
  114. package/src/duckdb/src/planner/logical_operator.cpp +4 -1
  115. package/src/duckdb/src/storage/buffer_manager.cpp +105 -26
  116. package/src/duckdb/src/storage/compression/bitpacking.cpp +16 -7
  117. package/src/duckdb/src/storage/data_table.cpp +66 -3
  118. package/src/duckdb/src/storage/index.cpp +1 -1
  119. package/src/duckdb/src/storage/local_storage.cpp +1 -1
  120. package/src/duckdb/src/storage/table_index_list.cpp +1 -2
  121. package/src/duckdb/src/storage/wal_replay.cpp +68 -0
  122. package/src/duckdb/src/storage/write_ahead_log.cpp +21 -1
  123. package/src/duckdb/src/transaction/commit_state.cpp +5 -2
  124. package/src/duckdb/third_party/concurrentqueue/blockingconcurrentqueue.h +2 -2
  125. package/src/duckdb/third_party/fmt/include/fmt/core.h +1 -2
  126. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  127. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  128. package/src/statement.cpp +46 -12
  129. package/test/arrow.test.ts +3 -3
  130. package/test/prepare.test.ts +39 -1
  131. package/test/typescript_decls.test.ts +1 -1
@@ -58,6 +58,9 @@ static LogicalType StructureToTypeObject(yyjson_val *obj, ClientContext &context
58
58
  child_types.emplace_back(key_str, StructureStringToType(val, context));
59
59
  }
60
60
  D_ASSERT(yyjson_obj_size(obj) == names.size());
61
+ if (child_types.empty()) {
62
+ throw InvalidInputException("Empty object in JSON structure");
63
+ }
61
64
  return LogicalType::STRUCT(child_types);
62
65
  }
63
66
 
@@ -87,7 +90,7 @@ static unique_ptr<FunctionData> JSONTransformBind(ClientContext &context, Scalar
87
90
  } else {
88
91
  auto structure_val = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
89
92
  if (!structure_val.DefaultTryCastAs(JSONCommon::JSONType())) {
90
- throw InvalidInputException("cannot cast JSON structure to string");
93
+ throw InvalidInputException("Cannot cast JSON structure to string");
91
94
  }
92
95
  auto structure_string = structure_val.GetValueUnsafe<string_t>();
93
96
  JSONAllocator json_allocator(Allocator::DefaultAllocator());
@@ -251,7 +254,10 @@ static bool TransformDecimal(yyjson_val *vals[], Vector &result, const idx_t cou
251
254
 
252
255
  bool JSONTransform::GetStringVector(yyjson_val *vals[], const idx_t count, const LogicalType &target,
253
256
  Vector &string_vector, JSONTransformOptions &options) {
254
- auto data = (string_t *)FlatVector::GetData(string_vector);
257
+ if (count > STANDARD_VECTOR_SIZE) {
258
+ string_vector.Initialize(false, count);
259
+ }
260
+ auto data = FlatVector::GetData<string_t>(string_vector);
255
261
  auto &validity = FlatVector::Validity(string_vector);
256
262
  validity.SetAllValid(count);
257
263
 
@@ -380,12 +386,20 @@ bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
380
386
  size_t idx, max;
381
387
  yyjson_val *key, *val;
382
388
  for (idx_t i = 0; i < count; i++) {
383
- if (objects[i]) {
389
+ if (objects[i] && !unsafe_yyjson_is_null(objects[i])) {
390
+ if (!unsafe_yyjson_is_obj(objects[i]) && options.strict_cast) {
391
+ options.error_message =
392
+ StringUtil::Format("Expected OBJECT, but got %s: %s", JSONCommon::ValTypeToString(objects[i]),
393
+ JSONCommon::ValToString(objects[i], 50));
394
+ options.object_index = i;
395
+ success = false;
396
+ break;
397
+ }
384
398
  found_key_count = 0;
385
399
  memset(found_keys, false, column_count);
386
400
  yyjson_obj_foreach(objects[i], idx, max, key, val) {
387
- auto key_ptr = yyjson_get_str(key);
388
- auto key_len = yyjson_get_len(key);
401
+ auto key_ptr = unsafe_yyjson_get_str(key);
402
+ auto key_len = unsafe_yyjson_get_len(key);
389
403
  auto it = key_map.find({key_ptr, key_len});
390
404
  if (it != key_map.end()) {
391
405
  const auto &col_idx = it->second;
@@ -476,13 +490,24 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
476
490
  auto &list_validity = FlatVector::Validity(result);
477
491
  idx_t offset = 0;
478
492
  for (idx_t i = 0; i < count; i++) {
479
- if (!arrays[i] || yyjson_is_null(arrays[i])) {
493
+ if (!arrays[i] || unsafe_yyjson_is_null(arrays[i])) {
480
494
  list_validity.SetInvalid(i);
495
+ } else if (!unsafe_yyjson_is_arr(arrays[i])) {
496
+ if (options.strict_cast) {
497
+ options.error_message =
498
+ StringUtil::Format("Expected ARRAY, but got %s: %s", JSONCommon::ValTypeToString(arrays[i]),
499
+ JSONCommon::ValToString(arrays[i], 50));
500
+ options.object_index = i;
501
+ return false;
502
+ } else {
503
+ list_validity.SetInvalid(i);
504
+ }
505
+ } else {
506
+ auto &entry = list_entries[i];
507
+ entry.offset = offset;
508
+ entry.length = unsafe_yyjson_get_len(arrays[i]);
509
+ offset += entry.length;
481
510
  }
482
- auto &entry = list_entries[i];
483
- entry.offset = offset;
484
- entry.length = yyjson_arr_size(arrays[i]);
485
- offset += entry.length;
486
511
  }
487
512
  ListVector::SetListSize(result, offset);
488
513
  ListVector::Reserve(result, offset);
@@ -523,6 +548,21 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
523
548
  return success;
524
549
  }
525
550
 
551
+ bool TransformToJSON(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count) {
552
+ auto data = (string_t *)FlatVector::GetData(result);
553
+ auto &validity = FlatVector::Validity(result);
554
+ for (idx_t i = 0; i < count; i++) {
555
+ const auto &val = vals[i];
556
+ if (!val) {
557
+ validity.SetInvalid(i);
558
+ } else {
559
+ data[i] = JSONCommon::WriteVal(val, alc);
560
+ }
561
+ }
562
+ // Can always transform to JSON
563
+ return true;
564
+ }
565
+
526
566
  bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
527
567
  JSONTransformOptions &options) {
528
568
  auto result_type = result.GetType();
@@ -531,6 +571,10 @@ bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &resul
531
571
  return TransformFromStringWithFormat(vals, result, count, options);
532
572
  }
533
573
 
574
+ if (JSONCommon::LogicalTypeIsJSON(result_type)) {
575
+ return TransformToJSON(vals, alc, result, count);
576
+ }
577
+
534
578
  switch (result_type.id()) {
535
579
  case LogicalTypeId::SQLNULL:
536
580
  return true;
@@ -13,63 +13,88 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
13
13
  JSONScanLocalState lstate(context, gstate);
14
14
  ArenaAllocator allocator(BufferAllocator::Get(context));
15
15
 
16
- static const unordered_map<LogicalTypeId, vector<const char *>, LogicalTypeIdHash> FORMAT_TEMPLATES = {
17
- {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
18
- {LogicalTypeId::TIMESTAMP,
19
- {"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S",
20
- "%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"}},
21
- };
22
-
23
- // Populate possible date/timestamp formats, assume this is consistent across columns
24
- for (auto &kv : FORMAT_TEMPLATES) {
25
- const auto &type = kv.first;
26
- if (bind_data.date_format_map.HasFormats(type)) {
27
- continue; // Already populated
28
- }
29
- const auto &format_strings = kv.second;
30
- for (auto &format_string : format_strings) {
31
- bind_data.date_format_map.AddFormat(type, format_string);
32
- }
33
- }
34
-
35
16
  // Read for the specified sample size
36
17
  JSONStructureNode node;
18
+ bool more_than_one = false;
37
19
  Vector string_vector(LogicalType::VARCHAR);
38
20
  idx_t remaining = bind_data.sample_size;
39
21
  while (remaining != 0) {
40
22
  allocator.Reset();
41
23
  auto read_count = lstate.ReadNext(gstate);
24
+ if (lstate.scan_count > 1) {
25
+ more_than_one = true;
26
+ }
42
27
  if (read_count == 0) {
43
28
  break;
44
29
  }
45
30
  idx_t next = MinValue<idx_t>(read_count, remaining);
31
+ yyjson_val **values;
32
+ if (bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
33
+ bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) {
34
+ values = lstate.array_values;
35
+ } else {
36
+ values = lstate.values;
37
+ }
46
38
  for (idx_t i = 0; i < next; i++) {
47
- if (lstate.objects[i]) {
48
- JSONStructure::ExtractStructure(lstate.objects[i], node);
39
+ if (values[i]) {
40
+ JSONStructure::ExtractStructure(values[i], node);
49
41
  }
50
42
  }
51
43
  if (!node.ContainsVarchar()) { // Can't refine non-VARCHAR types
52
44
  continue;
53
45
  }
54
46
  node.InitializeCandidateTypes(bind_data.max_depth);
55
- node.RefineCandidateTypes(lstate.objects, next, string_vector, allocator, bind_data.date_format_map);
47
+ node.RefineCandidateTypes(values, next, string_vector, allocator, bind_data.date_format_map);
56
48
  remaining -= next;
49
+
50
+ if (gstate.file_index == 10) {
51
+ // We really shouldn't open more than 10 files when sampling
52
+ break;
53
+ }
57
54
  }
58
55
  bind_data.type = original_scan_type;
59
- bind_data.transform_options.date_format_map = &bind_data.date_format_map;
60
56
 
61
- const auto type = JSONStructure::StructureToType(context, node, bind_data.max_depth);
62
- if (type.id() != LogicalTypeId::STRUCT) {
63
- return_types.emplace_back(type);
64
- names.emplace_back("json");
65
- bind_data.objects = false;
66
- } else {
67
- const auto &child_types = StructType::GetChildTypes(type);
68
- return_types.reserve(child_types.size());
69
- names.reserve(child_types.size());
70
- for (auto &child_type : child_types) {
71
- return_types.emplace_back(child_type.second);
72
- names.emplace_back(child_type.first);
57
+ // Convert structure to logical type
58
+ auto type = JSONStructure::StructureToType(context, node, bind_data.max_depth);
59
+
60
+ // Detect record type
61
+ if (bind_data.record_type == JSONRecordType::AUTO) {
62
+ switch (type.id()) {
63
+ case LogicalTypeId::STRUCT:
64
+ bind_data.record_type = JSONRecordType::RECORDS;
65
+ break;
66
+ case LogicalTypeId::LIST: {
67
+ if (more_than_one) {
68
+ bind_data.record_type = JSONRecordType::JSON;
69
+ } else {
70
+ type = ListType::GetChildType(type);
71
+ if (type.id() == LogicalTypeId::STRUCT) {
72
+ bind_data.record_type = JSONRecordType::ARRAY_OF_RECORDS;
73
+ } else {
74
+ bind_data.record_type = JSONRecordType::ARRAY_OF_JSON;
75
+ }
76
+ }
77
+ break;
78
+ }
79
+ default:
80
+ bind_data.record_type = JSONRecordType::JSON;
81
+ }
82
+ }
83
+
84
+ // Detect return type
85
+ if (bind_data.auto_detect) {
86
+ bind_data.transform_options.date_format_map = &bind_data.date_format_map;
87
+ if (type.id() != LogicalTypeId::STRUCT) {
88
+ return_types.emplace_back(type);
89
+ names.emplace_back("json");
90
+ } else {
91
+ const auto &child_types = StructType::GetChildTypes(type);
92
+ return_types.reserve(child_types.size());
93
+ names.reserve(child_types.size());
94
+ for (auto &child_type : child_types) {
95
+ return_types.emplace_back(child_type.second);
96
+ names.emplace_back(child_type.first);
97
+ }
73
98
  }
74
99
  }
75
100
 
@@ -150,6 +175,22 @@ void JSONScan::InitializeBindData(ClientContext &context, JSONScanData &bind_dat
150
175
  if (!error.empty()) {
151
176
  throw InvalidInputException("Could not parse TIMESTAMPFORMAT: %s", error.c_str());
152
177
  }
178
+ } else if (loption == "json_format") {
179
+ auto arg = StringValue::Get(kv.second);
180
+ if (arg == "records") {
181
+ bind_data.record_type = JSONRecordType::RECORDS;
182
+ } else if (arg == "array_of_records") {
183
+ bind_data.record_type = JSONRecordType::ARRAY_OF_RECORDS;
184
+ } else if (arg == "values") {
185
+ bind_data.record_type = JSONRecordType::JSON;
186
+ } else if (arg == "array_of_values") {
187
+ bind_data.record_type = JSONRecordType::ARRAY_OF_JSON;
188
+ } else if (arg == "auto") {
189
+ bind_data.record_type = JSONRecordType::AUTO;
190
+ } else {
191
+ throw InvalidInputException("\"json_format\" must be one of ['records', 'array_of_records', 'json', "
192
+ "'array_of_json', 'auto']");
193
+ }
153
194
  }
154
195
  }
155
196
  }
@@ -170,7 +211,7 @@ unique_ptr<FunctionData> ReadJSONBind(ClientContext &context, TableFunctionBindI
170
211
 
171
212
  bind_data.InitializeFormats();
172
213
 
173
- if (bind_data.auto_detect) {
214
+ if (bind_data.auto_detect || bind_data.record_type == JSONRecordType::AUTO) {
174
215
  JSONScan::AutoDetect(context, bind_data, return_types, names);
175
216
  bind_data.names = names;
176
217
  }
@@ -189,9 +230,16 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
189
230
  auto &gstate = ((JSONGlobalTableFunctionState &)*data_p.global_state).state;
190
231
  auto &lstate = ((JSONLocalTableFunctionState &)*data_p.local_state).state;
191
232
 
192
- // Fetch next lines
193
233
  const auto count = lstate.ReadNext(gstate);
194
- const auto objects = lstate.objects;
234
+ yyjson_val **values;
235
+ if (gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
236
+ gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) {
237
+ values = lstate.array_values;
238
+ } else {
239
+ D_ASSERT(gstate.bind_data.record_type != JSONRecordType::AUTO);
240
+ values = lstate.values;
241
+ }
242
+ output.SetCardinality(count);
195
243
 
196
244
  vector<Vector *> result_vectors;
197
245
  result_vectors.reserve(output.ColumnCount());
@@ -202,22 +250,23 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
202
250
 
203
251
  // Pass current reader to transform options so we can get line number information if an error occurs
204
252
  bool success;
205
- if (gstate.bind_data.objects) {
206
- success = JSONTransform::TransformObject(objects, lstate.GetAllocator(), count, gstate.bind_data.names,
253
+ if (gstate.bind_data.record_type == JSONRecordType::RECORDS ||
254
+ gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS) {
255
+ success = JSONTransform::TransformObject(values, lstate.GetAllocator(), count, gstate.bind_data.names,
207
256
  result_vectors, lstate.transform_options);
208
257
  } else {
209
- success = JSONTransform::Transform(objects, lstate.GetAllocator(), *result_vectors[0], count,
258
+ success = JSONTransform::Transform(values, lstate.GetAllocator(), *result_vectors[0], count,
210
259
  lstate.transform_options);
211
260
  }
261
+
212
262
  if (!success) {
213
263
  string hint = gstate.bind_data.auto_detect
214
264
  ? "\nTry increasing 'sample_size', reducing 'maximum_depth', specifying 'columns' manually, "
215
- "or setting 'ignore_errors' to true."
216
- : "";
217
- lstate.ThrowTransformError(count, lstate.transform_options.object_index,
265
+ "specifying 'lines' or 'json_format' manually, or setting 'ignore_errors' to true."
266
+ : "\n Try specifying 'lines' or 'json_format' manually, or setting 'ignore_errors' to true.";
267
+ lstate.ThrowTransformError(lstate.transform_options.object_index,
218
268
  lstate.transform_options.error_message + hint);
219
269
  }
220
- output.SetCardinality(count);
221
270
  }
222
271
 
223
272
  TableFunction JSONFunctions::GetReadJSONTableFunction(bool list_parameter, shared_ptr<JSONScanInfo> function_info) {
@@ -233,8 +282,10 @@ TableFunction JSONFunctions::GetReadJSONTableFunction(bool list_parameter, share
233
282
  table_function.named_parameters["date_format"] = LogicalType::VARCHAR;
234
283
  table_function.named_parameters["timestampformat"] = LogicalType::VARCHAR;
235
284
  table_function.named_parameters["timestamp_format"] = LogicalType::VARCHAR;
285
+ table_function.named_parameters["json_format"] = LogicalType::VARCHAR;
236
286
 
237
287
  table_function.projection_pushdown = true;
288
+ // TODO: might be able to do filter pushdown/prune too
238
289
 
239
290
  table_function.function_info = std::move(function_info);
240
291
 
@@ -249,7 +300,8 @@ TableFunction GetReadJSONAutoTableFunction(bool list_parameter, shared_ptr<JSONS
249
300
 
250
301
  CreateTableFunctionInfo JSONFunctions::GetReadJSONFunction() {
251
302
  TableFunctionSet function_set("read_json");
252
- auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::UNSTRUCTURED, false);
303
+ auto function_info =
304
+ make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::UNSTRUCTURED, JSONRecordType::RECORDS, false);
253
305
  function_set.AddFunction(JSONFunctions::GetReadJSONTableFunction(false, function_info));
254
306
  function_set.AddFunction(JSONFunctions::GetReadJSONTableFunction(true, function_info));
255
307
  return CreateTableFunctionInfo(function_set);
@@ -257,7 +309,8 @@ CreateTableFunctionInfo JSONFunctions::GetReadJSONFunction() {
257
309
 
258
310
  CreateTableFunctionInfo JSONFunctions::GetReadNDJSONFunction() {
259
311
  TableFunctionSet function_set("read_ndjson");
260
- auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED, false);
312
+ auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED,
313
+ JSONRecordType::RECORDS, false);
261
314
  function_set.AddFunction(JSONFunctions::GetReadJSONTableFunction(false, function_info));
262
315
  function_set.AddFunction(JSONFunctions::GetReadJSONTableFunction(true, function_info));
263
316
  return CreateTableFunctionInfo(function_set);
@@ -265,7 +318,8 @@ CreateTableFunctionInfo JSONFunctions::GetReadNDJSONFunction() {
265
318
 
266
319
  CreateTableFunctionInfo JSONFunctions::GetReadJSONAutoFunction() {
267
320
  TableFunctionSet function_set("read_json_auto");
268
- auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::AUTO_DETECT, true);
321
+ auto function_info =
322
+ make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::AUTO_DETECT, JSONRecordType::AUTO, true);
269
323
  function_set.AddFunction(GetReadJSONAutoTableFunction(false, function_info));
270
324
  function_set.AddFunction(GetReadJSONAutoTableFunction(true, function_info));
271
325
  return CreateTableFunctionInfo(function_set);
@@ -273,7 +327,8 @@ CreateTableFunctionInfo JSONFunctions::GetReadJSONAutoFunction() {
273
327
 
274
328
  CreateTableFunctionInfo JSONFunctions::GetReadNDJSONAutoFunction() {
275
329
  TableFunctionSet function_set("read_ndjson_auto");
276
- auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED, true);
330
+ auto function_info =
331
+ make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED, JSONRecordType::AUTO, true);
277
332
  function_set.AddFunction(GetReadJSONAutoTableFunction(false, function_info));
278
333
  function_set.AddFunction(GetReadJSONAutoTableFunction(true, function_info));
279
334
  return CreateTableFunctionInfo(function_set);
@@ -20,7 +20,7 @@ static void ReadJSONObjectsFunction(ClientContext &context, TableFunctionInput &
20
20
  // Fetch next lines
21
21
  const auto count = lstate.ReadNext(gstate);
22
22
  const auto lines = lstate.lines;
23
- const auto objects = lstate.objects;
23
+ const auto objects = lstate.values;
24
24
 
25
25
  // Create the strings without copying them
26
26
  auto strings = FlatVector::GetData<string_t>(output.data[0]);
@@ -48,7 +48,8 @@ TableFunction GetReadJSONObjectsTableFunction(bool list_parameter, shared_ptr<JS
48
48
 
49
49
  CreateTableFunctionInfo JSONFunctions::GetReadJSONObjectsFunction() {
50
50
  TableFunctionSet function_set("read_json_objects");
51
- auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::UNSTRUCTURED);
51
+ auto function_info =
52
+ make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::UNSTRUCTURED, JSONRecordType::JSON);
52
53
  function_set.AddFunction(GetReadJSONObjectsTableFunction(false, function_info));
53
54
  function_set.AddFunction(GetReadJSONObjectsTableFunction(true, function_info));
54
55
  return CreateTableFunctionInfo(function_set);
@@ -56,7 +57,8 @@ CreateTableFunctionInfo JSONFunctions::GetReadJSONObjectsFunction() {
56
57
 
57
58
  CreateTableFunctionInfo JSONFunctions::GetReadNDJSONObjectsFunction() {
58
59
  TableFunctionSet function_set("read_ndjson_objects");
59
- auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::NEWLINE_DELIMITED);
60
+ auto function_info =
61
+ make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::NEWLINE_DELIMITED, JSONRecordType::JSON);
60
62
  function_set.AddFunction(GetReadJSONObjectsTableFunction(false, function_info));
61
63
  function_set.AddFunction(GetReadJSONObjectsTableFunction(true, function_info));
62
64
  return CreateTableFunctionInfo(function_set);
@@ -166,7 +166,14 @@ vector<CreateTableFunctionInfo> JSONFunctions::GetTableFunctions() {
166
166
  unique_ptr<TableRef> JSONFunctions::ReadJSONReplacement(ClientContext &context, const string &table_name,
167
167
  ReplacementScanData *data) {
168
168
  auto lower_name = StringUtil::Lower(table_name);
169
+ // remove any compression
170
+ if (StringUtil::EndsWith(lower_name, ".gz")) {
171
+ lower_name = lower_name.substr(0, lower_name.size() - 3);
172
+ } else if (StringUtil::EndsWith(lower_name, ".zst")) {
173
+ lower_name = lower_name.substr(0, lower_name.size() - 4);
174
+ }
169
175
  if (!StringUtil::EndsWith(lower_name, ".json") && !StringUtil::Contains(lower_name, ".json?") &&
176
+ !StringUtil::EndsWith(lower_name, ".jsonl") && !StringUtil::Contains(lower_name, ".jsonl?") &&
170
177
  !StringUtil::EndsWith(lower_name, ".ndjson") && !StringUtil::Contains(lower_name, ".ndjson?")) {
171
178
  return nullptr;
172
179
  }