duckdb 0.7.1-dev2.0 → 0.7.1-dev284.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. package/binding.gyp +7 -7
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/buffered_json_reader.cpp +50 -9
  4. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +7 -2
  5. package/src/duckdb/extension/json/include/json_common.hpp +2 -2
  6. package/src/duckdb/extension/json/include/json_scan.hpp +29 -10
  7. package/src/duckdb/extension/json/json_functions/copy_json.cpp +35 -22
  8. package/src/duckdb/extension/json/json_functions/json_create.cpp +8 -8
  9. package/src/duckdb/extension/json/json_functions/json_transform.cpp +47 -8
  10. package/src/duckdb/extension/json/json_functions/read_json.cpp +104 -49
  11. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +5 -3
  12. package/src/duckdb/extension/json/json_functions.cpp +6 -0
  13. package/src/duckdb/extension/json/json_scan.cpp +144 -34
  14. package/src/duckdb/extension/parquet/parquet-extension.cpp +3 -2
  15. package/src/duckdb/src/catalog/catalog.cpp +15 -0
  16. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +8 -7
  17. package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
  18. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  19. package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
  20. package/src/duckdb/src/common/file_system.cpp +14 -0
  21. package/src/duckdb/src/common/hive_partitioning.cpp +1 -0
  22. package/src/duckdb/src/common/operator/cast_operators.cpp +14 -8
  23. package/src/duckdb/src/common/printer.cpp +1 -1
  24. package/src/duckdb/src/common/types/time.cpp +1 -1
  25. package/src/duckdb/src/common/types/timestamp.cpp +35 -4
  26. package/src/duckdb/src/common/types.cpp +36 -10
  27. package/src/duckdb/src/execution/column_binding_resolver.cpp +5 -2
  28. package/src/duckdb/src/execution/index/art/art.cpp +117 -67
  29. package/src/duckdb/src/execution/index/art/art_key.cpp +24 -12
  30. package/src/duckdb/src/execution/index/art/leaf.cpp +7 -8
  31. package/src/duckdb/src/execution/index/art/node.cpp +13 -27
  32. package/src/duckdb/src/execution/index/art/node16.cpp +5 -8
  33. package/src/duckdb/src/execution/index/art/node256.cpp +3 -5
  34. package/src/duckdb/src/execution/index/art/node4.cpp +4 -7
  35. package/src/duckdb/src/execution/index/art/node48.cpp +5 -8
  36. package/src/duckdb/src/execution/index/art/prefix.cpp +2 -3
  37. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +7 -9
  38. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +6 -11
  39. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +13 -13
  40. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +1 -1
  41. package/src/duckdb/src/execution/operator/schema/physical_detach.cpp +37 -0
  42. package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +0 -5
  43. package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +4 -0
  44. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -0
  45. package/src/duckdb/src/function/pragma/pragma_queries.cpp +38 -11
  46. package/src/duckdb/src/function/table/read_csv.cpp +17 -5
  47. package/src/duckdb/src/function/table/table_scan.cpp +3 -0
  48. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  49. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +5 -1
  50. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
  53. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  54. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +3 -2
  55. package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +3 -0
  56. package/src/duckdb/src/include/duckdb/common/exception.hpp +10 -0
  57. package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -0
  58. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +9 -1
  59. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +4 -4
  60. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +5 -1
  61. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +37 -41
  62. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +8 -11
  63. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +1 -3
  64. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -2
  65. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  66. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_detach.hpp +32 -0
  67. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -2
  68. package/src/duckdb/src/include/duckdb/main/config.hpp +0 -3
  69. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_database_info.hpp +0 -4
  70. package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +32 -0
  71. package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +1 -1
  72. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -2
  73. package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
  74. package/src/duckdb/src/include/duckdb/parser/statement/detach_statement.hpp +29 -0
  75. package/src/duckdb/src/include/duckdb/parser/statement/list.hpp +1 -0
  76. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +3 -3
  77. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +1 -1
  78. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  79. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +1 -0
  80. package/src/duckdb/src/include/duckdb/planner/binder.hpp +4 -0
  81. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +10 -3
  82. package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -5
  83. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +1 -2
  84. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +7 -1
  85. package/src/duckdb/src/include/duckdb/storage/index.hpp +47 -38
  86. package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +7 -0
  87. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -0
  88. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +7 -0
  89. package/src/duckdb/src/main/client_context.cpp +2 -0
  90. package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
  91. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +2 -6
  92. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +3 -0
  93. package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -13
  94. package/src/duckdb/src/parser/statement/delete_statement.cpp +3 -0
  95. package/src/duckdb/src/parser/statement/detach_statement.cpp +15 -0
  96. package/src/duckdb/src/parser/statement/insert_statement.cpp +9 -0
  97. package/src/duckdb/src/parser/statement/update_statement.cpp +3 -0
  98. package/src/duckdb/src/parser/transform/expression/transform_case.cpp +3 -3
  99. package/src/duckdb/src/parser/transform/statement/transform_create_database.cpp +0 -1
  100. package/src/duckdb/src/parser/transform/statement/transform_detach.cpp +19 -0
  101. package/src/duckdb/src/parser/transformer.cpp +2 -0
  102. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +3 -0
  103. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +6 -3
  104. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +16 -14
  105. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +13 -0
  106. package/src/duckdb/src/planner/binder/statement/bind_detach.cpp +19 -0
  107. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +29 -4
  108. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +22 -1
  109. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +2 -1
  110. package/src/duckdb/src/planner/binder.cpp +2 -0
  111. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +32 -1
  112. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +21 -5
  113. package/src/duckdb/src/planner/logical_operator.cpp +4 -0
  114. package/src/duckdb/src/planner/planner.cpp +1 -0
  115. package/src/duckdb/src/storage/compression/bitpacking.cpp +16 -7
  116. package/src/duckdb/src/storage/data_table.cpp +66 -3
  117. package/src/duckdb/src/storage/index.cpp +1 -1
  118. package/src/duckdb/src/storage/local_storage.cpp +1 -1
  119. package/src/duckdb/src/storage/storage_info.cpp +2 -1
  120. package/src/duckdb/src/storage/table/column_data.cpp +4 -2
  121. package/src/duckdb/src/storage/table/update_segment.cpp +15 -0
  122. package/src/duckdb/src/storage/table_index_list.cpp +1 -2
  123. package/src/duckdb/src/storage/wal_replay.cpp +68 -0
  124. package/src/duckdb/src/storage/write_ahead_log.cpp +21 -1
  125. package/src/duckdb/src/transaction/commit_state.cpp +5 -2
  126. package/src/duckdb/third_party/fmt/include/fmt/core.h +1 -2
  127. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +1 -0
  128. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +14 -0
  129. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +530 -1006
  130. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +17659 -17626
  131. package/src/duckdb/third_party/thrift/thrift/Thrift.h +8 -2
  132. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  133. package/src/duckdb/ub_src_execution_operator_schema.cpp +2 -0
  134. package/src/duckdb/ub_src_parser_statement.cpp +2 -0
  135. package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
  136. package/src/duckdb/ub_src_planner_binder_statement.cpp +2 -0
  137. package/src/statement.cpp +46 -12
  138. package/test/prepare.test.ts +39 -1
  139. package/test/typescript_decls.test.ts +1 -1
  140. package/src/duckdb/src/include/duckdb/function/create_database_extension.hpp +0 -37
@@ -13,63 +13,88 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
13
13
  JSONScanLocalState lstate(context, gstate);
14
14
  ArenaAllocator allocator(BufferAllocator::Get(context));
15
15
 
16
- static const unordered_map<LogicalTypeId, vector<const char *>, LogicalTypeIdHash> FORMAT_TEMPLATES = {
17
- {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
18
- {LogicalTypeId::TIMESTAMP,
19
- {"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S",
20
- "%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S"}},
21
- };
22
-
23
- // Populate possible date/timestamp formats, assume this is consistent across columns
24
- for (auto &kv : FORMAT_TEMPLATES) {
25
- const auto &type = kv.first;
26
- if (bind_data.date_format_map.HasFormats(type)) {
27
- continue; // Already populated
28
- }
29
- const auto &format_strings = kv.second;
30
- for (auto &format_string : format_strings) {
31
- bind_data.date_format_map.AddFormat(type, format_string);
32
- }
33
- }
34
-
35
16
  // Read for the specified sample size
36
17
  JSONStructureNode node;
18
+ bool more_than_one = false;
37
19
  Vector string_vector(LogicalType::VARCHAR);
38
20
  idx_t remaining = bind_data.sample_size;
39
21
  while (remaining != 0) {
40
22
  allocator.Reset();
41
23
  auto read_count = lstate.ReadNext(gstate);
24
+ if (lstate.scan_count > 1) {
25
+ more_than_one = true;
26
+ }
42
27
  if (read_count == 0) {
43
28
  break;
44
29
  }
45
30
  idx_t next = MinValue<idx_t>(read_count, remaining);
31
+ yyjson_val **values;
32
+ if (bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
33
+ bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) {
34
+ values = lstate.array_values;
35
+ } else {
36
+ values = lstate.values;
37
+ }
46
38
  for (idx_t i = 0; i < next; i++) {
47
- if (lstate.objects[i]) {
48
- JSONStructure::ExtractStructure(lstate.objects[i], node);
39
+ if (values[i]) {
40
+ JSONStructure::ExtractStructure(values[i], node);
49
41
  }
50
42
  }
51
43
  if (!node.ContainsVarchar()) { // Can't refine non-VARCHAR types
52
44
  continue;
53
45
  }
54
46
  node.InitializeCandidateTypes(bind_data.max_depth);
55
- node.RefineCandidateTypes(lstate.objects, next, string_vector, allocator, bind_data.date_format_map);
47
+ node.RefineCandidateTypes(values, next, string_vector, allocator, bind_data.date_format_map);
56
48
  remaining -= next;
49
+
50
+ if (gstate.file_index == 10) {
51
+ // We really shouldn't open more than 10 files when sampling
52
+ break;
53
+ }
57
54
  }
58
55
  bind_data.type = original_scan_type;
59
- bind_data.transform_options.date_format_map = &bind_data.date_format_map;
60
56
 
61
- const auto type = JSONStructure::StructureToType(context, node, bind_data.max_depth);
62
- if (type.id() != LogicalTypeId::STRUCT) {
63
- return_types.emplace_back(type);
64
- names.emplace_back("json");
65
- bind_data.objects = false;
66
- } else {
67
- const auto &child_types = StructType::GetChildTypes(type);
68
- return_types.reserve(child_types.size());
69
- names.reserve(child_types.size());
70
- for (auto &child_type : child_types) {
71
- return_types.emplace_back(child_type.second);
72
- names.emplace_back(child_type.first);
57
+ // Convert structure to logical type
58
+ auto type = JSONStructure::StructureToType(context, node, bind_data.max_depth);
59
+
60
+ // Detect record type
61
+ if (bind_data.record_type == JSONRecordType::AUTO) {
62
+ switch (type.id()) {
63
+ case LogicalTypeId::STRUCT:
64
+ bind_data.record_type = JSONRecordType::RECORDS;
65
+ break;
66
+ case LogicalTypeId::LIST: {
67
+ if (more_than_one) {
68
+ bind_data.record_type = JSONRecordType::JSON;
69
+ } else {
70
+ type = ListType::GetChildType(type);
71
+ if (type.id() == LogicalTypeId::STRUCT) {
72
+ bind_data.record_type = JSONRecordType::ARRAY_OF_RECORDS;
73
+ } else {
74
+ bind_data.record_type = JSONRecordType::ARRAY_OF_JSON;
75
+ }
76
+ }
77
+ break;
78
+ }
79
+ default:
80
+ bind_data.record_type = JSONRecordType::JSON;
81
+ }
82
+ }
83
+
84
+ // Detect return type
85
+ if (bind_data.auto_detect) {
86
+ bind_data.transform_options.date_format_map = &bind_data.date_format_map;
87
+ if (type.id() != LogicalTypeId::STRUCT) {
88
+ return_types.emplace_back(type);
89
+ names.emplace_back("json");
90
+ } else {
91
+ const auto &child_types = StructType::GetChildTypes(type);
92
+ return_types.reserve(child_types.size());
93
+ names.reserve(child_types.size());
94
+ for (auto &child_type : child_types) {
95
+ return_types.emplace_back(child_type.second);
96
+ names.emplace_back(child_type.first);
97
+ }
73
98
  }
74
99
  }
75
100
 
@@ -150,6 +175,22 @@ void JSONScan::InitializeBindData(ClientContext &context, JSONScanData &bind_dat
150
175
  if (!error.empty()) {
151
176
  throw InvalidInputException("Could not parse TIMESTAMPFORMAT: %s", error.c_str());
152
177
  }
178
+ } else if (loption == "json_format") {
179
+ auto arg = StringValue::Get(kv.second);
180
+ if (arg == "records") {
181
+ bind_data.record_type = JSONRecordType::RECORDS;
182
+ } else if (arg == "array_of_records") {
183
+ bind_data.record_type = JSONRecordType::ARRAY_OF_RECORDS;
184
+ } else if (arg == "values") {
185
+ bind_data.record_type = JSONRecordType::JSON;
186
+ } else if (arg == "array_of_values") {
187
+ bind_data.record_type = JSONRecordType::ARRAY_OF_JSON;
188
+ } else if (arg == "auto") {
189
+ bind_data.record_type = JSONRecordType::AUTO;
190
+ } else {
191
+ throw InvalidInputException("\"json_format\" must be one of ['records', 'array_of_records', 'json', "
192
+ "'array_of_json', 'auto']");
193
+ }
153
194
  }
154
195
  }
155
196
  }
@@ -170,7 +211,7 @@ unique_ptr<FunctionData> ReadJSONBind(ClientContext &context, TableFunctionBindI
170
211
 
171
212
  bind_data.InitializeFormats();
172
213
 
173
- if (bind_data.auto_detect) {
214
+ if (bind_data.auto_detect || bind_data.record_type == JSONRecordType::AUTO) {
174
215
  JSONScan::AutoDetect(context, bind_data, return_types, names);
175
216
  bind_data.names = names;
176
217
  }
@@ -189,9 +230,16 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
189
230
  auto &gstate = ((JSONGlobalTableFunctionState &)*data_p.global_state).state;
190
231
  auto &lstate = ((JSONLocalTableFunctionState &)*data_p.local_state).state;
191
232
 
192
- // Fetch next lines
193
233
  const auto count = lstate.ReadNext(gstate);
194
- const auto objects = lstate.objects;
234
+ yyjson_val **values;
235
+ if (gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
236
+ gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) {
237
+ values = lstate.array_values;
238
+ } else {
239
+ D_ASSERT(gstate.bind_data.record_type != JSONRecordType::AUTO);
240
+ values = lstate.values;
241
+ }
242
+ output.SetCardinality(count);
195
243
 
196
244
  vector<Vector *> result_vectors;
197
245
  result_vectors.reserve(output.ColumnCount());
@@ -202,22 +250,23 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
202
250
 
203
251
  // Pass current reader to transform options so we can get line number information if an error occurs
204
252
  bool success;
205
- if (gstate.bind_data.objects) {
206
- success = JSONTransform::TransformObject(objects, lstate.GetAllocator(), count, gstate.bind_data.names,
253
+ if (gstate.bind_data.record_type == JSONRecordType::RECORDS ||
254
+ gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS) {
255
+ success = JSONTransform::TransformObject(values, lstate.GetAllocator(), count, gstate.bind_data.names,
207
256
  result_vectors, lstate.transform_options);
208
257
  } else {
209
- success = JSONTransform::Transform(objects, lstate.GetAllocator(), *result_vectors[0], count,
258
+ success = JSONTransform::Transform(values, lstate.GetAllocator(), *result_vectors[0], count,
210
259
  lstate.transform_options);
211
260
  }
261
+
212
262
  if (!success) {
213
263
  string hint = gstate.bind_data.auto_detect
214
264
  ? "\nTry increasing 'sample_size', reducing 'maximum_depth', specifying 'columns' manually, "
215
- "or setting 'ignore_errors' to true."
216
- : "";
217
- lstate.ThrowTransformError(count, lstate.transform_options.object_index,
265
+ "specifying 'lines' or 'json_format' manually, or setting 'ignore_errors' to true."
266
+ : "\n Try specifying 'lines' or 'json_format' manually, or setting 'ignore_errors' to true.";
267
+ lstate.ThrowTransformError(lstate.transform_options.object_index,
218
268
  lstate.transform_options.error_message + hint);
219
269
  }
220
- output.SetCardinality(count);
221
270
  }
222
271
 
223
272
  TableFunction JSONFunctions::GetReadJSONTableFunction(bool list_parameter, shared_ptr<JSONScanInfo> function_info) {
@@ -233,8 +282,10 @@ TableFunction JSONFunctions::GetReadJSONTableFunction(bool list_parameter, share
233
282
  table_function.named_parameters["date_format"] = LogicalType::VARCHAR;
234
283
  table_function.named_parameters["timestampformat"] = LogicalType::VARCHAR;
235
284
  table_function.named_parameters["timestamp_format"] = LogicalType::VARCHAR;
285
+ table_function.named_parameters["json_format"] = LogicalType::VARCHAR;
236
286
 
237
287
  table_function.projection_pushdown = true;
288
+ // TODO: might be able to do filter pushdown/prune too
238
289
 
239
290
  table_function.function_info = std::move(function_info);
240
291
 
@@ -249,7 +300,8 @@ TableFunction GetReadJSONAutoTableFunction(bool list_parameter, shared_ptr<JSONS
249
300
 
250
301
  CreateTableFunctionInfo JSONFunctions::GetReadJSONFunction() {
251
302
  TableFunctionSet function_set("read_json");
252
- auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::UNSTRUCTURED, false);
303
+ auto function_info =
304
+ make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::UNSTRUCTURED, JSONRecordType::RECORDS, false);
253
305
  function_set.AddFunction(JSONFunctions::GetReadJSONTableFunction(false, function_info));
254
306
  function_set.AddFunction(JSONFunctions::GetReadJSONTableFunction(true, function_info));
255
307
  return CreateTableFunctionInfo(function_set);
@@ -257,7 +309,8 @@ CreateTableFunctionInfo JSONFunctions::GetReadJSONFunction() {
257
309
 
258
310
  CreateTableFunctionInfo JSONFunctions::GetReadNDJSONFunction() {
259
311
  TableFunctionSet function_set("read_ndjson");
260
- auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED, false);
312
+ auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED,
313
+ JSONRecordType::RECORDS, false);
261
314
  function_set.AddFunction(JSONFunctions::GetReadJSONTableFunction(false, function_info));
262
315
  function_set.AddFunction(JSONFunctions::GetReadJSONTableFunction(true, function_info));
263
316
  return CreateTableFunctionInfo(function_set);
@@ -265,7 +318,8 @@ CreateTableFunctionInfo JSONFunctions::GetReadNDJSONFunction() {
265
318
 
266
319
  CreateTableFunctionInfo JSONFunctions::GetReadJSONAutoFunction() {
267
320
  TableFunctionSet function_set("read_json_auto");
268
- auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::AUTO_DETECT, true);
321
+ auto function_info =
322
+ make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::AUTO_DETECT, JSONRecordType::AUTO, true);
269
323
  function_set.AddFunction(GetReadJSONAutoTableFunction(false, function_info));
270
324
  function_set.AddFunction(GetReadJSONAutoTableFunction(true, function_info));
271
325
  return CreateTableFunctionInfo(function_set);
@@ -273,7 +327,8 @@ CreateTableFunctionInfo JSONFunctions::GetReadJSONAutoFunction() {
273
327
 
274
328
  CreateTableFunctionInfo JSONFunctions::GetReadNDJSONAutoFunction() {
275
329
  TableFunctionSet function_set("read_ndjson_auto");
276
- auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED, true);
330
+ auto function_info =
331
+ make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED, JSONRecordType::AUTO, true);
277
332
  function_set.AddFunction(GetReadJSONAutoTableFunction(false, function_info));
278
333
  function_set.AddFunction(GetReadJSONAutoTableFunction(true, function_info));
279
334
  return CreateTableFunctionInfo(function_set);
@@ -20,7 +20,7 @@ static void ReadJSONObjectsFunction(ClientContext &context, TableFunctionInput &
20
20
  // Fetch next lines
21
21
  const auto count = lstate.ReadNext(gstate);
22
22
  const auto lines = lstate.lines;
23
- const auto objects = lstate.objects;
23
+ const auto objects = lstate.values;
24
24
 
25
25
  // Create the strings without copying them
26
26
  auto strings = FlatVector::GetData<string_t>(output.data[0]);
@@ -48,7 +48,8 @@ TableFunction GetReadJSONObjectsTableFunction(bool list_parameter, shared_ptr<JS
48
48
 
49
49
  CreateTableFunctionInfo JSONFunctions::GetReadJSONObjectsFunction() {
50
50
  TableFunctionSet function_set("read_json_objects");
51
- auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::UNSTRUCTURED);
51
+ auto function_info =
52
+ make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::UNSTRUCTURED, JSONRecordType::JSON);
52
53
  function_set.AddFunction(GetReadJSONObjectsTableFunction(false, function_info));
53
54
  function_set.AddFunction(GetReadJSONObjectsTableFunction(true, function_info));
54
55
  return CreateTableFunctionInfo(function_set);
@@ -56,7 +57,8 @@ CreateTableFunctionInfo JSONFunctions::GetReadJSONObjectsFunction() {
56
57
 
57
58
  CreateTableFunctionInfo JSONFunctions::GetReadNDJSONObjectsFunction() {
58
59
  TableFunctionSet function_set("read_ndjson_objects");
59
- auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::NEWLINE_DELIMITED);
60
+ auto function_info =
61
+ make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::NEWLINE_DELIMITED, JSONRecordType::JSON);
60
62
  function_set.AddFunction(GetReadJSONObjectsTableFunction(false, function_info));
61
63
  function_set.AddFunction(GetReadJSONObjectsTableFunction(true, function_info));
62
64
  return CreateTableFunctionInfo(function_set);
@@ -166,6 +166,12 @@ vector<CreateTableFunctionInfo> JSONFunctions::GetTableFunctions() {
166
166
  unique_ptr<TableRef> JSONFunctions::ReadJSONReplacement(ClientContext &context, const string &table_name,
167
167
  ReplacementScanData *data) {
168
168
  auto lower_name = StringUtil::Lower(table_name);
169
+ // remove any compression
170
+ if (StringUtil::EndsWith(lower_name, ".gz")) {
171
+ lower_name = lower_name.substr(0, lower_name.size() - 3);
172
+ } else if (StringUtil::EndsWith(lower_name, ".zst")) {
173
+ lower_name = lower_name.substr(0, lower_name.size() - 4);
174
+ }
169
175
  if (!StringUtil::EndsWith(lower_name, ".json") && !StringUtil::Contains(lower_name, ".json?") &&
170
176
  !StringUtil::EndsWith(lower_name, ".ndjson") && !StringUtil::Contains(lower_name, ".ndjson?")) {
171
177
  return nullptr;
@@ -1,6 +1,7 @@
1
1
  #include "json_scan.hpp"
2
2
 
3
3
  #include "duckdb/main/database.hpp"
4
+ #include "duckdb/main/extension_helper.hpp"
4
5
  #include "duckdb/parallel/task_scheduler.hpp"
5
6
  #include "duckdb/storage/buffer_manager.hpp"
6
7
 
@@ -19,8 +20,9 @@ unique_ptr<FunctionData> JSONScanData::Bind(ClientContext &context, TableFunctio
19
20
  auto &options = result->options;
20
21
 
21
22
  auto &info = (JSONScanInfo &)*input.info;
22
- options.format = info.format;
23
23
  result->type = info.type;
24
+ options.format = info.format;
25
+ result->record_type = info.record_type;
24
26
  result->auto_detect = info.auto_detect;
25
27
 
26
28
  vector<string> patterns;
@@ -39,16 +41,16 @@ unique_ptr<FunctionData> JSONScanData::Bind(ClientContext &context, TableFunctio
39
41
  result->ignore_errors = BooleanValue::Get(kv.second);
40
42
  } else if (loption == "maximum_object_size") {
41
43
  result->maximum_object_size = MaxValue<idx_t>(UIntegerValue::Get(kv.second), result->maximum_object_size);
42
- } else if (loption == "format") {
44
+ } else if (loption == "lines") {
43
45
  auto format = StringUtil::Lower(StringValue::Get(kv.second));
44
46
  if (format == "auto") {
45
47
  options.format = JSONFormat::AUTO_DETECT;
46
- } else if (format == "unstructured") {
48
+ } else if (format == "false") {
47
49
  options.format = JSONFormat::UNSTRUCTURED;
48
- } else if (format == "newline_delimited") {
50
+ } else if (format == "true") {
49
51
  options.format = JSONFormat::NEWLINE_DELIMITED;
50
52
  } else {
51
- throw BinderException("format must be one of ['auto', 'unstructured', 'newline_delimited']");
53
+ throw BinderException("\"lines\" must be one of ['auto', 'true', 'false']");
52
54
  }
53
55
  } else if (loption == "compression") {
54
56
  auto compression = StringUtil::Lower(StringValue::Get(kv.second));
@@ -75,7 +77,7 @@ void JSONScanData::InitializeFilePaths(ClientContext &context, const vector<stri
75
77
  for (auto &file_pattern : patterns) {
76
78
  auto found_files = fs.Glob(file_pattern, context);
77
79
  if (found_files.empty()) {
78
- throw IOException("No files found that match the pattern \"%s\"", file_pattern);
80
+ throw FileSystem::MissingFileException(file_pattern, context);
79
81
  }
80
82
  file_paths.insert(file_paths.end(), found_files.begin(), found_files.end());
81
83
  }
@@ -97,6 +99,27 @@ void JSONScanData::InitializeFormats() {
97
99
  if (!timestamp_format.empty()) {
98
100
  date_format_map.AddFormat(LogicalTypeId::TIMESTAMP, timestamp_format);
99
101
  }
102
+
103
+ if (auto_detect) {
104
+ static const unordered_map<LogicalTypeId, vector<const char *>, LogicalTypeIdHash> FORMAT_TEMPLATES = {
105
+ {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
106
+ {LogicalTypeId::TIMESTAMP,
107
+ {"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S",
108
+ "%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"}},
109
+ };
110
+
111
+ // Populate possible date/timestamp formats, assume this is consistent across columns
112
+ for (auto &kv : FORMAT_TEMPLATES) {
113
+ const auto &type = kv.first;
114
+ if (date_format_map.HasFormats(type)) {
115
+ continue; // Already populated
116
+ }
117
+ const auto &format_strings = kv.second;
118
+ for (auto &format_string : format_strings) {
119
+ date_format_map.AddFormat(type, format_string);
120
+ }
121
+ }
122
+ }
100
123
  }
101
124
 
102
125
  void JSONScanData::Serialize(FieldWriter &writer) {
@@ -111,9 +134,17 @@ void JSONScanData::Serialize(FieldWriter &writer) {
111
134
  writer.WriteList<string>(names);
112
135
  writer.WriteList<idx_t>(valid_cols);
113
136
  writer.WriteField<idx_t>(max_depth);
114
- writer.WriteField<bool>(objects);
115
- writer.WriteString(date_format);
116
- writer.WriteString(timestamp_format);
137
+ writer.WriteField<JSONRecordType>(record_type);
138
+ if (!date_format.empty()) {
139
+ writer.WriteString(date_format);
140
+ } else {
141
+ writer.WriteString(date_format_map.GetFormat(LogicalTypeId::DATE).format_specifier);
142
+ }
143
+ if (!timestamp_format.empty()) {
144
+ writer.WriteString(timestamp_format);
145
+ } else {
146
+ writer.WriteString(date_format_map.GetFormat(LogicalTypeId::TIMESTAMP).format_specifier);
147
+ }
117
148
  }
118
149
 
119
150
  void JSONScanData::Deserialize(FieldReader &reader) {
@@ -128,9 +159,12 @@ void JSONScanData::Deserialize(FieldReader &reader) {
128
159
  names = reader.ReadRequiredList<string>();
129
160
  valid_cols = reader.ReadRequiredList<idx_t>();
130
161
  max_depth = reader.ReadRequired<idx_t>();
131
- objects = reader.ReadRequired<bool>();
162
+ record_type = reader.ReadRequired<JSONRecordType>();
132
163
  date_format = reader.ReadRequired<string>();
133
164
  timestamp_format = reader.ReadRequired<string>();
165
+
166
+ InitializeFormats();
167
+ transform_options.date_format_map = &date_format_map;
134
168
  }
135
169
 
136
170
  JSONScanGlobalState::JSONScanGlobalState(ClientContext &context, JSONScanData &bind_data_p)
@@ -149,11 +183,11 @@ JSONScanGlobalState::JSONScanGlobalState(ClientContext &context, JSONScanData &b
149
183
  }
150
184
 
151
185
  JSONScanLocalState::JSONScanLocalState(ClientContext &context, JSONScanGlobalState &gstate)
152
- : batch_index(DConstants::INVALID_INDEX), bind_data(gstate.bind_data),
186
+ : scan_count(0), array_idx(0), array_offset(0), batch_index(DConstants::INVALID_INDEX), bind_data(gstate.bind_data),
153
187
  json_allocator(BufferAllocator::Get(context)), current_reader(nullptr), current_buffer_handle(nullptr),
154
- buffer_size(0), buffer_offset(0), prev_buffer_remainder(0) {
188
+ is_last(false), buffer_size(0), buffer_offset(0), prev_buffer_remainder(0) {
155
189
 
156
- // Buffer to reconstruct JSON objects when they cross a buffer boundary
190
+ // Buffer to reconstruct JSON values when they cross a buffer boundary
157
191
  reconstruct_buffer = gstate.allocator.Allocate(gstate.bind_data.maximum_object_size + YYJSON_PADDING_SIZE);
158
192
 
159
193
  // This is needed for JSONFormat::UNSTRUCTURED, to make use of YYJSON_READ_INSITU
@@ -173,11 +207,6 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
173
207
  // Perform projection pushdown
174
208
  if (bind_data.type == JSONScanType::READ_JSON) {
175
209
  D_ASSERT(input.column_ids.size() <= bind_data.names.size()); // Can't project to have more columns
176
- if (bind_data.auto_detect && input.column_ids.size() < bind_data.names.size()) {
177
- // If we are auto-detecting, but don't need all columns present in the file,
178
- // then we don't need to throw an error if we encounter an unseen column
179
- bind_data.transform_options.error_unknown_key = false;
180
- }
181
210
  vector<string> names;
182
211
  names.reserve(input.column_ids.size());
183
212
  for (idx_t i = 0; i < input.column_ids.size(); i++) {
@@ -188,13 +217,37 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
188
217
  names.push_back(std::move(bind_data.names[id]));
189
218
  bind_data.valid_cols.push_back(i);
190
219
  }
220
+ if (names.size() < bind_data.names.size()) {
221
+ // If we are auto-detecting, but don't need all columns present in the file,
222
+ // then we don't need to throw an error if we encounter an unseen column
223
+ bind_data.transform_options.error_unknown_key = false;
224
+ }
191
225
  bind_data.names = std::move(names);
192
226
  }
193
227
  return result;
194
228
  }
195
229
 
196
230
  idx_t JSONGlobalTableFunctionState::MaxThreads() const {
197
- return state.system_threads;
231
+ auto &bind_data = state.bind_data;
232
+
233
+ auto num_files = bind_data.file_paths.size();
234
+ idx_t readers_per_file;
235
+ if (bind_data.options.format == JSONFormat::UNSTRUCTURED) {
236
+ // Unstructured necessitates single thread
237
+ readers_per_file = 1;
238
+ } else if (!state.json_readers.empty() && state.json_readers[0]->IsOpen()) {
239
+ auto &reader = *state.json_readers[0];
240
+ const auto &options = reader.GetOptions();
241
+ if (options.format == JSONFormat::UNSTRUCTURED || options.compression != FileCompressionType::UNCOMPRESSED) {
242
+ // Auto-detected unstructured - same story, compression also really limits parallelism
243
+ readers_per_file = 1;
244
+ } else {
245
+ return state.system_threads;
246
+ }
247
+ } else {
248
+ return state.system_threads;
249
+ }
250
+ return num_files * readers_per_file;
198
251
  }
199
252
 
200
253
  JSONLocalTableFunctionState::JSONLocalTableFunctionState(ClientContext &context, JSONScanGlobalState &gstate)
@@ -230,6 +283,12 @@ static inline void SkipWhitespace(const char *buffer_ptr, idx_t &buffer_offset,
230
283
  idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) {
231
284
  json_allocator.Reset();
232
285
 
286
+ if ((gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
287
+ gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) &&
288
+ array_idx < scan_count) {
289
+ return GetObjectsFromArray(gstate);
290
+ }
291
+
233
292
  idx_t count = 0;
234
293
  if (buffer_offset == buffer_size) {
235
294
  if (!ReadNextBuffer(gstate)) {
@@ -253,10 +312,18 @@ idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) {
253
312
  default:
254
313
  throw InternalException("Unknown JSON format");
255
314
  }
315
+ scan_count = count;
256
316
 
257
317
  // Skip over any remaining whitespace for the next scan
258
318
  SkipWhitespace(buffer_ptr, buffer_offset, buffer_size);
259
319
 
320
+ if (gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
321
+ gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) {
322
+ array_idx = 0;
323
+ array_offset = 0;
324
+ return GetObjectsFromArray(gstate);
325
+ }
326
+
260
327
  return count;
261
328
  }
262
329
 
@@ -331,10 +398,48 @@ yyjson_val *JSONScanLocalState::ParseLine(char *line_start, idx_t line_size, idx
331
398
  }
332
399
  }
333
400
 
401
+ idx_t JSONScanLocalState::GetObjectsFromArray(JSONScanGlobalState &gstate) {
402
+ idx_t arr_count = 0;
403
+
404
+ size_t idx, max;
405
+ yyjson_val *val;
406
+ for (; array_idx < scan_count; array_idx++, array_offset = 0) {
407
+ auto &value = values[array_idx];
408
+ if (!value) {
409
+ continue;
410
+ }
411
+ if (unsafe_yyjson_is_arr(value)) {
412
+ yyjson_arr_foreach(value, idx, max, val) {
413
+ if (idx < array_offset) {
414
+ continue;
415
+ }
416
+ array_values[arr_count++] = val;
417
+ if (arr_count == STANDARD_VECTOR_SIZE) {
418
+ break;
419
+ }
420
+ }
421
+ array_offset = idx + 1;
422
+ if (arr_count == STANDARD_VECTOR_SIZE) {
423
+ break;
424
+ }
425
+ } else if (!gstate.bind_data.ignore_errors) {
426
+ ThrowTransformError(
427
+ array_idx,
428
+ StringUtil::Format("Expected JSON ARRAY but got %s: %s\nTry setting json_format to 'records'",
429
+ JSONCommon::ValTypeToString(value), JSONCommon::ValToString(value, 50)));
430
+ }
431
+ }
432
+ return arr_count;
433
+ }
434
+
334
435
  bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
335
436
  if (current_reader) {
336
437
  D_ASSERT(current_buffer_handle);
337
438
  current_reader->SetBufferLineOrObjectCount(current_buffer_handle->buffer_index, lines_or_objects_in_buffer);
439
+ if (is_last && gstate.bind_data.type != JSONScanType::SAMPLE) {
440
+ // Close files that are done if we're not sampling
441
+ current_reader->CloseJSONFile();
442
+ }
338
443
  }
339
444
 
340
445
  AllocatedData buffer;
@@ -395,7 +500,9 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
395
500
  // Unopened file
396
501
  current_reader->OpenJSONFile();
397
502
  batch_index = gstate.batch_index++;
398
- if (options.format == JSONFormat::UNSTRUCTURED) {
503
+ if (options.format == JSONFormat::UNSTRUCTURED || (options.format == JSONFormat::NEWLINE_DELIMITED &&
504
+ options.compression != FileCompressionType::UNCOMPRESSED &&
505
+ gstate.file_index < gstate.json_readers.size())) {
399
506
  gstate.file_index++; // UNSTRUCTURED necessitates single-threaded read
400
507
  }
401
508
  if (options.format != JSONFormat::AUTO_DETECT) {
@@ -449,9 +556,6 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
449
556
  auto json_buffer_handle = make_unique<JSONBufferHandle>(buffer_index, readers, std::move(buffer), buffer_size);
450
557
  current_buffer_handle = json_buffer_handle.get();
451
558
  current_reader->InsertBuffer(buffer_index, std::move(json_buffer_handle));
452
- if (!current_reader->GetFileHandle().PlainFileSource() && gstate.bind_data.type == JSONScanType::SAMPLE) {
453
- // TODO: store buffer
454
- }
455
559
 
456
560
  buffer_offset = 0;
457
561
  prev_buffer_remainder = 0;
@@ -507,16 +611,18 @@ void JSONScanLocalState::ReadNextBufferSeek(JSONScanGlobalState &gstate, idx_t &
507
611
  }
508
612
 
509
613
  void JSONScanLocalState::ReadNextBufferNoSeek(JSONScanGlobalState &gstate, idx_t &buffer_index) {
510
- auto &file_handle = current_reader->GetFileHandle();
511
-
512
614
  idx_t request_size = gstate.buffer_capacity - prev_buffer_remainder - YYJSON_PADDING_SIZE;
513
615
  idx_t read_size;
514
616
  {
515
617
  lock_guard<mutex> reader_guard(current_reader->lock);
516
618
  buffer_index = current_reader->GetBufferIndex();
517
619
 
518
- read_size = file_handle.Read(buffer_ptr + prev_buffer_remainder, request_size,
519
- gstate.bind_data.type == JSONScanType::SAMPLE);
620
+ if (current_reader->IsOpen()) {
621
+ read_size = current_reader->GetFileHandle().Read(buffer_ptr + prev_buffer_remainder, request_size,
622
+ gstate.bind_data.type == JSONScanType::SAMPLE);
623
+ } else {
624
+ read_size = 0;
625
+ }
520
626
  is_last = read_size < request_size;
521
627
 
522
628
  if (!gstate.bind_data.ignore_errors && read_size == 0 && prev_buffer_remainder != 0) {
@@ -578,10 +684,15 @@ void JSONScanLocalState::ReconstructFirstObject(JSONScanGlobalState &gstate) {
578
684
  current_reader->RemoveBuffer(current_buffer_handle->buffer_index - 1);
579
685
  }
580
686
 
581
- objects[0] = ParseLine((char *)reconstruct_ptr, line_size, line_size, lines[0]);
687
+ values[0] = ParseLine((char *)reconstruct_ptr, line_size, line_size, lines[0]);
582
688
  }
583
689
 
584
690
  void JSONScanLocalState::ReadUnstructured(idx_t &count) {
691
+ // yyjson does not always return YYJSON_READ_ERROR_UNEXPECTED_END properly
692
+ // if a different error code happens within the last 50 bytes
693
+ // we assume it should be YYJSON_READ_ERROR_UNEXPECTED_END instead
694
+ static constexpr idx_t END_BOUND = 50;
695
+
585
696
  const auto max_obj_size = reconstruct_buffer.GetSize();
586
697
  yyjson_read_err error;
587
698
  for (; count < STANDARD_VECTOR_SIZE; count++) {
@@ -607,8 +718,7 @@ void JSONScanLocalState::ReadUnstructured(idx_t &count) {
607
718
  } else if (error.pos > max_obj_size) {
608
719
  current_reader->ThrowParseError(current_buffer_handle->buffer_index, lines_or_objects_in_buffer, error,
609
720
  "Try increasing \"maximum_object_size\".");
610
-
611
- } else if (error.code == YYJSON_READ_ERROR_UNEXPECTED_END && !is_last) {
721
+ } else if (!is_last && (error.code == YYJSON_READ_ERROR_UNEXPECTED_END || remaining - error.pos < END_BOUND)) {
612
722
  // Copy remaining to reconstruct_buffer
613
723
  const auto reconstruct_ptr = reconstruct_buffer.get();
614
724
  memcpy(reconstruct_ptr, obj_copy_start, remaining);
@@ -618,7 +728,7 @@ void JSONScanLocalState::ReadUnstructured(idx_t &count) {
618
728
  } else {
619
729
  current_reader->ThrowParseError(current_buffer_handle->buffer_index, lines_or_objects_in_buffer, error);
620
730
  }
621
- objects[count] = read_doc->root;
731
+ values[count] = read_doc->root;
622
732
  }
623
733
  }
624
734
 
@@ -644,7 +754,7 @@ void JSONScanLocalState::ReadNewlineDelimited(idx_t &count) {
644
754
  }
645
755
  idx_t line_size = line_end - line_start;
646
756
 
647
- objects[count] = ParseLine((char *)line_start, line_size, remaining, lines[count]);
757
+ values[count] = ParseLine((char *)line_start, line_size, remaining, lines[count]);
648
758
 
649
759
  buffer_offset += line_size;
650
760
  SkipWhitespace(buffer_ptr, buffer_offset, buffer_size);
@@ -655,11 +765,11 @@ yyjson_alc *JSONScanLocalState::GetAllocator() {
655
765
  return json_allocator.GetYYJSONAllocator();
656
766
  }
657
767
 
658
- void JSONScanLocalState::ThrowTransformError(idx_t count, idx_t object_index, const string &error_message) {
768
+ void JSONScanLocalState::ThrowTransformError(idx_t object_index, const string &error_message) {
659
769
  D_ASSERT(current_reader);
660
770
  D_ASSERT(current_buffer_handle);
661
771
  D_ASSERT(object_index != DConstants::INVALID_INDEX);
662
- auto line_or_object_in_buffer = lines_or_objects_in_buffer - count + object_index;
772
+ auto line_or_object_in_buffer = lines_or_objects_in_buffer - scan_count + object_index;
663
773
  current_reader->ThrowTransformError(current_buffer_handle->buffer_index, line_or_object_in_buffer, error_message);
664
774
  }
665
775
 
@@ -223,7 +223,7 @@ public:
223
223
  FileSystem &fs = FileSystem::GetFileSystem(context);
224
224
  auto files = fs.Glob(info.file_path, context);
225
225
  if (files.empty()) {
226
- throw IOException("No files found that match the pattern \"%s\"", info.file_path);
226
+ throw FileSystem::MissingFileException(info.file_path, context);
227
227
  }
228
228
 
229
229
  // The most likely path (Parquet read without union by name option)
@@ -363,8 +363,9 @@ public:
363
363
 
364
364
  static vector<string> ParquetGlob(FileSystem &fs, const string &glob, ClientContext &context) {
365
365
  auto files = fs.Glob(glob, FileSystem::GetFileOpener(context));
366
+
366
367
  if (files.empty()) {
367
- throw IOException("No files found that match the pattern \"%s\"", glob);
368
+ throw FileSystem::MissingFileException(glob, context);
368
369
  }
369
370
  return files;
370
371
  }