duckdb 0.6.2-dev2115.0 → 0.6.2-dev2226.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -5
  3. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +6 -1
  4. package/src/duckdb/extension/json/include/json_common.hpp +1 -0
  5. package/src/duckdb/extension/json/include/json_scan.hpp +7 -0
  6. package/src/duckdb/extension/json/include/json_transform.hpp +25 -10
  7. package/src/duckdb/extension/json/json_common.cpp +6 -2
  8. package/src/duckdb/extension/json/json_functions/json_structure.cpp +47 -9
  9. package/src/duckdb/extension/json/json_functions/json_transform.cpp +183 -106
  10. package/src/duckdb/extension/json/json_functions/read_json.cpp +35 -22
  11. package/src/duckdb/extension/json/json_scan.cpp +26 -5
  12. package/src/duckdb/extension/parquet/parquet-extension.cpp +1 -0
  13. package/src/duckdb/src/catalog/catalog.cpp +11 -12
  14. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  15. package/src/duckdb/src/common/box_renderer.cpp +9 -1
  16. package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
  17. package/src/duckdb/src/common/enums/relation_type.cpp +2 -0
  18. package/src/duckdb/src/common/gzip_file_system.cpp +1 -1
  19. package/src/duckdb/src/common/local_file_system.cpp +1 -1
  20. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +2 -2
  21. package/src/duckdb/src/common/types/column_data_allocator.cpp +2 -2
  22. package/src/duckdb/src/common/types/date.cpp +7 -2
  23. package/src/duckdb/src/common/types/vector.cpp +3 -2
  24. package/src/duckdb/src/common/virtual_file_system.cpp +1 -1
  25. package/src/duckdb/src/execution/index/art/art.cpp +5 -5
  26. package/src/duckdb/src/execution/join_hashtable.cpp +4 -5
  27. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +2 -0
  28. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +182 -123
  29. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +22 -18
  30. package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +1 -1
  31. package/src/duckdb/src/function/aggregate/distributive/arg_min_max.cpp +2 -3
  32. package/src/duckdb/src/function/scalar/math/setseed.cpp +1 -1
  33. package/src/duckdb/src/function/scalar/string/substring.cpp +8 -0
  34. package/src/duckdb/src/function/table/read_csv.cpp +1 -1
  35. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  36. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
  37. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +4 -0
  38. package/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +1 -0
  39. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
  40. package/src/duckdb/src/include/duckdb/common/http_stats.hpp +1 -1
  41. package/src/duckdb/src/include/duckdb/common/limits.hpp +3 -0
  42. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +1 -9
  43. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +2 -2
  44. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  45. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +3 -3
  46. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_unnest.hpp +5 -1
  47. package/src/duckdb/src/include/duckdb/main/client_context.hpp +3 -0
  48. package/src/duckdb/src/include/duckdb/main/config.hpp +0 -4
  49. package/src/duckdb/src/include/duckdb/main/database.hpp +6 -0
  50. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +5 -5
  51. package/src/duckdb/src/include/duckdb/main/relation/write_csv_relation.hpp +2 -1
  52. package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +34 -0
  53. package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -1
  54. package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +2 -1
  55. package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
  56. package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/storage/index.hpp +4 -3
  58. package/src/duckdb/src/include/duckdb.h +7 -0
  59. package/src/duckdb/src/main/capi/threading-c.cpp +8 -0
  60. package/src/duckdb/src/main/client_context.cpp +7 -0
  61. package/src/duckdb/src/main/client_context_file_opener.cpp +14 -0
  62. package/src/duckdb/src/main/database.cpp +57 -40
  63. package/src/duckdb/src/main/extension/extension_load.cpp +20 -28
  64. package/src/duckdb/src/main/relation/write_csv_relation.cpp +4 -2
  65. package/src/duckdb/src/main/relation/write_parquet_relation.cpp +37 -0
  66. package/src/duckdb/src/main/relation.cpp +12 -2
  67. package/src/duckdb/src/parallel/executor.cpp +4 -0
  68. package/src/duckdb/src/parser/statement/copy_statement.cpp +1 -1
  69. package/src/duckdb/src/parser/transform/statement/transform_show.cpp +4 -3
  70. package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +1 -1
  71. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +24 -3
  72. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -1
  73. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +2 -0
  74. package/src/duckdb/src/storage/compression/bitpacking.cpp +2 -1
  75. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +1 -1
  76. package/src/duckdb/src/storage/index.cpp +1 -1
  77. package/src/duckdb/src/storage/meta_block_writer.cpp +1 -1
  78. package/src/duckdb/src/storage/table/column_segment.cpp +3 -3
  79. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +1 -2
  80. package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +539 -300
  81. package/src/duckdb/ub_src_main.cpp +0 -2
  82. package/src/duckdb/ub_src_main_relation.cpp +2 -0
  83. package/src/duckdb/src/include/duckdb/function/replacement_open.hpp +0 -54
  84. package/src/duckdb/src/include/duckdb/main/replacement_opens.hpp +0 -20
  85. package/src/duckdb/src/main/extension_prefix_opener.cpp +0 -55
@@ -8,11 +8,21 @@
8
8
 
9
9
  namespace duckdb {
10
10
 
11
+ JSONTransformOptions::JSONTransformOptions() {
12
+ }
13
+
14
+ JSONTransformOptions::JSONTransformOptions(bool strict_cast_p, bool error_duplicate_key_p, bool error_missing_key_p,
15
+ bool error_unkown_key_p)
16
+ : strict_cast(strict_cast_p), error_duplicate_key(error_duplicate_key_p), error_missing_key(error_missing_key_p),
17
+ error_unknown_key(error_unkown_key_p) {
18
+ }
19
+
11
20
  void JSONTransformOptions::Serialize(FieldWriter &writer) {
12
21
  writer.WriteField(strict_cast);
13
22
  writer.WriteField(error_duplicate_key);
14
23
  writer.WriteField(error_missing_key);
15
24
  writer.WriteField(error_unknown_key);
25
+ writer.WriteField(from_file);
16
26
  }
17
27
 
18
28
  void JSONTransformOptions::Deserialize(FieldReader &reader) {
@@ -20,6 +30,7 @@ void JSONTransformOptions::Deserialize(FieldReader &reader) {
20
30
  error_duplicate_key = reader.ReadRequired<bool>();
21
31
  error_missing_key = reader.ReadRequired<bool>();
22
32
  error_unknown_key = reader.ReadRequired<bool>();
33
+ from_file = reader.ReadRequired<bool>();
23
34
  }
24
35
 
25
36
  //! Forward declaration for recursion
@@ -96,13 +107,13 @@ static inline string_t GetString(yyjson_val *val) {
96
107
  }
97
108
 
98
109
  template <class T, class OP = TryCast>
99
- static inline bool GetValueNumerical(yyjson_val *val, T &result, bool strict) {
110
+ static inline bool GetValueNumerical(yyjson_val *val, T &result, JSONTransformOptions &options) {
100
111
  bool success;
101
- switch (yyjson_get_tag(val)) {
112
+ switch (unsafe_yyjson_get_tag(val)) {
102
113
  case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
103
114
  return false;
104
115
  case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
105
- success = OP::template Operation<string_t, T>(GetString(val), result, strict);
116
+ success = OP::template Operation<string_t, T>(GetString(val), result, options.strict_cast);
106
117
  break;
107
118
  case YYJSON_TYPE_ARR | YYJSON_SUBTYPE_NONE:
108
119
  case YYJSON_TYPE_OBJ | YYJSON_SUBTYPE_NONE:
@@ -110,35 +121,35 @@ static inline bool GetValueNumerical(yyjson_val *val, T &result, bool strict) {
110
121
  break;
111
122
  case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE:
112
123
  case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE:
113
- success = OP::template Operation<bool, T>(unsafe_yyjson_get_bool(val), result, strict);
124
+ success = OP::template Operation<bool, T>(unsafe_yyjson_get_bool(val), result, options.strict_cast);
114
125
  break;
115
126
  case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT:
116
- success = OP::template Operation<uint64_t, T>(unsafe_yyjson_get_uint(val), result, strict);
127
+ success = OP::template Operation<uint64_t, T>(unsafe_yyjson_get_uint(val), result, options.strict_cast);
117
128
  break;
118
129
  case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT:
119
- success = OP::template Operation<int64_t, T>(unsafe_yyjson_get_sint(val), result, strict);
130
+ success = OP::template Operation<int64_t, T>(unsafe_yyjson_get_sint(val), result, options.strict_cast);
120
131
  break;
121
132
  case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL:
122
- success = OP::template Operation<double, T>(unsafe_yyjson_get_real(val), result, strict);
133
+ success = OP::template Operation<double, T>(unsafe_yyjson_get_real(val), result, options.strict_cast);
123
134
  break;
124
135
  default:
125
136
  throw InternalException("Unknown yyjson tag in GetValueNumerical");
126
137
  }
127
- if (!success && strict) {
128
- JSONCommon::ThrowValFormatError("Failed to cast value to numerical: %s", val);
138
+ if (!success && options.strict_cast) {
139
+ options.error_message =
140
+ StringUtil::Format("Failed to cast value to numerical: %s", JSONCommon::ValToString(val));
129
141
  }
130
142
  return success;
131
143
  }
132
144
 
133
145
  template <class T, class OP = TryCastToDecimal>
134
- static inline bool GetValueDecimal(yyjson_val *val, T &result, uint8_t w, uint8_t s, bool strict) {
146
+ static inline bool GetValueDecimal(yyjson_val *val, T &result, uint8_t w, uint8_t s, JSONTransformOptions &options) {
135
147
  bool success;
136
- string error_message;
137
- switch (yyjson_get_tag(val)) {
148
+ switch (unsafe_yyjson_get_tag(val)) {
138
149
  case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
139
150
  return false;
140
151
  case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
141
- success = OP::template Operation<string_t, T>(GetString(val), result, &error_message, w, s);
152
+ success = OP::template Operation<string_t, T>(GetString(val), result, &options.error_message, w, s);
142
153
  break;
143
154
  case YYJSON_TYPE_ARR | YYJSON_SUBTYPE_NONE:
144
155
  case YYJSON_TYPE_OBJ | YYJSON_SUBTYPE_NONE:
@@ -146,30 +157,31 @@ static inline bool GetValueDecimal(yyjson_val *val, T &result, uint8_t w, uint8_
146
157
  break;
147
158
  case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE:
148
159
  case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE:
149
- success = OP::template Operation<bool, T>(unsafe_yyjson_get_bool(val), result, &error_message, w, s);
160
+ success = OP::template Operation<bool, T>(unsafe_yyjson_get_bool(val), result, &options.error_message, w, s);
150
161
  break;
151
162
  case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT:
152
- success = OP::template Operation<uint64_t, T>(unsafe_yyjson_get_uint(val), result, &error_message, w, s);
163
+ success =
164
+ OP::template Operation<uint64_t, T>(unsafe_yyjson_get_uint(val), result, &options.error_message, w, s);
153
165
  break;
154
166
  case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT:
155
- success = OP::template Operation<int64_t, T>(unsafe_yyjson_get_sint(val), result, &error_message, w, s);
167
+ success = OP::template Operation<int64_t, T>(unsafe_yyjson_get_sint(val), result, &options.error_message, w, s);
156
168
  break;
157
169
  case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL:
158
- success = OP::template Operation<double, T>(unsafe_yyjson_get_real(val), result, &error_message, w, s);
170
+ success = OP::template Operation<double, T>(unsafe_yyjson_get_real(val), result, &options.error_message, w, s);
159
171
  break;
160
172
  default:
161
173
  throw InternalException("Unknown yyjson tag in GetValueString");
162
174
  }
163
- if (!success && strict) {
164
- JSONCommon::ThrowValFormatError("Failed to cast value to numerical: %s", val);
175
+ if (!success && options.strict_cast) {
176
+ options.error_message = StringUtil::Format("Failed to cast value to decimal: %s", JSONCommon::ValToString(val));
165
177
  }
166
178
  return success;
167
179
  }
168
180
 
169
181
  static inline bool GetValueString(yyjson_val *val, yyjson_alc *alc, string_t &result, Vector &vector) {
170
- switch (yyjson_get_tag(val)) {
182
+ switch (unsafe_yyjson_get_tag(val)) {
171
183
  case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
172
- return false;
184
+ return true;
173
185
  case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
174
186
  result = string_t(unsafe_yyjson_get_str(val), unsafe_yyjson_get_len(val));
175
187
  return true;
@@ -196,131 +208,155 @@ static inline bool GetValueString(yyjson_val *val, yyjson_alc *alc, string_t &re
196
208
  }
197
209
 
198
210
  template <class T>
199
- static void TransformNumerical(yyjson_val *vals[], Vector &result, const idx_t count, const bool strict) {
211
+ static bool TransformNumerical(yyjson_val *vals[], Vector &result, const idx_t count, JSONTransformOptions &options) {
200
212
  auto data = (T *)FlatVector::GetData(result);
201
213
  auto &validity = FlatVector::Validity(result);
214
+
202
215
  for (idx_t i = 0; i < count; i++) {
203
216
  const auto &val = vals[i];
204
- if (!val || !GetValueNumerical<T>(val, data[i], strict)) {
217
+ if (!val || unsafe_yyjson_is_null(val)) {
218
+ validity.SetInvalid(i);
219
+ } else if (!GetValueNumerical<T>(val, data[i], options)) {
205
220
  validity.SetInvalid(i);
221
+ if (options.strict_cast) {
222
+ options.object_index = i;
223
+ return false;
224
+ }
206
225
  }
207
226
  }
227
+ return true;
208
228
  }
209
229
 
210
230
  template <class T>
211
- static void TransformDecimal(yyjson_val *vals[], Vector &result, const idx_t count, uint8_t width, uint8_t scale,
212
- const bool strict) {
231
+ static bool TransformDecimal(yyjson_val *vals[], Vector &result, const idx_t count, uint8_t width, uint8_t scale,
232
+ JSONTransformOptions &options) {
213
233
  auto data = (T *)FlatVector::GetData(result);
214
234
  auto &validity = FlatVector::Validity(result);
235
+
215
236
  for (idx_t i = 0; i < count; i++) {
216
237
  const auto &val = vals[i];
217
- if (!val || !GetValueDecimal<T>(val, data[i], width, scale, strict)) {
238
+ if (!val || unsafe_yyjson_is_null(val)) {
239
+ validity.SetInvalid(i);
240
+ } else if (!GetValueDecimal<T>(val, data[i], width, scale, options)) {
218
241
  validity.SetInvalid(i);
242
+ if (options.strict_cast) {
243
+ options.object_index = i;
244
+ return false;
245
+ }
219
246
  }
220
247
  }
248
+ return true;
221
249
  }
222
250
 
223
- void JSONTransform::GetStringVector(yyjson_val *vals[], const idx_t count, const LogicalType &target,
224
- Vector &string_vector, const bool strict) {
251
+ bool JSONTransform::GetStringVector(yyjson_val *vals[], const idx_t count, const LogicalType &target,
252
+ Vector &string_vector, JSONTransformOptions &options) {
225
253
  auto data = (string_t *)FlatVector::GetData(string_vector);
226
254
  auto &validity = FlatVector::Validity(string_vector);
227
255
 
228
256
  for (idx_t i = 0; i < count; i++) {
229
257
  const auto &val = vals[i];
230
- if (!val || yyjson_is_null(val)) {
258
+ if (!val || unsafe_yyjson_is_null(val)) {
231
259
  validity.SetInvalid(i);
232
- } else if (strict && !yyjson_is_str(val)) {
233
- JSONCommon::ThrowValFormatError("Unable to cast '%s' to " + LogicalTypeIdToString(target.id()), val);
260
+ } else if (options.strict_cast && !unsafe_yyjson_is_str(val)) {
261
+ options.error_message = StringUtil::Format("Unable to cast '%s' to " + LogicalTypeIdToString(target.id()),
262
+ JSONCommon::ValToString(val));
263
+ options.object_index = i;
264
+ return false;
234
265
  } else {
235
266
  data[i] = GetString(val);
236
267
  }
237
268
  }
269
+ return true;
238
270
  }
239
271
 
240
- static void TransformFromString(yyjson_val *vals[], Vector &result, const idx_t count, const bool strict) {
272
+ static bool TransformFromString(yyjson_val *vals[], Vector &result, const idx_t count, JSONTransformOptions &options) {
241
273
  Vector string_vector(LogicalTypeId::VARCHAR, count);
242
- JSONTransform::GetStringVector(vals, count, result.GetType(), string_vector, strict);
274
+ if (!JSONTransform::GetStringVector(vals, count, result.GetType(), string_vector, options)) {
275
+ return false;
276
+ }
243
277
 
244
- string error_message;
245
- if (!VectorOperations::DefaultTryCast(string_vector, result, count, &error_message, strict) && strict) {
246
- throw InvalidInputException(error_message);
278
+ if (!VectorOperations::DefaultTryCast(string_vector, result, count, &options.error_message, options.strict_cast) &&
279
+ options.strict_cast) {
280
+ options.object_index = 0; // Can't get line number information here
281
+ options.error_message += " (line/object number information is approximate)";
282
+ return false;
247
283
  }
284
+ return true;
248
285
  }
249
286
 
250
287
  template <class OP, class T>
251
288
  static bool TransformStringWithFormat(Vector &string_vector, StrpTimeFormat &format, const idx_t count, Vector &result,
252
- string &error_message) {
289
+ JSONTransformOptions &options) {
253
290
  const auto source_strings = FlatVector::GetData<string_t>(string_vector);
254
291
  const auto &source_validity = FlatVector::Validity(string_vector);
255
292
 
256
293
  auto target_vals = FlatVector::GetData<T>(result);
257
294
  auto &target_validity = FlatVector::Validity(result);
258
295
 
259
- bool success = true;
260
296
  if (source_validity.AllValid()) {
261
297
  for (idx_t i = 0; i < count; i++) {
262
- if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], error_message)) {
298
+ if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], options.error_message)) {
263
299
  target_validity.SetInvalid(i);
264
- success = false;
300
+ if (options.strict_cast) {
301
+ options.object_index = i;
302
+ return false;
303
+ }
265
304
  }
266
305
  }
267
306
  } else {
268
307
  for (idx_t i = 0; i < count; i++) {
269
308
  if (!source_validity.RowIsValid(i)) {
270
309
  target_validity.SetInvalid(i);
271
- } else if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], error_message)) {
310
+ } else if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], options.error_message)) {
272
311
  target_validity.SetInvalid(i);
273
- success = false;
312
+ if (options.strict_cast) {
313
+ options.object_index = i;
314
+ return false;
315
+ }
274
316
  }
275
317
  }
276
318
  }
277
- return success;
319
+ return true;
278
320
  }
279
321
 
280
- static void TransformFromStringWithFormat(yyjson_val *vals[], Vector &result, const idx_t count,
281
- const JSONTransformOptions &options) {
322
+ static bool TransformFromStringWithFormat(yyjson_val *vals[], Vector &result, const idx_t count,
323
+ JSONTransformOptions &options) {
282
324
  Vector string_vector(LogicalTypeId::VARCHAR, count);
283
- JSONTransform::GetStringVector(vals, count, result.GetType(), string_vector, options.strict_cast);
325
+ if (!JSONTransform::GetStringVector(vals, count, result.GetType(), string_vector, options)) {
326
+ return false;
327
+ }
284
328
 
285
329
  const auto &result_type = result.GetType().id();
286
330
  auto &format = options.date_format_map->GetFormat(result_type);
287
331
 
288
- bool success;
289
- string error_message;
290
332
  switch (result_type) {
291
333
  case LogicalTypeId::DATE:
292
- success = TransformStringWithFormat<TryParseDate, date_t>(string_vector, format, count, result, error_message);
293
- break;
334
+ return TransformStringWithFormat<TryParseDate, date_t>(string_vector, format, count, result, options);
294
335
  case LogicalTypeId::TIMESTAMP:
295
- success = TransformStringWithFormat<TryParseTimeStamp, timestamp_t>(string_vector, format, count, result,
296
- error_message);
297
- break;
336
+ return TransformStringWithFormat<TryParseTimeStamp, timestamp_t>(string_vector, format, count, result, options);
298
337
  default:
299
338
  throw InternalException("No date/timestamp formats for %s", LogicalTypeIdToString(result.GetType().id()));
300
339
  }
301
-
302
- if (options.strict_cast && !success) {
303
- throw CastException(error_message);
304
- }
305
340
  }
306
341
 
307
- static void TransformToString(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count) {
342
+ static bool TransformToString(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count) {
308
343
  auto data = (string_t *)FlatVector::GetData(result);
309
344
  auto &validity = FlatVector::Validity(result);
310
345
  for (idx_t i = 0; i < count; i++) {
311
346
  const auto &val = vals[i];
312
- if (!val || !GetValueString(val, alc, data[i], result)) {
347
+ if (!val || unsafe_yyjson_is_null(vals[i])) {
348
+ validity.SetInvalid(i);
349
+ } else if (!GetValueString(val, alc, data[i], result)) {
313
350
  validity.SetInvalid(i);
314
351
  }
315
352
  }
353
+ // Can always transform to string
354
+ return true;
316
355
  }
317
356
 
318
- static void Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
319
- const JSONTransformOptions &options);
320
-
321
- void JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, const idx_t count,
357
+ bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, const idx_t count,
322
358
  const vector<string> &names, const vector<Vector *> &result_vectors,
323
- const JSONTransformOptions &options) {
359
+ JSONTransformOptions &options) {
324
360
  D_ASSERT(alc);
325
361
  D_ASSERT(names.size() == result_vectors.size());
326
362
  const idx_t column_count = names.size();
@@ -337,6 +373,8 @@ void JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
337
373
  idx_t found_key_count;
338
374
  auto found_keys = (bool *)alc->malloc(alc->ctx, sizeof(bool) * column_count);
339
375
 
376
+ bool success = true;
377
+
340
378
  size_t idx, max;
341
379
  yyjson_val *key, *val;
342
380
  for (idx_t i = 0; i < count; i++) {
@@ -350,17 +388,22 @@ void JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
350
388
  if (it != key_map.end()) {
351
389
  const auto &col_idx = it->second;
352
390
  if (options.error_duplicate_key && found_keys[col_idx]) {
353
- JSONCommon::ThrowValFormatError(
354
- "Duplicate key \"" + string(key_ptr, key_len) + "\" in object %s", objects[i]);
391
+ options.error_message =
392
+ StringUtil::Format("Duplicate key \"" + string(key_ptr, key_len) + "\" in object %s",
393
+ JSONCommon::ValToString(objects[i]));
394
+ options.object_index = i;
395
+ success = false;
396
+ break;
355
397
  }
356
398
  nested_vals[col_idx][i] = val;
357
399
  found_keys[col_idx] = true;
358
- if (++found_key_count == column_count) {
359
- break;
360
- }
400
+ found_key_count++;
361
401
  } else if (options.error_unknown_key) {
362
- JSONCommon::ThrowValFormatError("Object %s has unknown key \"" + string(key_ptr, key_len) + "\"",
363
- objects[i]);
402
+ options.error_message =
403
+ StringUtil::Format("Object %s has unknown key \"" + string(key_ptr, key_len) + "\"",
404
+ JSONCommon::ValToString(objects[i]));
405
+ options.object_index = i;
406
+ success = false;
364
407
  }
365
408
  }
366
409
  if (found_key_count != column_count) {
@@ -369,8 +412,11 @@ void JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
369
412
  for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
370
413
  if (!found_keys[col_idx]) {
371
414
  if (options.error_missing_key) {
372
- JSONCommon::ThrowValFormatError("Object %s does not have key \"" + names[col_idx] + "\"",
373
- objects[i]);
415
+ options.error_message =
416
+ StringUtil::Format("Object %s does not have key \"" + names[col_idx] + "\"",
417
+ JSONCommon::ValToString(objects[i]));
418
+ options.object_index = i;
419
+ success = false;
374
420
  } else {
375
421
  nested_vals[col_idx][i] = nullptr;
376
422
  }
@@ -385,13 +431,28 @@ void JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
385
431
  }
386
432
  }
387
433
 
434
+ if (!success) {
435
+ if (!options.from_file) {
436
+ throw InvalidInputException(options.error_message);
437
+ }
438
+ return false;
439
+ }
440
+
388
441
  for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
389
- Transform(nested_vals[col_idx], alc, *result_vectors[col_idx], count, options);
442
+ if (JSONTransform::Transform(nested_vals[col_idx], alc, *result_vectors[col_idx], count, options)) {
443
+ continue;
444
+ }
445
+ if (!options.from_file) {
446
+ throw InvalidInputException(options.error_message);
447
+ }
448
+ return false;
390
449
  }
450
+
451
+ return success;
391
452
  }
392
453
 
393
- static void TransformObject(yyjson_val *objects[], yyjson_alc *alc, Vector &result, const idx_t count,
394
- const LogicalType &type, const JSONTransformOptions &options) {
454
+ static bool TransformObjectInternal(yyjson_val *objects[], yyjson_alc *alc, Vector &result, const idx_t count,
455
+ const LogicalType &type, JSONTransformOptions &options) {
395
456
  // Get child vectors and names
396
457
  auto &child_vs = StructVector::GetEntries(result);
397
458
  vector<string> child_names;
@@ -403,11 +464,11 @@ static void TransformObject(yyjson_val *objects[], yyjson_alc *alc, Vector &resu
403
464
  child_vectors.push_back(child_vs[child_i].get());
404
465
  }
405
466
 
406
- JSONTransform::TransformObject(objects, alc, count, child_names, child_vectors, options);
467
+ return JSONTransform::TransformObject(objects, alc, count, child_names, child_vectors, options);
407
468
  }
408
469
 
409
- static void TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result, const idx_t count,
410
- const JSONTransformOptions &options) {
470
+ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result, const idx_t count,
471
+ JSONTransformOptions &options) {
411
472
  // Initialize list vector
412
473
  auto list_entries = FlatVector::GetData<list_entry_t>(result);
413
474
  auto &list_validity = FlatVector::Validity(result);
@@ -442,57 +503,71 @@ static void TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
442
503
  }
443
504
  }
444
505
  D_ASSERT(list_i == offset);
506
+
445
507
  // Transform array values
446
- Transform(nested_vals, alc, ListVector::GetEntry(result), offset, options);
508
+ auto success = JSONTransform::Transform(nested_vals, alc, ListVector::GetEntry(result), offset, options);
509
+ if (!success && options.from_file) {
510
+ // Set object index in case of error in nested list so we can get accurate line number information
511
+ for (idx_t i = 0; i < count; i++) {
512
+ if (!list_validity.RowIsValid(i)) {
513
+ continue;
514
+ }
515
+ auto &entry = list_entries[i];
516
+ if (options.object_index >= entry.offset && options.object_index < entry.offset + entry.length) {
517
+ options.object_index = i;
518
+ }
519
+ }
520
+ }
521
+ return success;
447
522
  }
448
523
 
449
- static void Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
450
- const JSONTransformOptions &options) {
524
+ bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
525
+ JSONTransformOptions &options) {
451
526
  auto result_type = result.GetType();
452
- if (options.date_format_map && (result_type == LogicalTypeId::TIMESTAMP || result_type == LogicalTypeId::DATE)) {
453
- TransformFromStringWithFormat(vals, result, count, options);
454
- return;
527
+ if ((result_type == LogicalTypeId::TIMESTAMP || result_type == LogicalTypeId::DATE) && options.date_format_map) {
528
+ // Auto-detected date/timestamp format during sampling
529
+ return TransformFromStringWithFormat(vals, result, count, options);
455
530
  }
456
531
 
457
532
  switch (result_type.id()) {
458
533
  case LogicalTypeId::SQLNULL:
459
- return;
534
+ return true;
460
535
  case LogicalTypeId::BOOLEAN:
461
- return TransformNumerical<bool>(vals, result, count, options.strict_cast);
536
+ return TransformNumerical<bool>(vals, result, count, options);
462
537
  case LogicalTypeId::TINYINT:
463
- return TransformNumerical<int8_t>(vals, result, count, options.strict_cast);
538
+ return TransformNumerical<int8_t>(vals, result, count, options);
464
539
  case LogicalTypeId::SMALLINT:
465
- return TransformNumerical<int16_t>(vals, result, count, options.strict_cast);
540
+ return TransformNumerical<int16_t>(vals, result, count, options);
466
541
  case LogicalTypeId::INTEGER:
467
- return TransformNumerical<int32_t>(vals, result, count, options.strict_cast);
542
+ return TransformNumerical<int32_t>(vals, result, count, options);
468
543
  case LogicalTypeId::BIGINT:
469
- return TransformNumerical<int64_t>(vals, result, count, options.strict_cast);
544
+ return TransformNumerical<int64_t>(vals, result, count, options);
470
545
  case LogicalTypeId::UTINYINT:
471
- return TransformNumerical<uint8_t>(vals, result, count, options.strict_cast);
546
+ return TransformNumerical<uint8_t>(vals, result, count, options);
472
547
  case LogicalTypeId::USMALLINT:
473
- return TransformNumerical<uint16_t>(vals, result, count, options.strict_cast);
548
+ return TransformNumerical<uint16_t>(vals, result, count, options);
474
549
  case LogicalTypeId::UINTEGER:
475
- return TransformNumerical<uint32_t>(vals, result, count, options.strict_cast);
550
+ return TransformNumerical<uint32_t>(vals, result, count, options);
476
551
  case LogicalTypeId::UBIGINT:
477
- return TransformNumerical<uint64_t>(vals, result, count, options.strict_cast);
552
+ return TransformNumerical<uint64_t>(vals, result, count, options);
478
553
  case LogicalTypeId::HUGEINT:
479
- return TransformNumerical<hugeint_t>(vals, result, count, options.strict_cast);
554
+ return TransformNumerical<hugeint_t>(vals, result, count, options);
480
555
  case LogicalTypeId::FLOAT:
481
- return TransformNumerical<float>(vals, result, count, options.strict_cast);
556
+ return TransformNumerical<float>(vals, result, count, options);
482
557
  case LogicalTypeId::DOUBLE:
483
- return TransformNumerical<double>(vals, result, count, options.strict_cast);
558
+ return TransformNumerical<double>(vals, result, count, options);
484
559
  case LogicalTypeId::DECIMAL: {
485
560
  auto width = DecimalType::GetWidth(result_type);
486
561
  auto scale = DecimalType::GetScale(result_type);
487
562
  switch (result_type.InternalType()) {
488
563
  case PhysicalType::INT16:
489
- return TransformDecimal<int16_t>(vals, result, count, width, scale, options.strict_cast);
564
+ return TransformDecimal<int16_t>(vals, result, count, width, scale, options);
490
565
  case PhysicalType::INT32:
491
- return TransformDecimal<int32_t>(vals, result, count, width, scale, options.strict_cast);
566
+ return TransformDecimal<int32_t>(vals, result, count, width, scale, options);
492
567
  case PhysicalType::INT64:
493
- return TransformDecimal<int64_t>(vals, result, count, width, scale, options.strict_cast);
568
+ return TransformDecimal<int64_t>(vals, result, count, width, scale, options);
494
569
  case PhysicalType::INT128:
495
- return TransformDecimal<hugeint_t>(vals, result, count, width, scale, options.strict_cast);
570
+ return TransformDecimal<hugeint_t>(vals, result, count, width, scale, options);
496
571
  default:
497
572
  throw InternalException("Unimplemented physical type for decimal");
498
573
  }
@@ -509,12 +584,12 @@ static void Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const
509
584
  case LogicalTypeId::TIMESTAMP_MS:
510
585
  case LogicalTypeId::TIMESTAMP_SEC:
511
586
  case LogicalTypeId::UUID:
512
- return TransformFromString(vals, result, count, options.strict_cast);
587
+ return TransformFromString(vals, result, count, options);
513
588
  case LogicalTypeId::VARCHAR:
514
589
  case LogicalTypeId::BLOB:
515
590
  return TransformToString(vals, alc, result, count);
516
591
  case LogicalTypeId::STRUCT:
517
- return TransformObject(vals, alc, result, count, result_type, options);
592
+ return TransformObjectInternal(vals, alc, result, count, result_type, options);
518
593
  case LogicalTypeId::LIST:
519
594
  return TransformArray(vals, alc, result, count, options);
520
595
  default:
@@ -548,9 +623,11 @@ static void TransformFunction(DataChunk &args, ExpressionState &state, Vector &r
548
623
  }
549
624
  }
550
625
 
551
- const JSONTransformOptions options {strict, strict, strict, false, nullptr};
626
+ JSONTransformOptions options(strict, strict, strict, false);
552
627
 
553
- Transform(vals, alc, result, count, options);
628
+ if (!JSONTransform::Transform(vals, alc, result, count, options)) {
629
+ throw InvalidInputException(options.error_message);
630
+ }
554
631
 
555
632
  if (args.AllConstant()) {
556
633
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
@@ -26,27 +26,25 @@ void AutoDetect(ClientContext &context, JSONScanData &bind_data, vector<LogicalT
26
26
  // Read for the specified sample size
27
27
  JSONStructureNode node;
28
28
  Vector string_vector(LogicalType::VARCHAR);
29
- idx_t read = 0;
30
- while (read < bind_data.sample_size) {
29
+ idx_t remaining = bind_data.sample_size;
30
+ while (remaining != 0) {
31
31
  allocator.Reset();
32
- auto count = lstate.ReadNext(gstate);
33
- if (count == 0) {
32
+ auto read_count = lstate.ReadNext(gstate);
33
+ if (read_count == 0) {
34
34
  break;
35
35
  }
36
- idx_t i;
37
- for (i = 0; i < count; i++) {
36
+ idx_t next = MinValue<idx_t>(read_count, remaining);
37
+ for (idx_t i = 0; i < next; i++) {
38
38
  if (lstate.objects[i]) {
39
39
  JSONStructure::ExtractStructure(lstate.objects[i], node);
40
40
  }
41
- if (++read == bind_data.sample_size) {
42
- break;
43
- }
44
41
  }
45
42
  if (!node.ContainsVarchar()) { // Can't refine non-VARCHAR types
46
43
  continue;
47
44
  }
48
45
  node.InitializeCandidateTypes(bind_data.max_depth);
49
- node.RefineCandidateTypes(lstate.objects, i, string_vector, allocator, bind_data.date_format_map);
46
+ node.RefineCandidateTypes(lstate.objects, next, string_vector, allocator, bind_data.date_format_map);
47
+ remaining -= next;
50
48
  }
51
49
  bind_data.type = original_scan_type;
52
50
  bind_data.transform_options.date_format_map = &bind_data.date_format_map;
@@ -55,15 +53,15 @@ void AutoDetect(ClientContext &context, JSONScanData &bind_data, vector<LogicalT
55
53
  if (type.id() != LogicalTypeId::STRUCT) {
56
54
  return_types.emplace_back(type);
57
55
  names.emplace_back("json");
58
- return;
59
- }
60
-
61
- const auto &child_types = StructType::GetChildTypes(type);
62
- return_types.reserve(child_types.size());
63
- names.reserve(child_types.size());
64
- for (auto &child_type : child_types) {
65
- return_types.emplace_back(child_type.second);
66
- names.emplace_back(child_type.first);
56
+ bind_data.objects = false;
57
+ } else {
58
+ const auto &child_types = StructType::GetChildTypes(type);
59
+ return_types.reserve(child_types.size());
60
+ names.reserve(child_types.size());
61
+ for (auto &child_type : child_types) {
62
+ return_types.emplace_back(child_type.second);
63
+ names.emplace_back(child_type.first);
64
+ }
67
65
  }
68
66
 
69
67
  for (auto &reader : gstate.json_readers) {
@@ -141,6 +139,7 @@ unique_ptr<FunctionData> ReadJSONBind(ClientContext &context, TableFunctionBindI
141
139
  transform_options.error_duplicate_key = !bind_data.ignore_errors;
142
140
  transform_options.error_missing_key = false;
143
141
  transform_options.error_unknown_key = bind_data.auto_detect && !bind_data.ignore_errors;
142
+ transform_options.from_file = true;
144
143
 
145
144
  return result;
146
145
  }
@@ -160,9 +159,23 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
160
159
  result_vectors.push_back(&output.data[col_idx]);
161
160
  }
162
161
 
163
- // TODO: if errors occur during transformation, we don't have line number information
164
- JSONTransform::TransformObject(objects, lstate.GetAllocator(), count, gstate.bind_data.names, result_vectors,
165
- gstate.bind_data.transform_options);
162
+ // Pass current reader to transform options so we can get line number information if an error occurs
163
+ bool success;
164
+ if (gstate.bind_data.objects) {
165
+ success = JSONTransform::TransformObject(objects, lstate.GetAllocator(), count, gstate.bind_data.names,
166
+ result_vectors, lstate.transform_options);
167
+ } else {
168
+ success = JSONTransform::Transform(objects, lstate.GetAllocator(), *result_vectors[0], count,
169
+ lstate.transform_options);
170
+ }
171
+ if (!success) {
172
+ string hint =
173
+ gstate.bind_data.auto_detect
174
+ ? "\nTry increasing 'sample_size', reducing 'maximum_depth', or specifying 'columns' manually."
175
+ : "";
176
+ lstate.ThrowTransformError(count, lstate.transform_options.object_index,
177
+ lstate.transform_options.error_message + hint);
178
+ }
166
179
  output.SetCardinality(count);
167
180
  }
168
181