duckdb 0.6.2-dev2115.0 → 0.6.2-dev2226.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -5
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +6 -1
- package/src/duckdb/extension/json/include/json_common.hpp +1 -0
- package/src/duckdb/extension/json/include/json_scan.hpp +7 -0
- package/src/duckdb/extension/json/include/json_transform.hpp +25 -10
- package/src/duckdb/extension/json/json_common.cpp +6 -2
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +47 -9
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +183 -106
- package/src/duckdb/extension/json/json_functions/read_json.cpp +35 -22
- package/src/duckdb/extension/json/json_scan.cpp +26 -5
- package/src/duckdb/extension/parquet/parquet-extension.cpp +1 -0
- package/src/duckdb/src/catalog/catalog.cpp +11 -12
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/common/box_renderer.cpp +9 -1
- package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
- package/src/duckdb/src/common/enums/relation_type.cpp +2 -0
- package/src/duckdb/src/common/gzip_file_system.cpp +1 -1
- package/src/duckdb/src/common/local_file_system.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +2 -2
- package/src/duckdb/src/common/types/column_data_allocator.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +7 -2
- package/src/duckdb/src/common/types/vector.cpp +3 -2
- package/src/duckdb/src/common/virtual_file_system.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +5 -5
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -5
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +2 -0
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +182 -123
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +22 -18
- package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +1 -1
- package/src/duckdb/src/function/aggregate/distributive/arg_min_max.cpp +2 -3
- package/src/duckdb/src/function/scalar/math/setseed.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/substring.cpp +8 -0
- package/src/duckdb/src/function/table/read_csv.cpp +1 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/http_stats.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/limits.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +1 -9
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_unnest.hpp +5 -1
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +0 -4
- package/src/duckdb/src/include/duckdb/main/database.hpp +6 -0
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +5 -5
- package/src/duckdb/src/include/duckdb/main/relation/write_csv_relation.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +34 -0
- package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +4 -3
- package/src/duckdb/src/include/duckdb.h +7 -0
- package/src/duckdb/src/main/capi/threading-c.cpp +8 -0
- package/src/duckdb/src/main/client_context.cpp +7 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +14 -0
- package/src/duckdb/src/main/database.cpp +57 -40
- package/src/duckdb/src/main/extension/extension_load.cpp +20 -28
- package/src/duckdb/src/main/relation/write_csv_relation.cpp +4 -2
- package/src/duckdb/src/main/relation/write_parquet_relation.cpp +37 -0
- package/src/duckdb/src/main/relation.cpp +12 -2
- package/src/duckdb/src/parallel/executor.cpp +4 -0
- package/src/duckdb/src/parser/statement/copy_statement.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_show.cpp +4 -3
- package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +24 -3
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -1
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +2 -0
- package/src/duckdb/src/storage/compression/bitpacking.cpp +2 -1
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +1 -1
- package/src/duckdb/src/storage/index.cpp +1 -1
- package/src/duckdb/src/storage/meta_block_writer.cpp +1 -1
- package/src/duckdb/src/storage/table/column_segment.cpp +3 -3
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +1 -2
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +539 -300
- package/src/duckdb/ub_src_main.cpp +0 -2
- package/src/duckdb/ub_src_main_relation.cpp +2 -0
- package/src/duckdb/src/include/duckdb/function/replacement_open.hpp +0 -54
- package/src/duckdb/src/include/duckdb/main/replacement_opens.hpp +0 -20
- package/src/duckdb/src/main/extension_prefix_opener.cpp +0 -55
|
@@ -8,11 +8,21 @@
|
|
|
8
8
|
|
|
9
9
|
namespace duckdb {
|
|
10
10
|
|
|
11
|
+
JSONTransformOptions::JSONTransformOptions() {
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
JSONTransformOptions::JSONTransformOptions(bool strict_cast_p, bool error_duplicate_key_p, bool error_missing_key_p,
|
|
15
|
+
bool error_unkown_key_p)
|
|
16
|
+
: strict_cast(strict_cast_p), error_duplicate_key(error_duplicate_key_p), error_missing_key(error_missing_key_p),
|
|
17
|
+
error_unknown_key(error_unkown_key_p) {
|
|
18
|
+
}
|
|
19
|
+
|
|
11
20
|
void JSONTransformOptions::Serialize(FieldWriter &writer) {
|
|
12
21
|
writer.WriteField(strict_cast);
|
|
13
22
|
writer.WriteField(error_duplicate_key);
|
|
14
23
|
writer.WriteField(error_missing_key);
|
|
15
24
|
writer.WriteField(error_unknown_key);
|
|
25
|
+
writer.WriteField(from_file);
|
|
16
26
|
}
|
|
17
27
|
|
|
18
28
|
void JSONTransformOptions::Deserialize(FieldReader &reader) {
|
|
@@ -20,6 +30,7 @@ void JSONTransformOptions::Deserialize(FieldReader &reader) {
|
|
|
20
30
|
error_duplicate_key = reader.ReadRequired<bool>();
|
|
21
31
|
error_missing_key = reader.ReadRequired<bool>();
|
|
22
32
|
error_unknown_key = reader.ReadRequired<bool>();
|
|
33
|
+
from_file = reader.ReadRequired<bool>();
|
|
23
34
|
}
|
|
24
35
|
|
|
25
36
|
//! Forward declaration for recursion
|
|
@@ -96,13 +107,13 @@ static inline string_t GetString(yyjson_val *val) {
|
|
|
96
107
|
}
|
|
97
108
|
|
|
98
109
|
template <class T, class OP = TryCast>
|
|
99
|
-
static inline bool GetValueNumerical(yyjson_val *val, T &result,
|
|
110
|
+
static inline bool GetValueNumerical(yyjson_val *val, T &result, JSONTransformOptions &options) {
|
|
100
111
|
bool success;
|
|
101
|
-
switch (
|
|
112
|
+
switch (unsafe_yyjson_get_tag(val)) {
|
|
102
113
|
case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
|
|
103
114
|
return false;
|
|
104
115
|
case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
|
|
105
|
-
success = OP::template Operation<string_t, T>(GetString(val), result,
|
|
116
|
+
success = OP::template Operation<string_t, T>(GetString(val), result, options.strict_cast);
|
|
106
117
|
break;
|
|
107
118
|
case YYJSON_TYPE_ARR | YYJSON_SUBTYPE_NONE:
|
|
108
119
|
case YYJSON_TYPE_OBJ | YYJSON_SUBTYPE_NONE:
|
|
@@ -110,35 +121,35 @@ static inline bool GetValueNumerical(yyjson_val *val, T &result, bool strict) {
|
|
|
110
121
|
break;
|
|
111
122
|
case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE:
|
|
112
123
|
case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE:
|
|
113
|
-
success = OP::template Operation<bool, T>(unsafe_yyjson_get_bool(val), result,
|
|
124
|
+
success = OP::template Operation<bool, T>(unsafe_yyjson_get_bool(val), result, options.strict_cast);
|
|
114
125
|
break;
|
|
115
126
|
case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT:
|
|
116
|
-
success = OP::template Operation<uint64_t, T>(unsafe_yyjson_get_uint(val), result,
|
|
127
|
+
success = OP::template Operation<uint64_t, T>(unsafe_yyjson_get_uint(val), result, options.strict_cast);
|
|
117
128
|
break;
|
|
118
129
|
case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT:
|
|
119
|
-
success = OP::template Operation<int64_t, T>(unsafe_yyjson_get_sint(val), result,
|
|
130
|
+
success = OP::template Operation<int64_t, T>(unsafe_yyjson_get_sint(val), result, options.strict_cast);
|
|
120
131
|
break;
|
|
121
132
|
case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL:
|
|
122
|
-
success = OP::template Operation<double, T>(unsafe_yyjson_get_real(val), result,
|
|
133
|
+
success = OP::template Operation<double, T>(unsafe_yyjson_get_real(val), result, options.strict_cast);
|
|
123
134
|
break;
|
|
124
135
|
default:
|
|
125
136
|
throw InternalException("Unknown yyjson tag in GetValueNumerical");
|
|
126
137
|
}
|
|
127
|
-
if (!success &&
|
|
128
|
-
|
|
138
|
+
if (!success && options.strict_cast) {
|
|
139
|
+
options.error_message =
|
|
140
|
+
StringUtil::Format("Failed to cast value to numerical: %s", JSONCommon::ValToString(val));
|
|
129
141
|
}
|
|
130
142
|
return success;
|
|
131
143
|
}
|
|
132
144
|
|
|
133
145
|
template <class T, class OP = TryCastToDecimal>
|
|
134
|
-
static inline bool GetValueDecimal(yyjson_val *val, T &result, uint8_t w, uint8_t s,
|
|
146
|
+
static inline bool GetValueDecimal(yyjson_val *val, T &result, uint8_t w, uint8_t s, JSONTransformOptions &options) {
|
|
135
147
|
bool success;
|
|
136
|
-
|
|
137
|
-
switch (yyjson_get_tag(val)) {
|
|
148
|
+
switch (unsafe_yyjson_get_tag(val)) {
|
|
138
149
|
case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
|
|
139
150
|
return false;
|
|
140
151
|
case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
|
|
141
|
-
success = OP::template Operation<string_t, T>(GetString(val), result, &error_message, w, s);
|
|
152
|
+
success = OP::template Operation<string_t, T>(GetString(val), result, &options.error_message, w, s);
|
|
142
153
|
break;
|
|
143
154
|
case YYJSON_TYPE_ARR | YYJSON_SUBTYPE_NONE:
|
|
144
155
|
case YYJSON_TYPE_OBJ | YYJSON_SUBTYPE_NONE:
|
|
@@ -146,30 +157,31 @@ static inline bool GetValueDecimal(yyjson_val *val, T &result, uint8_t w, uint8_
|
|
|
146
157
|
break;
|
|
147
158
|
case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE:
|
|
148
159
|
case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE:
|
|
149
|
-
success = OP::template Operation<bool, T>(unsafe_yyjson_get_bool(val), result, &error_message, w, s);
|
|
160
|
+
success = OP::template Operation<bool, T>(unsafe_yyjson_get_bool(val), result, &options.error_message, w, s);
|
|
150
161
|
break;
|
|
151
162
|
case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT:
|
|
152
|
-
success =
|
|
163
|
+
success =
|
|
164
|
+
OP::template Operation<uint64_t, T>(unsafe_yyjson_get_uint(val), result, &options.error_message, w, s);
|
|
153
165
|
break;
|
|
154
166
|
case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT:
|
|
155
|
-
success = OP::template Operation<int64_t, T>(unsafe_yyjson_get_sint(val), result, &error_message, w, s);
|
|
167
|
+
success = OP::template Operation<int64_t, T>(unsafe_yyjson_get_sint(val), result, &options.error_message, w, s);
|
|
156
168
|
break;
|
|
157
169
|
case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL:
|
|
158
|
-
success = OP::template Operation<double, T>(unsafe_yyjson_get_real(val), result, &error_message, w, s);
|
|
170
|
+
success = OP::template Operation<double, T>(unsafe_yyjson_get_real(val), result, &options.error_message, w, s);
|
|
159
171
|
break;
|
|
160
172
|
default:
|
|
161
173
|
throw InternalException("Unknown yyjson tag in GetValueString");
|
|
162
174
|
}
|
|
163
|
-
if (!success &&
|
|
164
|
-
|
|
175
|
+
if (!success && options.strict_cast) {
|
|
176
|
+
options.error_message = StringUtil::Format("Failed to cast value to decimal: %s", JSONCommon::ValToString(val));
|
|
165
177
|
}
|
|
166
178
|
return success;
|
|
167
179
|
}
|
|
168
180
|
|
|
169
181
|
static inline bool GetValueString(yyjson_val *val, yyjson_alc *alc, string_t &result, Vector &vector) {
|
|
170
|
-
switch (
|
|
182
|
+
switch (unsafe_yyjson_get_tag(val)) {
|
|
171
183
|
case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
|
|
172
|
-
return
|
|
184
|
+
return true;
|
|
173
185
|
case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
|
|
174
186
|
result = string_t(unsafe_yyjson_get_str(val), unsafe_yyjson_get_len(val));
|
|
175
187
|
return true;
|
|
@@ -196,131 +208,155 @@ static inline bool GetValueString(yyjson_val *val, yyjson_alc *alc, string_t &re
|
|
|
196
208
|
}
|
|
197
209
|
|
|
198
210
|
template <class T>
|
|
199
|
-
static
|
|
211
|
+
static bool TransformNumerical(yyjson_val *vals[], Vector &result, const idx_t count, JSONTransformOptions &options) {
|
|
200
212
|
auto data = (T *)FlatVector::GetData(result);
|
|
201
213
|
auto &validity = FlatVector::Validity(result);
|
|
214
|
+
|
|
202
215
|
for (idx_t i = 0; i < count; i++) {
|
|
203
216
|
const auto &val = vals[i];
|
|
204
|
-
if (!val ||
|
|
217
|
+
if (!val || unsafe_yyjson_is_null(val)) {
|
|
218
|
+
validity.SetInvalid(i);
|
|
219
|
+
} else if (!GetValueNumerical<T>(val, data[i], options)) {
|
|
205
220
|
validity.SetInvalid(i);
|
|
221
|
+
if (options.strict_cast) {
|
|
222
|
+
options.object_index = i;
|
|
223
|
+
return false;
|
|
224
|
+
}
|
|
206
225
|
}
|
|
207
226
|
}
|
|
227
|
+
return true;
|
|
208
228
|
}
|
|
209
229
|
|
|
210
230
|
template <class T>
|
|
211
|
-
static
|
|
212
|
-
|
|
231
|
+
static bool TransformDecimal(yyjson_val *vals[], Vector &result, const idx_t count, uint8_t width, uint8_t scale,
|
|
232
|
+
JSONTransformOptions &options) {
|
|
213
233
|
auto data = (T *)FlatVector::GetData(result);
|
|
214
234
|
auto &validity = FlatVector::Validity(result);
|
|
235
|
+
|
|
215
236
|
for (idx_t i = 0; i < count; i++) {
|
|
216
237
|
const auto &val = vals[i];
|
|
217
|
-
if (!val ||
|
|
238
|
+
if (!val || unsafe_yyjson_is_null(val)) {
|
|
239
|
+
validity.SetInvalid(i);
|
|
240
|
+
} else if (!GetValueDecimal<T>(val, data[i], width, scale, options)) {
|
|
218
241
|
validity.SetInvalid(i);
|
|
242
|
+
if (options.strict_cast) {
|
|
243
|
+
options.object_index = i;
|
|
244
|
+
return false;
|
|
245
|
+
}
|
|
219
246
|
}
|
|
220
247
|
}
|
|
248
|
+
return true;
|
|
221
249
|
}
|
|
222
250
|
|
|
223
|
-
|
|
224
|
-
Vector &string_vector,
|
|
251
|
+
bool JSONTransform::GetStringVector(yyjson_val *vals[], const idx_t count, const LogicalType &target,
|
|
252
|
+
Vector &string_vector, JSONTransformOptions &options) {
|
|
225
253
|
auto data = (string_t *)FlatVector::GetData(string_vector);
|
|
226
254
|
auto &validity = FlatVector::Validity(string_vector);
|
|
227
255
|
|
|
228
256
|
for (idx_t i = 0; i < count; i++) {
|
|
229
257
|
const auto &val = vals[i];
|
|
230
|
-
if (!val ||
|
|
258
|
+
if (!val || unsafe_yyjson_is_null(val)) {
|
|
231
259
|
validity.SetInvalid(i);
|
|
232
|
-
} else if (
|
|
233
|
-
|
|
260
|
+
} else if (options.strict_cast && !unsafe_yyjson_is_str(val)) {
|
|
261
|
+
options.error_message = StringUtil::Format("Unable to cast '%s' to " + LogicalTypeIdToString(target.id()),
|
|
262
|
+
JSONCommon::ValToString(val));
|
|
263
|
+
options.object_index = i;
|
|
264
|
+
return false;
|
|
234
265
|
} else {
|
|
235
266
|
data[i] = GetString(val);
|
|
236
267
|
}
|
|
237
268
|
}
|
|
269
|
+
return true;
|
|
238
270
|
}
|
|
239
271
|
|
|
240
|
-
static
|
|
272
|
+
static bool TransformFromString(yyjson_val *vals[], Vector &result, const idx_t count, JSONTransformOptions &options) {
|
|
241
273
|
Vector string_vector(LogicalTypeId::VARCHAR, count);
|
|
242
|
-
JSONTransform::GetStringVector(vals, count, result.GetType(), string_vector,
|
|
274
|
+
if (!JSONTransform::GetStringVector(vals, count, result.GetType(), string_vector, options)) {
|
|
275
|
+
return false;
|
|
276
|
+
}
|
|
243
277
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
278
|
+
if (!VectorOperations::DefaultTryCast(string_vector, result, count, &options.error_message, options.strict_cast) &&
|
|
279
|
+
options.strict_cast) {
|
|
280
|
+
options.object_index = 0; // Can't get line number information here
|
|
281
|
+
options.error_message += " (line/object number information is approximate)";
|
|
282
|
+
return false;
|
|
247
283
|
}
|
|
284
|
+
return true;
|
|
248
285
|
}
|
|
249
286
|
|
|
250
287
|
template <class OP, class T>
|
|
251
288
|
static bool TransformStringWithFormat(Vector &string_vector, StrpTimeFormat &format, const idx_t count, Vector &result,
|
|
252
|
-
|
|
289
|
+
JSONTransformOptions &options) {
|
|
253
290
|
const auto source_strings = FlatVector::GetData<string_t>(string_vector);
|
|
254
291
|
const auto &source_validity = FlatVector::Validity(string_vector);
|
|
255
292
|
|
|
256
293
|
auto target_vals = FlatVector::GetData<T>(result);
|
|
257
294
|
auto &target_validity = FlatVector::Validity(result);
|
|
258
295
|
|
|
259
|
-
bool success = true;
|
|
260
296
|
if (source_validity.AllValid()) {
|
|
261
297
|
for (idx_t i = 0; i < count; i++) {
|
|
262
|
-
if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], error_message)) {
|
|
298
|
+
if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], options.error_message)) {
|
|
263
299
|
target_validity.SetInvalid(i);
|
|
264
|
-
|
|
300
|
+
if (options.strict_cast) {
|
|
301
|
+
options.object_index = i;
|
|
302
|
+
return false;
|
|
303
|
+
}
|
|
265
304
|
}
|
|
266
305
|
}
|
|
267
306
|
} else {
|
|
268
307
|
for (idx_t i = 0; i < count; i++) {
|
|
269
308
|
if (!source_validity.RowIsValid(i)) {
|
|
270
309
|
target_validity.SetInvalid(i);
|
|
271
|
-
} else if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], error_message)) {
|
|
310
|
+
} else if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], options.error_message)) {
|
|
272
311
|
target_validity.SetInvalid(i);
|
|
273
|
-
|
|
312
|
+
if (options.strict_cast) {
|
|
313
|
+
options.object_index = i;
|
|
314
|
+
return false;
|
|
315
|
+
}
|
|
274
316
|
}
|
|
275
317
|
}
|
|
276
318
|
}
|
|
277
|
-
return
|
|
319
|
+
return true;
|
|
278
320
|
}
|
|
279
321
|
|
|
280
|
-
static
|
|
281
|
-
|
|
322
|
+
static bool TransformFromStringWithFormat(yyjson_val *vals[], Vector &result, const idx_t count,
|
|
323
|
+
JSONTransformOptions &options) {
|
|
282
324
|
Vector string_vector(LogicalTypeId::VARCHAR, count);
|
|
283
|
-
JSONTransform::GetStringVector(vals, count, result.GetType(), string_vector, options
|
|
325
|
+
if (!JSONTransform::GetStringVector(vals, count, result.GetType(), string_vector, options)) {
|
|
326
|
+
return false;
|
|
327
|
+
}
|
|
284
328
|
|
|
285
329
|
const auto &result_type = result.GetType().id();
|
|
286
330
|
auto &format = options.date_format_map->GetFormat(result_type);
|
|
287
331
|
|
|
288
|
-
bool success;
|
|
289
|
-
string error_message;
|
|
290
332
|
switch (result_type) {
|
|
291
333
|
case LogicalTypeId::DATE:
|
|
292
|
-
|
|
293
|
-
break;
|
|
334
|
+
return TransformStringWithFormat<TryParseDate, date_t>(string_vector, format, count, result, options);
|
|
294
335
|
case LogicalTypeId::TIMESTAMP:
|
|
295
|
-
|
|
296
|
-
error_message);
|
|
297
|
-
break;
|
|
336
|
+
return TransformStringWithFormat<TryParseTimeStamp, timestamp_t>(string_vector, format, count, result, options);
|
|
298
337
|
default:
|
|
299
338
|
throw InternalException("No date/timestamp formats for %s", LogicalTypeIdToString(result.GetType().id()));
|
|
300
339
|
}
|
|
301
|
-
|
|
302
|
-
if (options.strict_cast && !success) {
|
|
303
|
-
throw CastException(error_message);
|
|
304
|
-
}
|
|
305
340
|
}
|
|
306
341
|
|
|
307
|
-
static
|
|
342
|
+
static bool TransformToString(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count) {
|
|
308
343
|
auto data = (string_t *)FlatVector::GetData(result);
|
|
309
344
|
auto &validity = FlatVector::Validity(result);
|
|
310
345
|
for (idx_t i = 0; i < count; i++) {
|
|
311
346
|
const auto &val = vals[i];
|
|
312
|
-
if (!val ||
|
|
347
|
+
if (!val || unsafe_yyjson_is_null(vals[i])) {
|
|
348
|
+
validity.SetInvalid(i);
|
|
349
|
+
} else if (!GetValueString(val, alc, data[i], result)) {
|
|
313
350
|
validity.SetInvalid(i);
|
|
314
351
|
}
|
|
315
352
|
}
|
|
353
|
+
// Can always transform to string
|
|
354
|
+
return true;
|
|
316
355
|
}
|
|
317
356
|
|
|
318
|
-
|
|
319
|
-
const JSONTransformOptions &options);
|
|
320
|
-
|
|
321
|
-
void JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, const idx_t count,
|
|
357
|
+
bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, const idx_t count,
|
|
322
358
|
const vector<string> &names, const vector<Vector *> &result_vectors,
|
|
323
|
-
|
|
359
|
+
JSONTransformOptions &options) {
|
|
324
360
|
D_ASSERT(alc);
|
|
325
361
|
D_ASSERT(names.size() == result_vectors.size());
|
|
326
362
|
const idx_t column_count = names.size();
|
|
@@ -337,6 +373,8 @@ void JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
|
|
|
337
373
|
idx_t found_key_count;
|
|
338
374
|
auto found_keys = (bool *)alc->malloc(alc->ctx, sizeof(bool) * column_count);
|
|
339
375
|
|
|
376
|
+
bool success = true;
|
|
377
|
+
|
|
340
378
|
size_t idx, max;
|
|
341
379
|
yyjson_val *key, *val;
|
|
342
380
|
for (idx_t i = 0; i < count; i++) {
|
|
@@ -350,17 +388,22 @@ void JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
|
|
|
350
388
|
if (it != key_map.end()) {
|
|
351
389
|
const auto &col_idx = it->second;
|
|
352
390
|
if (options.error_duplicate_key && found_keys[col_idx]) {
|
|
353
|
-
|
|
354
|
-
"Duplicate key \"" + string(key_ptr, key_len) + "\" in object %s",
|
|
391
|
+
options.error_message =
|
|
392
|
+
StringUtil::Format("Duplicate key \"" + string(key_ptr, key_len) + "\" in object %s",
|
|
393
|
+
JSONCommon::ValToString(objects[i]));
|
|
394
|
+
options.object_index = i;
|
|
395
|
+
success = false;
|
|
396
|
+
break;
|
|
355
397
|
}
|
|
356
398
|
nested_vals[col_idx][i] = val;
|
|
357
399
|
found_keys[col_idx] = true;
|
|
358
|
-
|
|
359
|
-
break;
|
|
360
|
-
}
|
|
400
|
+
found_key_count++;
|
|
361
401
|
} else if (options.error_unknown_key) {
|
|
362
|
-
|
|
363
|
-
|
|
402
|
+
options.error_message =
|
|
403
|
+
StringUtil::Format("Object %s has unknown key \"" + string(key_ptr, key_len) + "\"",
|
|
404
|
+
JSONCommon::ValToString(objects[i]));
|
|
405
|
+
options.object_index = i;
|
|
406
|
+
success = false;
|
|
364
407
|
}
|
|
365
408
|
}
|
|
366
409
|
if (found_key_count != column_count) {
|
|
@@ -369,8 +412,11 @@ void JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
|
|
|
369
412
|
for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
|
|
370
413
|
if (!found_keys[col_idx]) {
|
|
371
414
|
if (options.error_missing_key) {
|
|
372
|
-
|
|
373
|
-
|
|
415
|
+
options.error_message =
|
|
416
|
+
StringUtil::Format("Object %s does not have key \"" + names[col_idx] + "\"",
|
|
417
|
+
JSONCommon::ValToString(objects[i]));
|
|
418
|
+
options.object_index = i;
|
|
419
|
+
success = false;
|
|
374
420
|
} else {
|
|
375
421
|
nested_vals[col_idx][i] = nullptr;
|
|
376
422
|
}
|
|
@@ -385,13 +431,28 @@ void JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
|
|
|
385
431
|
}
|
|
386
432
|
}
|
|
387
433
|
|
|
434
|
+
if (!success) {
|
|
435
|
+
if (!options.from_file) {
|
|
436
|
+
throw InvalidInputException(options.error_message);
|
|
437
|
+
}
|
|
438
|
+
return false;
|
|
439
|
+
}
|
|
440
|
+
|
|
388
441
|
for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
|
|
389
|
-
Transform(nested_vals[col_idx], alc, *result_vectors[col_idx], count, options)
|
|
442
|
+
if (JSONTransform::Transform(nested_vals[col_idx], alc, *result_vectors[col_idx], count, options)) {
|
|
443
|
+
continue;
|
|
444
|
+
}
|
|
445
|
+
if (!options.from_file) {
|
|
446
|
+
throw InvalidInputException(options.error_message);
|
|
447
|
+
}
|
|
448
|
+
return false;
|
|
390
449
|
}
|
|
450
|
+
|
|
451
|
+
return success;
|
|
391
452
|
}
|
|
392
453
|
|
|
393
|
-
static
|
|
394
|
-
|
|
454
|
+
static bool TransformObjectInternal(yyjson_val *objects[], yyjson_alc *alc, Vector &result, const idx_t count,
|
|
455
|
+
const LogicalType &type, JSONTransformOptions &options) {
|
|
395
456
|
// Get child vectors and names
|
|
396
457
|
auto &child_vs = StructVector::GetEntries(result);
|
|
397
458
|
vector<string> child_names;
|
|
@@ -403,11 +464,11 @@ static void TransformObject(yyjson_val *objects[], yyjson_alc *alc, Vector &resu
|
|
|
403
464
|
child_vectors.push_back(child_vs[child_i].get());
|
|
404
465
|
}
|
|
405
466
|
|
|
406
|
-
JSONTransform::TransformObject(objects, alc, count, child_names, child_vectors, options);
|
|
467
|
+
return JSONTransform::TransformObject(objects, alc, count, child_names, child_vectors, options);
|
|
407
468
|
}
|
|
408
469
|
|
|
409
|
-
static
|
|
410
|
-
|
|
470
|
+
static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result, const idx_t count,
|
|
471
|
+
JSONTransformOptions &options) {
|
|
411
472
|
// Initialize list vector
|
|
412
473
|
auto list_entries = FlatVector::GetData<list_entry_t>(result);
|
|
413
474
|
auto &list_validity = FlatVector::Validity(result);
|
|
@@ -442,57 +503,71 @@ static void TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
|
|
|
442
503
|
}
|
|
443
504
|
}
|
|
444
505
|
D_ASSERT(list_i == offset);
|
|
506
|
+
|
|
445
507
|
// Transform array values
|
|
446
|
-
Transform(nested_vals, alc, ListVector::GetEntry(result), offset, options);
|
|
508
|
+
auto success = JSONTransform::Transform(nested_vals, alc, ListVector::GetEntry(result), offset, options);
|
|
509
|
+
if (!success && options.from_file) {
|
|
510
|
+
// Set object index in case of error in nested list so we can get accurate line number information
|
|
511
|
+
for (idx_t i = 0; i < count; i++) {
|
|
512
|
+
if (!list_validity.RowIsValid(i)) {
|
|
513
|
+
continue;
|
|
514
|
+
}
|
|
515
|
+
auto &entry = list_entries[i];
|
|
516
|
+
if (options.object_index >= entry.offset && options.object_index < entry.offset + entry.length) {
|
|
517
|
+
options.object_index = i;
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
}
|
|
521
|
+
return success;
|
|
447
522
|
}
|
|
448
523
|
|
|
449
|
-
|
|
450
|
-
|
|
524
|
+
bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
|
|
525
|
+
JSONTransformOptions &options) {
|
|
451
526
|
auto result_type = result.GetType();
|
|
452
|
-
if (
|
|
453
|
-
|
|
454
|
-
return;
|
|
527
|
+
if ((result_type == LogicalTypeId::TIMESTAMP || result_type == LogicalTypeId::DATE) && options.date_format_map) {
|
|
528
|
+
// Auto-detected date/timestamp format during sampling
|
|
529
|
+
return TransformFromStringWithFormat(vals, result, count, options);
|
|
455
530
|
}
|
|
456
531
|
|
|
457
532
|
switch (result_type.id()) {
|
|
458
533
|
case LogicalTypeId::SQLNULL:
|
|
459
|
-
return;
|
|
534
|
+
return true;
|
|
460
535
|
case LogicalTypeId::BOOLEAN:
|
|
461
|
-
return TransformNumerical<bool>(vals, result, count, options
|
|
536
|
+
return TransformNumerical<bool>(vals, result, count, options);
|
|
462
537
|
case LogicalTypeId::TINYINT:
|
|
463
|
-
return TransformNumerical<int8_t>(vals, result, count, options
|
|
538
|
+
return TransformNumerical<int8_t>(vals, result, count, options);
|
|
464
539
|
case LogicalTypeId::SMALLINT:
|
|
465
|
-
return TransformNumerical<int16_t>(vals, result, count, options
|
|
540
|
+
return TransformNumerical<int16_t>(vals, result, count, options);
|
|
466
541
|
case LogicalTypeId::INTEGER:
|
|
467
|
-
return TransformNumerical<int32_t>(vals, result, count, options
|
|
542
|
+
return TransformNumerical<int32_t>(vals, result, count, options);
|
|
468
543
|
case LogicalTypeId::BIGINT:
|
|
469
|
-
return TransformNumerical<int64_t>(vals, result, count, options
|
|
544
|
+
return TransformNumerical<int64_t>(vals, result, count, options);
|
|
470
545
|
case LogicalTypeId::UTINYINT:
|
|
471
|
-
return TransformNumerical<uint8_t>(vals, result, count, options
|
|
546
|
+
return TransformNumerical<uint8_t>(vals, result, count, options);
|
|
472
547
|
case LogicalTypeId::USMALLINT:
|
|
473
|
-
return TransformNumerical<uint16_t>(vals, result, count, options
|
|
548
|
+
return TransformNumerical<uint16_t>(vals, result, count, options);
|
|
474
549
|
case LogicalTypeId::UINTEGER:
|
|
475
|
-
return TransformNumerical<uint32_t>(vals, result, count, options
|
|
550
|
+
return TransformNumerical<uint32_t>(vals, result, count, options);
|
|
476
551
|
case LogicalTypeId::UBIGINT:
|
|
477
|
-
return TransformNumerical<uint64_t>(vals, result, count, options
|
|
552
|
+
return TransformNumerical<uint64_t>(vals, result, count, options);
|
|
478
553
|
case LogicalTypeId::HUGEINT:
|
|
479
|
-
return TransformNumerical<hugeint_t>(vals, result, count, options
|
|
554
|
+
return TransformNumerical<hugeint_t>(vals, result, count, options);
|
|
480
555
|
case LogicalTypeId::FLOAT:
|
|
481
|
-
return TransformNumerical<float>(vals, result, count, options
|
|
556
|
+
return TransformNumerical<float>(vals, result, count, options);
|
|
482
557
|
case LogicalTypeId::DOUBLE:
|
|
483
|
-
return TransformNumerical<double>(vals, result, count, options
|
|
558
|
+
return TransformNumerical<double>(vals, result, count, options);
|
|
484
559
|
case LogicalTypeId::DECIMAL: {
|
|
485
560
|
auto width = DecimalType::GetWidth(result_type);
|
|
486
561
|
auto scale = DecimalType::GetScale(result_type);
|
|
487
562
|
switch (result_type.InternalType()) {
|
|
488
563
|
case PhysicalType::INT16:
|
|
489
|
-
return TransformDecimal<int16_t>(vals, result, count, width, scale, options
|
|
564
|
+
return TransformDecimal<int16_t>(vals, result, count, width, scale, options);
|
|
490
565
|
case PhysicalType::INT32:
|
|
491
|
-
return TransformDecimal<int32_t>(vals, result, count, width, scale, options
|
|
566
|
+
return TransformDecimal<int32_t>(vals, result, count, width, scale, options);
|
|
492
567
|
case PhysicalType::INT64:
|
|
493
|
-
return TransformDecimal<int64_t>(vals, result, count, width, scale, options
|
|
568
|
+
return TransformDecimal<int64_t>(vals, result, count, width, scale, options);
|
|
494
569
|
case PhysicalType::INT128:
|
|
495
|
-
return TransformDecimal<hugeint_t>(vals, result, count, width, scale, options
|
|
570
|
+
return TransformDecimal<hugeint_t>(vals, result, count, width, scale, options);
|
|
496
571
|
default:
|
|
497
572
|
throw InternalException("Unimplemented physical type for decimal");
|
|
498
573
|
}
|
|
@@ -509,12 +584,12 @@ static void Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const
|
|
|
509
584
|
case LogicalTypeId::TIMESTAMP_MS:
|
|
510
585
|
case LogicalTypeId::TIMESTAMP_SEC:
|
|
511
586
|
case LogicalTypeId::UUID:
|
|
512
|
-
return TransformFromString(vals, result, count, options
|
|
587
|
+
return TransformFromString(vals, result, count, options);
|
|
513
588
|
case LogicalTypeId::VARCHAR:
|
|
514
589
|
case LogicalTypeId::BLOB:
|
|
515
590
|
return TransformToString(vals, alc, result, count);
|
|
516
591
|
case LogicalTypeId::STRUCT:
|
|
517
|
-
return
|
|
592
|
+
return TransformObjectInternal(vals, alc, result, count, result_type, options);
|
|
518
593
|
case LogicalTypeId::LIST:
|
|
519
594
|
return TransformArray(vals, alc, result, count, options);
|
|
520
595
|
default:
|
|
@@ -548,9 +623,11 @@ static void TransformFunction(DataChunk &args, ExpressionState &state, Vector &r
|
|
|
548
623
|
}
|
|
549
624
|
}
|
|
550
625
|
|
|
551
|
-
|
|
626
|
+
JSONTransformOptions options(strict, strict, strict, false);
|
|
552
627
|
|
|
553
|
-
Transform(vals, alc, result, count, options)
|
|
628
|
+
if (!JSONTransform::Transform(vals, alc, result, count, options)) {
|
|
629
|
+
throw InvalidInputException(options.error_message);
|
|
630
|
+
}
|
|
554
631
|
|
|
555
632
|
if (args.AllConstant()) {
|
|
556
633
|
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
@@ -26,27 +26,25 @@ void AutoDetect(ClientContext &context, JSONScanData &bind_data, vector<LogicalT
|
|
|
26
26
|
// Read for the specified sample size
|
|
27
27
|
JSONStructureNode node;
|
|
28
28
|
Vector string_vector(LogicalType::VARCHAR);
|
|
29
|
-
idx_t
|
|
30
|
-
while (
|
|
29
|
+
idx_t remaining = bind_data.sample_size;
|
|
30
|
+
while (remaining != 0) {
|
|
31
31
|
allocator.Reset();
|
|
32
|
-
auto
|
|
33
|
-
if (
|
|
32
|
+
auto read_count = lstate.ReadNext(gstate);
|
|
33
|
+
if (read_count == 0) {
|
|
34
34
|
break;
|
|
35
35
|
}
|
|
36
|
-
idx_t
|
|
37
|
-
for (i = 0; i <
|
|
36
|
+
idx_t next = MinValue<idx_t>(read_count, remaining);
|
|
37
|
+
for (idx_t i = 0; i < next; i++) {
|
|
38
38
|
if (lstate.objects[i]) {
|
|
39
39
|
JSONStructure::ExtractStructure(lstate.objects[i], node);
|
|
40
40
|
}
|
|
41
|
-
if (++read == bind_data.sample_size) {
|
|
42
|
-
break;
|
|
43
|
-
}
|
|
44
41
|
}
|
|
45
42
|
if (!node.ContainsVarchar()) { // Can't refine non-VARCHAR types
|
|
46
43
|
continue;
|
|
47
44
|
}
|
|
48
45
|
node.InitializeCandidateTypes(bind_data.max_depth);
|
|
49
|
-
node.RefineCandidateTypes(lstate.objects,
|
|
46
|
+
node.RefineCandidateTypes(lstate.objects, next, string_vector, allocator, bind_data.date_format_map);
|
|
47
|
+
remaining -= next;
|
|
50
48
|
}
|
|
51
49
|
bind_data.type = original_scan_type;
|
|
52
50
|
bind_data.transform_options.date_format_map = &bind_data.date_format_map;
|
|
@@ -55,15 +53,15 @@ void AutoDetect(ClientContext &context, JSONScanData &bind_data, vector<LogicalT
|
|
|
55
53
|
if (type.id() != LogicalTypeId::STRUCT) {
|
|
56
54
|
return_types.emplace_back(type);
|
|
57
55
|
names.emplace_back("json");
|
|
58
|
-
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
56
|
+
bind_data.objects = false;
|
|
57
|
+
} else {
|
|
58
|
+
const auto &child_types = StructType::GetChildTypes(type);
|
|
59
|
+
return_types.reserve(child_types.size());
|
|
60
|
+
names.reserve(child_types.size());
|
|
61
|
+
for (auto &child_type : child_types) {
|
|
62
|
+
return_types.emplace_back(child_type.second);
|
|
63
|
+
names.emplace_back(child_type.first);
|
|
64
|
+
}
|
|
67
65
|
}
|
|
68
66
|
|
|
69
67
|
for (auto &reader : gstate.json_readers) {
|
|
@@ -141,6 +139,7 @@ unique_ptr<FunctionData> ReadJSONBind(ClientContext &context, TableFunctionBindI
|
|
|
141
139
|
transform_options.error_duplicate_key = !bind_data.ignore_errors;
|
|
142
140
|
transform_options.error_missing_key = false;
|
|
143
141
|
transform_options.error_unknown_key = bind_data.auto_detect && !bind_data.ignore_errors;
|
|
142
|
+
transform_options.from_file = true;
|
|
144
143
|
|
|
145
144
|
return result;
|
|
146
145
|
}
|
|
@@ -160,9 +159,23 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
|
|
|
160
159
|
result_vectors.push_back(&output.data[col_idx]);
|
|
161
160
|
}
|
|
162
161
|
|
|
163
|
-
//
|
|
164
|
-
|
|
165
|
-
|
|
162
|
+
// Pass current reader to transform options so we can get line number information if an error occurs
|
|
163
|
+
bool success;
|
|
164
|
+
if (gstate.bind_data.objects) {
|
|
165
|
+
success = JSONTransform::TransformObject(objects, lstate.GetAllocator(), count, gstate.bind_data.names,
|
|
166
|
+
result_vectors, lstate.transform_options);
|
|
167
|
+
} else {
|
|
168
|
+
success = JSONTransform::Transform(objects, lstate.GetAllocator(), *result_vectors[0], count,
|
|
169
|
+
lstate.transform_options);
|
|
170
|
+
}
|
|
171
|
+
if (!success) {
|
|
172
|
+
string hint =
|
|
173
|
+
gstate.bind_data.auto_detect
|
|
174
|
+
? "\nTry increasing 'sample_size', reducing 'maximum_depth', or specifying 'columns' manually."
|
|
175
|
+
: "";
|
|
176
|
+
lstate.ThrowTransformError(count, lstate.transform_options.object_index,
|
|
177
|
+
lstate.transform_options.error_message + hint);
|
|
178
|
+
}
|
|
166
179
|
output.SetCardinality(count);
|
|
167
180
|
}
|
|
168
181
|
|