duckdb 0.7.1-dev90.0 → 0.7.2-dev0.0
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/binding.gyp +7 -7
- package/package.json +3 -3
- package/src/duckdb/extension/json/buffered_json_reader.cpp +50 -9
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +7 -2
- package/src/duckdb/extension/json/include/json_scan.hpp +45 -10
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +35 -22
- package/src/duckdb/extension/json/json_functions/json_create.cpp +8 -8
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +8 -3
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +54 -10
- package/src/duckdb/extension/json/json_functions/read_json.cpp +104 -49
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +5 -3
- package/src/duckdb/extension/json/json_functions.cpp +7 -0
- package/src/duckdb/extension/json/json_scan.cpp +144 -38
- package/src/duckdb/extension/parquet/column_reader.cpp +7 -0
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -0
- package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -10
- package/src/duckdb/src/catalog/catalog.cpp +62 -13
- package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +8 -7
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
- package/src/duckdb/src/catalog/default/default_views.cpp +1 -1
- package/src/duckdb/src/common/bind_helpers.cpp +55 -0
- package/src/duckdb/src/common/file_system.cpp +23 -9
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -0
- package/src/duckdb/src/common/local_file_system.cpp +4 -4
- package/src/duckdb/src/common/string_util.cpp +8 -4
- package/src/duckdb/src/common/types/partitioned_column_data.cpp +1 -0
- package/src/duckdb/src/common/types.cpp +37 -11
- package/src/duckdb/src/execution/column_binding_resolver.cpp +5 -2
- package/src/duckdb/src/execution/index/art/art.cpp +117 -67
- package/src/duckdb/src/execution/index/art/art_key.cpp +24 -12
- package/src/duckdb/src/execution/index/art/leaf.cpp +7 -8
- package/src/duckdb/src/execution/index/art/node.cpp +13 -27
- package/src/duckdb/src/execution/index/art/node16.cpp +5 -8
- package/src/duckdb/src/execution/index/art/node256.cpp +3 -5
- package/src/duckdb/src/execution/index/art/node4.cpp +4 -7
- package/src/duckdb/src/execution/index/art/node48.cpp +5 -8
- package/src/duckdb/src/execution/index/art/prefix.cpp +2 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +6 -27
- package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +1 -9
- package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -9
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +7 -9
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +9 -0
- package/src/duckdb/src/execution/physical_operator.cpp +6 -6
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +38 -11
- package/src/duckdb/src/function/scalar/generic/current_setting.cpp +2 -2
- package/src/duckdb/src/function/scalar/list/array_slice.cpp +2 -3
- package/src/duckdb/src/function/scalar/map/map.cpp +69 -21
- package/src/duckdb/src/function/scalar/string/like.cpp +6 -3
- package/src/duckdb/src/function/table/read_csv.cpp +16 -5
- package/src/duckdb/src/function/table/system/duckdb_temporary_files.cpp +59 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +3 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/bind_helpers.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +9 -1
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +4 -4
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +9 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +37 -41
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +8 -11
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/{extension_functions.hpp → extension_entries.hpp} +27 -5
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +11 -1
- package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +0 -7
- package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +3 -3
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +10 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +1 -2
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +8 -0
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +47 -38
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +7 -0
- package/src/duckdb/src/main/client_context.cpp +2 -0
- package/src/duckdb/src/main/config.cpp +1 -0
- package/src/duckdb/src/main/database.cpp +14 -5
- package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +15 -0
- package/src/duckdb/src/main/extension/extension_install.cpp +60 -16
- package/src/duckdb/src/main/extension/extension_load.cpp +62 -13
- package/src/duckdb/src/main/settings/settings.cpp +16 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +2 -6
- package/src/duckdb/src/parallel/pipeline_executor.cpp +1 -55
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +3 -0
- package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -13
- package/src/duckdb/src/parser/statement/delete_statement.cpp +3 -0
- package/src/duckdb/src/parser/statement/insert_statement.cpp +9 -0
- package/src/duckdb/src/parser/statement/update_statement.cpp +3 -0
- package/src/duckdb/src/parser/transform/expression/transform_case.cpp +3 -3
- package/src/duckdb/src/planner/bind_context.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +3 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +7 -14
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +22 -1
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +32 -1
- package/src/duckdb/src/planner/logical_operator.cpp +4 -1
- package/src/duckdb/src/storage/buffer_manager.cpp +105 -26
- package/src/duckdb/src/storage/compression/bitpacking.cpp +16 -7
- package/src/duckdb/src/storage/data_table.cpp +66 -3
- package/src/duckdb/src/storage/index.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +1 -2
- package/src/duckdb/src/storage/wal_replay.cpp +68 -0
- package/src/duckdb/src/storage/write_ahead_log.cpp +21 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -2
- package/src/duckdb/third_party/concurrentqueue/blockingconcurrentqueue.h +2 -2
- package/src/duckdb/third_party/fmt/include/fmt/core.h +1 -2
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/src/statement.cpp +46 -12
- package/test/arrow.test.ts +3 -3
- package/test/prepare.test.ts +39 -1
- package/test/typescript_decls.test.ts +1 -1
|
@@ -58,6 +58,9 @@ static LogicalType StructureToTypeObject(yyjson_val *obj, ClientContext &context
|
|
|
58
58
|
child_types.emplace_back(key_str, StructureStringToType(val, context));
|
|
59
59
|
}
|
|
60
60
|
D_ASSERT(yyjson_obj_size(obj) == names.size());
|
|
61
|
+
if (child_types.empty()) {
|
|
62
|
+
throw InvalidInputException("Empty object in JSON structure");
|
|
63
|
+
}
|
|
61
64
|
return LogicalType::STRUCT(child_types);
|
|
62
65
|
}
|
|
63
66
|
|
|
@@ -87,7 +90,7 @@ static unique_ptr<FunctionData> JSONTransformBind(ClientContext &context, Scalar
|
|
|
87
90
|
} else {
|
|
88
91
|
auto structure_val = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
|
|
89
92
|
if (!structure_val.DefaultTryCastAs(JSONCommon::JSONType())) {
|
|
90
|
-
throw InvalidInputException("
|
|
93
|
+
throw InvalidInputException("Cannot cast JSON structure to string");
|
|
91
94
|
}
|
|
92
95
|
auto structure_string = structure_val.GetValueUnsafe<string_t>();
|
|
93
96
|
JSONAllocator json_allocator(Allocator::DefaultAllocator());
|
|
@@ -251,7 +254,10 @@ static bool TransformDecimal(yyjson_val *vals[], Vector &result, const idx_t cou
|
|
|
251
254
|
|
|
252
255
|
bool JSONTransform::GetStringVector(yyjson_val *vals[], const idx_t count, const LogicalType &target,
|
|
253
256
|
Vector &string_vector, JSONTransformOptions &options) {
|
|
254
|
-
|
|
257
|
+
if (count > STANDARD_VECTOR_SIZE) {
|
|
258
|
+
string_vector.Initialize(false, count);
|
|
259
|
+
}
|
|
260
|
+
auto data = FlatVector::GetData<string_t>(string_vector);
|
|
255
261
|
auto &validity = FlatVector::Validity(string_vector);
|
|
256
262
|
validity.SetAllValid(count);
|
|
257
263
|
|
|
@@ -380,12 +386,20 @@ bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
|
|
|
380
386
|
size_t idx, max;
|
|
381
387
|
yyjson_val *key, *val;
|
|
382
388
|
for (idx_t i = 0; i < count; i++) {
|
|
383
|
-
if (objects[i]) {
|
|
389
|
+
if (objects[i] && !unsafe_yyjson_is_null(objects[i])) {
|
|
390
|
+
if (!unsafe_yyjson_is_obj(objects[i]) && options.strict_cast) {
|
|
391
|
+
options.error_message =
|
|
392
|
+
StringUtil::Format("Expected OBJECT, but got %s: %s", JSONCommon::ValTypeToString(objects[i]),
|
|
393
|
+
JSONCommon::ValToString(objects[i], 50));
|
|
394
|
+
options.object_index = i;
|
|
395
|
+
success = false;
|
|
396
|
+
break;
|
|
397
|
+
}
|
|
384
398
|
found_key_count = 0;
|
|
385
399
|
memset(found_keys, false, column_count);
|
|
386
400
|
yyjson_obj_foreach(objects[i], idx, max, key, val) {
|
|
387
|
-
auto key_ptr =
|
|
388
|
-
auto key_len =
|
|
401
|
+
auto key_ptr = unsafe_yyjson_get_str(key);
|
|
402
|
+
auto key_len = unsafe_yyjson_get_len(key);
|
|
389
403
|
auto it = key_map.find({key_ptr, key_len});
|
|
390
404
|
if (it != key_map.end()) {
|
|
391
405
|
const auto &col_idx = it->second;
|
|
@@ -476,13 +490,24 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
|
|
|
476
490
|
auto &list_validity = FlatVector::Validity(result);
|
|
477
491
|
idx_t offset = 0;
|
|
478
492
|
for (idx_t i = 0; i < count; i++) {
|
|
479
|
-
if (!arrays[i] ||
|
|
493
|
+
if (!arrays[i] || unsafe_yyjson_is_null(arrays[i])) {
|
|
480
494
|
list_validity.SetInvalid(i);
|
|
495
|
+
} else if (!unsafe_yyjson_is_arr(arrays[i])) {
|
|
496
|
+
if (options.strict_cast) {
|
|
497
|
+
options.error_message =
|
|
498
|
+
StringUtil::Format("Expected ARRAY, but got %s: %s", JSONCommon::ValTypeToString(arrays[i]),
|
|
499
|
+
JSONCommon::ValToString(arrays[i], 50));
|
|
500
|
+
options.object_index = i;
|
|
501
|
+
return false;
|
|
502
|
+
} else {
|
|
503
|
+
list_validity.SetInvalid(i);
|
|
504
|
+
}
|
|
505
|
+
} else {
|
|
506
|
+
auto &entry = list_entries[i];
|
|
507
|
+
entry.offset = offset;
|
|
508
|
+
entry.length = unsafe_yyjson_get_len(arrays[i]);
|
|
509
|
+
offset += entry.length;
|
|
481
510
|
}
|
|
482
|
-
auto &entry = list_entries[i];
|
|
483
|
-
entry.offset = offset;
|
|
484
|
-
entry.length = yyjson_arr_size(arrays[i]);
|
|
485
|
-
offset += entry.length;
|
|
486
511
|
}
|
|
487
512
|
ListVector::SetListSize(result, offset);
|
|
488
513
|
ListVector::Reserve(result, offset);
|
|
@@ -523,6 +548,21 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
|
|
|
523
548
|
return success;
|
|
524
549
|
}
|
|
525
550
|
|
|
551
|
+
bool TransformToJSON(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count) {
|
|
552
|
+
auto data = (string_t *)FlatVector::GetData(result);
|
|
553
|
+
auto &validity = FlatVector::Validity(result);
|
|
554
|
+
for (idx_t i = 0; i < count; i++) {
|
|
555
|
+
const auto &val = vals[i];
|
|
556
|
+
if (!val) {
|
|
557
|
+
validity.SetInvalid(i);
|
|
558
|
+
} else {
|
|
559
|
+
data[i] = JSONCommon::WriteVal(val, alc);
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
// Can always transform to JSON
|
|
563
|
+
return true;
|
|
564
|
+
}
|
|
565
|
+
|
|
526
566
|
bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
|
|
527
567
|
JSONTransformOptions &options) {
|
|
528
568
|
auto result_type = result.GetType();
|
|
@@ -531,6 +571,10 @@ bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &resul
|
|
|
531
571
|
return TransformFromStringWithFormat(vals, result, count, options);
|
|
532
572
|
}
|
|
533
573
|
|
|
574
|
+
if (JSONCommon::LogicalTypeIsJSON(result_type)) {
|
|
575
|
+
return TransformToJSON(vals, alc, result, count);
|
|
576
|
+
}
|
|
577
|
+
|
|
534
578
|
switch (result_type.id()) {
|
|
535
579
|
case LogicalTypeId::SQLNULL:
|
|
536
580
|
return true;
|
|
@@ -13,63 +13,88 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
|
|
|
13
13
|
JSONScanLocalState lstate(context, gstate);
|
|
14
14
|
ArenaAllocator allocator(BufferAllocator::Get(context));
|
|
15
15
|
|
|
16
|
-
static const unordered_map<LogicalTypeId, vector<const char *>, LogicalTypeIdHash> FORMAT_TEMPLATES = {
|
|
17
|
-
{LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
|
|
18
|
-
{LogicalTypeId::TIMESTAMP,
|
|
19
|
-
{"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S",
|
|
20
|
-
"%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"}},
|
|
21
|
-
};
|
|
22
|
-
|
|
23
|
-
// Populate possible date/timestamp formats, assume this is consistent across columns
|
|
24
|
-
for (auto &kv : FORMAT_TEMPLATES) {
|
|
25
|
-
const auto &type = kv.first;
|
|
26
|
-
if (bind_data.date_format_map.HasFormats(type)) {
|
|
27
|
-
continue; // Already populated
|
|
28
|
-
}
|
|
29
|
-
const auto &format_strings = kv.second;
|
|
30
|
-
for (auto &format_string : format_strings) {
|
|
31
|
-
bind_data.date_format_map.AddFormat(type, format_string);
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
|
|
35
16
|
// Read for the specified sample size
|
|
36
17
|
JSONStructureNode node;
|
|
18
|
+
bool more_than_one = false;
|
|
37
19
|
Vector string_vector(LogicalType::VARCHAR);
|
|
38
20
|
idx_t remaining = bind_data.sample_size;
|
|
39
21
|
while (remaining != 0) {
|
|
40
22
|
allocator.Reset();
|
|
41
23
|
auto read_count = lstate.ReadNext(gstate);
|
|
24
|
+
if (lstate.scan_count > 1) {
|
|
25
|
+
more_than_one = true;
|
|
26
|
+
}
|
|
42
27
|
if (read_count == 0) {
|
|
43
28
|
break;
|
|
44
29
|
}
|
|
45
30
|
idx_t next = MinValue<idx_t>(read_count, remaining);
|
|
31
|
+
yyjson_val **values;
|
|
32
|
+
if (bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
|
|
33
|
+
bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) {
|
|
34
|
+
values = lstate.array_values;
|
|
35
|
+
} else {
|
|
36
|
+
values = lstate.values;
|
|
37
|
+
}
|
|
46
38
|
for (idx_t i = 0; i < next; i++) {
|
|
47
|
-
if (
|
|
48
|
-
JSONStructure::ExtractStructure(
|
|
39
|
+
if (values[i]) {
|
|
40
|
+
JSONStructure::ExtractStructure(values[i], node);
|
|
49
41
|
}
|
|
50
42
|
}
|
|
51
43
|
if (!node.ContainsVarchar()) { // Can't refine non-VARCHAR types
|
|
52
44
|
continue;
|
|
53
45
|
}
|
|
54
46
|
node.InitializeCandidateTypes(bind_data.max_depth);
|
|
55
|
-
node.RefineCandidateTypes(
|
|
47
|
+
node.RefineCandidateTypes(values, next, string_vector, allocator, bind_data.date_format_map);
|
|
56
48
|
remaining -= next;
|
|
49
|
+
|
|
50
|
+
if (gstate.file_index == 10) {
|
|
51
|
+
// We really shouldn't open more than 10 files when sampling
|
|
52
|
+
break;
|
|
53
|
+
}
|
|
57
54
|
}
|
|
58
55
|
bind_data.type = original_scan_type;
|
|
59
|
-
bind_data.transform_options.date_format_map = &bind_data.date_format_map;
|
|
60
56
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
57
|
+
// Convert structure to logical type
|
|
58
|
+
auto type = JSONStructure::StructureToType(context, node, bind_data.max_depth);
|
|
59
|
+
|
|
60
|
+
// Detect record type
|
|
61
|
+
if (bind_data.record_type == JSONRecordType::AUTO) {
|
|
62
|
+
switch (type.id()) {
|
|
63
|
+
case LogicalTypeId::STRUCT:
|
|
64
|
+
bind_data.record_type = JSONRecordType::RECORDS;
|
|
65
|
+
break;
|
|
66
|
+
case LogicalTypeId::LIST: {
|
|
67
|
+
if (more_than_one) {
|
|
68
|
+
bind_data.record_type = JSONRecordType::JSON;
|
|
69
|
+
} else {
|
|
70
|
+
type = ListType::GetChildType(type);
|
|
71
|
+
if (type.id() == LogicalTypeId::STRUCT) {
|
|
72
|
+
bind_data.record_type = JSONRecordType::ARRAY_OF_RECORDS;
|
|
73
|
+
} else {
|
|
74
|
+
bind_data.record_type = JSONRecordType::ARRAY_OF_JSON;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
break;
|
|
78
|
+
}
|
|
79
|
+
default:
|
|
80
|
+
bind_data.record_type = JSONRecordType::JSON;
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Detect return type
|
|
85
|
+
if (bind_data.auto_detect) {
|
|
86
|
+
bind_data.transform_options.date_format_map = &bind_data.date_format_map;
|
|
87
|
+
if (type.id() != LogicalTypeId::STRUCT) {
|
|
88
|
+
return_types.emplace_back(type);
|
|
89
|
+
names.emplace_back("json");
|
|
90
|
+
} else {
|
|
91
|
+
const auto &child_types = StructType::GetChildTypes(type);
|
|
92
|
+
return_types.reserve(child_types.size());
|
|
93
|
+
names.reserve(child_types.size());
|
|
94
|
+
for (auto &child_type : child_types) {
|
|
95
|
+
return_types.emplace_back(child_type.second);
|
|
96
|
+
names.emplace_back(child_type.first);
|
|
97
|
+
}
|
|
73
98
|
}
|
|
74
99
|
}
|
|
75
100
|
|
|
@@ -150,6 +175,22 @@ void JSONScan::InitializeBindData(ClientContext &context, JSONScanData &bind_dat
|
|
|
150
175
|
if (!error.empty()) {
|
|
151
176
|
throw InvalidInputException("Could not parse TIMESTAMPFORMAT: %s", error.c_str());
|
|
152
177
|
}
|
|
178
|
+
} else if (loption == "json_format") {
|
|
179
|
+
auto arg = StringValue::Get(kv.second);
|
|
180
|
+
if (arg == "records") {
|
|
181
|
+
bind_data.record_type = JSONRecordType::RECORDS;
|
|
182
|
+
} else if (arg == "array_of_records") {
|
|
183
|
+
bind_data.record_type = JSONRecordType::ARRAY_OF_RECORDS;
|
|
184
|
+
} else if (arg == "values") {
|
|
185
|
+
bind_data.record_type = JSONRecordType::JSON;
|
|
186
|
+
} else if (arg == "array_of_values") {
|
|
187
|
+
bind_data.record_type = JSONRecordType::ARRAY_OF_JSON;
|
|
188
|
+
} else if (arg == "auto") {
|
|
189
|
+
bind_data.record_type = JSONRecordType::AUTO;
|
|
190
|
+
} else {
|
|
191
|
+
throw InvalidInputException("\"json_format\" must be one of ['records', 'array_of_records', 'json', "
|
|
192
|
+
"'array_of_json', 'auto']");
|
|
193
|
+
}
|
|
153
194
|
}
|
|
154
195
|
}
|
|
155
196
|
}
|
|
@@ -170,7 +211,7 @@ unique_ptr<FunctionData> ReadJSONBind(ClientContext &context, TableFunctionBindI
|
|
|
170
211
|
|
|
171
212
|
bind_data.InitializeFormats();
|
|
172
213
|
|
|
173
|
-
if (bind_data.auto_detect) {
|
|
214
|
+
if (bind_data.auto_detect || bind_data.record_type == JSONRecordType::AUTO) {
|
|
174
215
|
JSONScan::AutoDetect(context, bind_data, return_types, names);
|
|
175
216
|
bind_data.names = names;
|
|
176
217
|
}
|
|
@@ -189,9 +230,16 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
|
|
|
189
230
|
auto &gstate = ((JSONGlobalTableFunctionState &)*data_p.global_state).state;
|
|
190
231
|
auto &lstate = ((JSONLocalTableFunctionState &)*data_p.local_state).state;
|
|
191
232
|
|
|
192
|
-
// Fetch next lines
|
|
193
233
|
const auto count = lstate.ReadNext(gstate);
|
|
194
|
-
|
|
234
|
+
yyjson_val **values;
|
|
235
|
+
if (gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
|
|
236
|
+
gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) {
|
|
237
|
+
values = lstate.array_values;
|
|
238
|
+
} else {
|
|
239
|
+
D_ASSERT(gstate.bind_data.record_type != JSONRecordType::AUTO);
|
|
240
|
+
values = lstate.values;
|
|
241
|
+
}
|
|
242
|
+
output.SetCardinality(count);
|
|
195
243
|
|
|
196
244
|
vector<Vector *> result_vectors;
|
|
197
245
|
result_vectors.reserve(output.ColumnCount());
|
|
@@ -202,22 +250,23 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
|
|
|
202
250
|
|
|
203
251
|
// Pass current reader to transform options so we can get line number information if an error occurs
|
|
204
252
|
bool success;
|
|
205
|
-
if (gstate.bind_data.
|
|
206
|
-
|
|
253
|
+
if (gstate.bind_data.record_type == JSONRecordType::RECORDS ||
|
|
254
|
+
gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS) {
|
|
255
|
+
success = JSONTransform::TransformObject(values, lstate.GetAllocator(), count, gstate.bind_data.names,
|
|
207
256
|
result_vectors, lstate.transform_options);
|
|
208
257
|
} else {
|
|
209
|
-
success = JSONTransform::Transform(
|
|
258
|
+
success = JSONTransform::Transform(values, lstate.GetAllocator(), *result_vectors[0], count,
|
|
210
259
|
lstate.transform_options);
|
|
211
260
|
}
|
|
261
|
+
|
|
212
262
|
if (!success) {
|
|
213
263
|
string hint = gstate.bind_data.auto_detect
|
|
214
264
|
? "\nTry increasing 'sample_size', reducing 'maximum_depth', specifying 'columns' manually, "
|
|
215
|
-
"or setting 'ignore_errors' to true."
|
|
216
|
-
: "";
|
|
217
|
-
lstate.ThrowTransformError(
|
|
265
|
+
"specifying 'lines' or 'json_format' manually, or setting 'ignore_errors' to true."
|
|
266
|
+
: "\n Try specifying 'lines' or 'json_format' manually, or setting 'ignore_errors' to true.";
|
|
267
|
+
lstate.ThrowTransformError(lstate.transform_options.object_index,
|
|
218
268
|
lstate.transform_options.error_message + hint);
|
|
219
269
|
}
|
|
220
|
-
output.SetCardinality(count);
|
|
221
270
|
}
|
|
222
271
|
|
|
223
272
|
TableFunction JSONFunctions::GetReadJSONTableFunction(bool list_parameter, shared_ptr<JSONScanInfo> function_info) {
|
|
@@ -233,8 +282,10 @@ TableFunction JSONFunctions::GetReadJSONTableFunction(bool list_parameter, share
|
|
|
233
282
|
table_function.named_parameters["date_format"] = LogicalType::VARCHAR;
|
|
234
283
|
table_function.named_parameters["timestampformat"] = LogicalType::VARCHAR;
|
|
235
284
|
table_function.named_parameters["timestamp_format"] = LogicalType::VARCHAR;
|
|
285
|
+
table_function.named_parameters["json_format"] = LogicalType::VARCHAR;
|
|
236
286
|
|
|
237
287
|
table_function.projection_pushdown = true;
|
|
288
|
+
// TODO: might be able to do filter pushdown/prune too
|
|
238
289
|
|
|
239
290
|
table_function.function_info = std::move(function_info);
|
|
240
291
|
|
|
@@ -249,7 +300,8 @@ TableFunction GetReadJSONAutoTableFunction(bool list_parameter, shared_ptr<JSONS
|
|
|
249
300
|
|
|
250
301
|
CreateTableFunctionInfo JSONFunctions::GetReadJSONFunction() {
|
|
251
302
|
TableFunctionSet function_set("read_json");
|
|
252
|
-
auto function_info =
|
|
303
|
+
auto function_info =
|
|
304
|
+
make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::UNSTRUCTURED, JSONRecordType::RECORDS, false);
|
|
253
305
|
function_set.AddFunction(JSONFunctions::GetReadJSONTableFunction(false, function_info));
|
|
254
306
|
function_set.AddFunction(JSONFunctions::GetReadJSONTableFunction(true, function_info));
|
|
255
307
|
return CreateTableFunctionInfo(function_set);
|
|
@@ -257,7 +309,8 @@ CreateTableFunctionInfo JSONFunctions::GetReadJSONFunction() {
|
|
|
257
309
|
|
|
258
310
|
CreateTableFunctionInfo JSONFunctions::GetReadNDJSONFunction() {
|
|
259
311
|
TableFunctionSet function_set("read_ndjson");
|
|
260
|
-
auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED,
|
|
312
|
+
auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED,
|
|
313
|
+
JSONRecordType::RECORDS, false);
|
|
261
314
|
function_set.AddFunction(JSONFunctions::GetReadJSONTableFunction(false, function_info));
|
|
262
315
|
function_set.AddFunction(JSONFunctions::GetReadJSONTableFunction(true, function_info));
|
|
263
316
|
return CreateTableFunctionInfo(function_set);
|
|
@@ -265,7 +318,8 @@ CreateTableFunctionInfo JSONFunctions::GetReadNDJSONFunction() {
|
|
|
265
318
|
|
|
266
319
|
CreateTableFunctionInfo JSONFunctions::GetReadJSONAutoFunction() {
|
|
267
320
|
TableFunctionSet function_set("read_json_auto");
|
|
268
|
-
auto function_info =
|
|
321
|
+
auto function_info =
|
|
322
|
+
make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::AUTO_DETECT, JSONRecordType::AUTO, true);
|
|
269
323
|
function_set.AddFunction(GetReadJSONAutoTableFunction(false, function_info));
|
|
270
324
|
function_set.AddFunction(GetReadJSONAutoTableFunction(true, function_info));
|
|
271
325
|
return CreateTableFunctionInfo(function_set);
|
|
@@ -273,7 +327,8 @@ CreateTableFunctionInfo JSONFunctions::GetReadJSONAutoFunction() {
|
|
|
273
327
|
|
|
274
328
|
CreateTableFunctionInfo JSONFunctions::GetReadNDJSONAutoFunction() {
|
|
275
329
|
TableFunctionSet function_set("read_ndjson_auto");
|
|
276
|
-
auto function_info =
|
|
330
|
+
auto function_info =
|
|
331
|
+
make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED, JSONRecordType::AUTO, true);
|
|
277
332
|
function_set.AddFunction(GetReadJSONAutoTableFunction(false, function_info));
|
|
278
333
|
function_set.AddFunction(GetReadJSONAutoTableFunction(true, function_info));
|
|
279
334
|
return CreateTableFunctionInfo(function_set);
|
|
@@ -20,7 +20,7 @@ static void ReadJSONObjectsFunction(ClientContext &context, TableFunctionInput &
|
|
|
20
20
|
// Fetch next lines
|
|
21
21
|
const auto count = lstate.ReadNext(gstate);
|
|
22
22
|
const auto lines = lstate.lines;
|
|
23
|
-
const auto objects = lstate.
|
|
23
|
+
const auto objects = lstate.values;
|
|
24
24
|
|
|
25
25
|
// Create the strings without copying them
|
|
26
26
|
auto strings = FlatVector::GetData<string_t>(output.data[0]);
|
|
@@ -48,7 +48,8 @@ TableFunction GetReadJSONObjectsTableFunction(bool list_parameter, shared_ptr<JS
|
|
|
48
48
|
|
|
49
49
|
CreateTableFunctionInfo JSONFunctions::GetReadJSONObjectsFunction() {
|
|
50
50
|
TableFunctionSet function_set("read_json_objects");
|
|
51
|
-
auto function_info =
|
|
51
|
+
auto function_info =
|
|
52
|
+
make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::UNSTRUCTURED, JSONRecordType::JSON);
|
|
52
53
|
function_set.AddFunction(GetReadJSONObjectsTableFunction(false, function_info));
|
|
53
54
|
function_set.AddFunction(GetReadJSONObjectsTableFunction(true, function_info));
|
|
54
55
|
return CreateTableFunctionInfo(function_set);
|
|
@@ -56,7 +57,8 @@ CreateTableFunctionInfo JSONFunctions::GetReadJSONObjectsFunction() {
|
|
|
56
57
|
|
|
57
58
|
CreateTableFunctionInfo JSONFunctions::GetReadNDJSONObjectsFunction() {
|
|
58
59
|
TableFunctionSet function_set("read_ndjson_objects");
|
|
59
|
-
auto function_info =
|
|
60
|
+
auto function_info =
|
|
61
|
+
make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::NEWLINE_DELIMITED, JSONRecordType::JSON);
|
|
60
62
|
function_set.AddFunction(GetReadJSONObjectsTableFunction(false, function_info));
|
|
61
63
|
function_set.AddFunction(GetReadJSONObjectsTableFunction(true, function_info));
|
|
62
64
|
return CreateTableFunctionInfo(function_set);
|
|
@@ -166,7 +166,14 @@ vector<CreateTableFunctionInfo> JSONFunctions::GetTableFunctions() {
|
|
|
166
166
|
unique_ptr<TableRef> JSONFunctions::ReadJSONReplacement(ClientContext &context, const string &table_name,
|
|
167
167
|
ReplacementScanData *data) {
|
|
168
168
|
auto lower_name = StringUtil::Lower(table_name);
|
|
169
|
+
// remove any compression
|
|
170
|
+
if (StringUtil::EndsWith(lower_name, ".gz")) {
|
|
171
|
+
lower_name = lower_name.substr(0, lower_name.size() - 3);
|
|
172
|
+
} else if (StringUtil::EndsWith(lower_name, ".zst")) {
|
|
173
|
+
lower_name = lower_name.substr(0, lower_name.size() - 4);
|
|
174
|
+
}
|
|
169
175
|
if (!StringUtil::EndsWith(lower_name, ".json") && !StringUtil::Contains(lower_name, ".json?") &&
|
|
176
|
+
!StringUtil::EndsWith(lower_name, ".jsonl") && !StringUtil::Contains(lower_name, ".jsonl?") &&
|
|
170
177
|
!StringUtil::EndsWith(lower_name, ".ndjson") && !StringUtil::Contains(lower_name, ".ndjson?")) {
|
|
171
178
|
return nullptr;
|
|
172
179
|
}
|