duckdb 0.7.2-dev3515.0 → 0.7.2-dev3666.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/configure.py +2 -0
- package/package.json +1 -1
- package/src/database.cpp +1 -0
- package/src/duckdb/extension/json/buffered_json_reader.cpp +56 -17
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +56 -31
- package/src/duckdb/extension/json/include/json_common.hpp +5 -4
- package/src/duckdb/extension/json/include/json_executors.hpp +13 -18
- package/src/duckdb/extension/json/include/json_functions.hpp +3 -0
- package/src/duckdb/extension/json/include/json_scan.hpp +106 -153
- package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
- package/src/duckdb/extension/json/json_common.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +94 -38
- package/src/duckdb/extension/json/json_functions/json_contains.cpp +7 -8
- package/src/duckdb/extension/json/json_functions/json_create.cpp +7 -7
- package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +4 -4
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +4 -4
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +7 -5
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +10 -8
- package/src/duckdb/extension/json/json_functions/json_valid.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/read_json.cpp +167 -169
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +37 -16
- package/src/duckdb/extension/json/json_functions.cpp +11 -4
- package/src/duckdb/extension/json/json_scan.cpp +593 -374
- package/src/duckdb/extension/parquet/parquet-extension.cpp +5 -0
- package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +42 -0
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -0
- package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
- package/src/duckdb/src/common/constants.cpp +1 -0
- package/src/duckdb/src/common/file_system.cpp +26 -6
- package/src/duckdb/src/common/local_file_system.cpp +0 -13
- package/src/duckdb/src/common/types/vector.cpp +3 -3
- package/src/duckdb/src/common/types/vector_buffer.cpp +11 -3
- package/src/duckdb/src/common/types/vector_cache.cpp +5 -5
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +12 -6
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +10 -0
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +2 -2
- package/src/duckdb/src/function/macro_function.cpp +43 -0
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -3
- package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -0
- package/src/duckdb/src/function/scalar_macro_function.cpp +10 -0
- package/src/duckdb/src/function/table/copy_csv.cpp +68 -18
- package/src/duckdb/src/function/table/read_csv.cpp +30 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +8 -2
- package/src/duckdb/src/function/table_macro_function.cpp +10 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/column_dependency_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +3 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp +0 -6
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp +0 -6
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/field_writer.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/limits.hpp +46 -46
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +8 -8
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/operator/convert_to_string.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +2 -4
- package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +7 -7
- package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +10 -10
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -12
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_iterators.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +12 -2
- package/src/duckdb/src/include/duckdb/common/types.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/winapi.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +9 -5
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_type.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +10 -14
- package/src/duckdb/src/include/duckdb/function/macro_function.hpp +7 -1
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +3 -4
- package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +7 -2
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/udf_function.hpp +56 -50
- package/src/duckdb/src/include/duckdb/main/appender.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/connection.hpp +8 -9
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -7
- package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/column_list.hpp +7 -7
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +4 -7
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +8 -12
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +6 -20
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +6 -18
- package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +4 -8
- package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +4 -38
- package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +5 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +10 -10
- package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +2 -2
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +42 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +0 -7
- package/src/duckdb/src/parser/parsed_data/create_info.cpp +19 -8
- package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +46 -0
- package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +56 -0
- package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +47 -0
- package/src/duckdb/src/parser/parsed_data/detach_info.cpp +34 -0
- package/src/duckdb/src/parser/parsed_data/drop_info.cpp +46 -0
- package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +24 -0
- package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +37 -0
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +27 -9
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +9 -4
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +2 -1
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +1 -1
- package/src/duckdb/src/planner/logical_operator.cpp +1 -2
- package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -25
- package/src/duckdb/src/planner/operator/logical_insert.cpp +30 -0
- package/src/duckdb/src/planner/operator/logical_simple.cpp +33 -5
- package/src/duckdb/src/planner/parsed_data/bound_create_table_info.cpp +6 -16
- package/src/duckdb/src/planner/planner.cpp +4 -13
- package/src/duckdb/src/storage/checkpoint_manager.cpp +12 -6
- package/src/duckdb/src/storage/single_file_block_manager.cpp +0 -4
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5735 -5773
- package/src/duckdb/ub_src_catalog_catalog_entry.cpp +1 -1
- package/src/duckdb/ub_src_parser_parsed_data.cpp +16 -0
- package/src/duckdb/src/catalog/catalog_entry/scalar_macro_catalog_entry.cpp +0 -104
@@ -8,88 +8,83 @@ namespace duckdb {
|
|
8
8
|
|
9
9
|
void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vector<LogicalType> &return_types,
|
10
10
|
vector<string> &names) {
|
11
|
-
|
12
|
-
bind_data.type = JSONScanType::SAMPLE;
|
13
|
-
JSONScanGlobalState gstate(context, bind_data);
|
14
|
-
JSONScanLocalState lstate(context, gstate);
|
15
|
-
ArenaAllocator allocator(BufferAllocator::Get(context));
|
11
|
+
// Change scan type during detection
|
12
|
+
bind_data.type = JSONScanType::SAMPLE;
|
16
13
|
|
17
|
-
//
|
14
|
+
// These are used across files (if union_by_name)
|
18
15
|
JSONStructureNode node;
|
19
|
-
|
16
|
+
ArenaAllocator allocator(BufferAllocator::Get(context));
|
20
17
|
Vector string_vector(LogicalType::VARCHAR);
|
21
|
-
idx_t remaining = bind_data.sample_size;
|
22
|
-
while (remaining != 0) {
|
23
|
-
allocator.Reset();
|
24
|
-
|
25
|
-
if (gstate.file_index >= 10) {
|
26
|
-
// We really shouldn't open more than 10 files when sampling
|
27
|
-
break;
|
28
|
-
}
|
29
18
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
idx_t next = MinValue<idx_t>(read_count, remaining);
|
38
|
-
yyjson_val **values;
|
39
|
-
if (bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
|
40
|
-
bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) {
|
41
|
-
values = lstate.array_values;
|
19
|
+
// Loop through the files (if union_by_name, else just sample the first file)
|
20
|
+
for (idx_t file_idx = 0; file_idx < bind_data.files.size(); file_idx++) {
|
21
|
+
// Create global/local state and place the reader in the right field
|
22
|
+
JSONScanGlobalState gstate(context, bind_data);
|
23
|
+
JSONScanLocalState lstate(context, gstate);
|
24
|
+
if (file_idx == 0) {
|
25
|
+
gstate.json_readers.emplace_back(bind_data.initial_reader.get());
|
42
26
|
} else {
|
43
|
-
|
27
|
+
gstate.json_readers.emplace_back(bind_data.union_readers[file_idx - 1].get());
|
44
28
|
}
|
45
|
-
|
46
|
-
|
47
|
-
|
29
|
+
|
30
|
+
// Read and detect schema
|
31
|
+
idx_t remaining = bind_data.sample_size;
|
32
|
+
while (remaining != 0) {
|
33
|
+
allocator.Reset();
|
34
|
+
auto read_count = lstate.ReadNext(gstate);
|
35
|
+
if (read_count == 0) {
|
36
|
+
break;
|
37
|
+
}
|
38
|
+
|
39
|
+
idx_t next = MinValue<idx_t>(read_count, remaining);
|
40
|
+
for (idx_t i = 0; i < next; i++) {
|
41
|
+
const auto &val = lstate.values[i];
|
42
|
+
if (val) {
|
43
|
+
JSONStructure::ExtractStructure(val, node);
|
44
|
+
}
|
48
45
|
}
|
46
|
+
if (!node.ContainsVarchar()) { // Can't refine non-VARCHAR types
|
47
|
+
continue;
|
48
|
+
}
|
49
|
+
node.InitializeCandidateTypes(bind_data.max_depth);
|
50
|
+
node.RefineCandidateTypes(lstate.values, next, string_vector, allocator, bind_data.date_format_map);
|
51
|
+
remaining -= next;
|
52
|
+
}
|
53
|
+
|
54
|
+
if (file_idx == 0 && lstate.total_tuple_count != 0) {
|
55
|
+
bind_data.avg_tuple_size = lstate.total_read_size / lstate.total_tuple_count;
|
49
56
|
}
|
50
|
-
|
51
|
-
|
57
|
+
|
58
|
+
// Close the file and stop detection if not union_by_name
|
59
|
+
if (!bind_data.options.file_options.union_by_name) {
|
60
|
+
break;
|
52
61
|
}
|
53
|
-
node.InitializeCandidateTypes(bind_data.max_depth);
|
54
|
-
node.RefineCandidateTypes(values, next, string_vector, allocator, bind_data.date_format_map);
|
55
|
-
remaining -= next;
|
56
62
|
}
|
57
|
-
|
63
|
+
|
64
|
+
// Restore the scan type
|
65
|
+
bind_data.type = JSONScanType::READ_JSON;
|
58
66
|
|
59
67
|
// Convert structure to logical type
|
60
68
|
auto type = JSONStructure::StructureToType(context, node, bind_data.max_depth);
|
61
69
|
|
62
|
-
//
|
63
|
-
if (bind_data.record_type == JSONRecordType::
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
case LogicalTypeId::LIST: {
|
69
|
-
if (more_than_one) {
|
70
|
-
bind_data.record_type = JSONRecordType::JSON;
|
71
|
-
} else {
|
72
|
-
type = ListType::GetChildType(type);
|
73
|
-
if (type.id() == LogicalTypeId::STRUCT) {
|
74
|
-
bind_data.record_type = JSONRecordType::ARRAY_OF_RECORDS;
|
75
|
-
} else {
|
76
|
-
bind_data.record_type = JSONRecordType::ARRAY_OF_JSON;
|
77
|
-
}
|
78
|
-
}
|
79
|
-
break;
|
80
|
-
}
|
81
|
-
default:
|
82
|
-
bind_data.record_type = JSONRecordType::JSON;
|
70
|
+
// Auto-detect record type
|
71
|
+
if (bind_data.options.record_type == JSONRecordType::AUTO_DETECT) {
|
72
|
+
if (type.id() == LogicalTypeId::STRUCT) {
|
73
|
+
bind_data.options.record_type = JSONRecordType::RECORDS;
|
74
|
+
} else {
|
75
|
+
bind_data.options.record_type = JSONRecordType::VALUES;
|
83
76
|
}
|
84
77
|
}
|
85
78
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
79
|
+
if (!bind_data.auto_detect) {
|
80
|
+
return;
|
81
|
+
}
|
82
|
+
|
83
|
+
bind_data.transform_options.date_format_map = &bind_data.date_format_map;
|
84
|
+
|
85
|
+
// Auto-detect columns
|
86
|
+
if (bind_data.options.record_type == JSONRecordType::RECORDS) {
|
87
|
+
if (type.id() == LogicalTypeId::STRUCT) {
|
93
88
|
const auto &child_types = StructType::GetChildTypes(type);
|
94
89
|
return_types.reserve(child_types.size());
|
95
90
|
names.reserve(child_types.size());
|
@@ -97,26 +92,29 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
|
|
97
92
|
return_types.emplace_back(child_type.second);
|
98
93
|
names.emplace_back(child_type.first);
|
99
94
|
}
|
95
|
+
} else {
|
96
|
+
throw BinderException("json_read expected records, but got non-record JSON instead."
|
97
|
+
"\n Try setting records='auto' or records='false'.");
|
100
98
|
}
|
99
|
+
} else {
|
100
|
+
D_ASSERT(bind_data.options.record_type == JSONRecordType::VALUES);
|
101
|
+
return_types.emplace_back(type);
|
102
|
+
names.emplace_back("json");
|
101
103
|
}
|
102
|
-
|
103
|
-
for (auto &reader : gstate.json_readers) {
|
104
|
-
if (reader->IsOpen()) {
|
105
|
-
reader->Reset();
|
106
|
-
}
|
107
|
-
}
|
108
|
-
bind_data.stored_readers = std::move(gstate.json_readers);
|
109
104
|
}
|
110
105
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
106
|
+
unique_ptr<FunctionData> ReadJSONBind(ClientContext &context, TableFunctionBindInput &input,
|
107
|
+
vector<LogicalType> &return_types, vector<string> &names) {
|
108
|
+
// First bind default params
|
109
|
+
auto bind_data = make_uniq<JSONScanData>();
|
110
|
+
bind_data->Bind(context, input);
|
111
|
+
|
112
|
+
for (auto &kv : input.named_parameters) {
|
115
113
|
auto loption = StringUtil::Lower(kv.first);
|
116
114
|
if (loption == "columns") {
|
117
115
|
auto &child_type = kv.second.type();
|
118
116
|
if (child_type.id() != LogicalTypeId::STRUCT) {
|
119
|
-
throw BinderException("read_json \"columns\" parameter requires a struct as input");
|
117
|
+
throw BinderException("read_json \"columns\" parameter requires a struct as input.");
|
120
118
|
}
|
121
119
|
auto &struct_children = StructValue::GetChildren(kv.second);
|
122
120
|
D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
|
@@ -125,157 +123,158 @@ void JSONScan::InitializeBindData(ClientContext &context, JSONScanData &bind_dat
|
|
125
123
|
auto &val = struct_children[i];
|
126
124
|
names.push_back(name);
|
127
125
|
if (val.type().id() != LogicalTypeId::VARCHAR) {
|
128
|
-
throw BinderException("read_json \"columns\" parameter type specification must be VARCHAR");
|
126
|
+
throw BinderException("read_json \"columns\" parameter type specification must be VARCHAR.");
|
129
127
|
}
|
130
128
|
return_types.emplace_back(TransformStringToLogicalType(StringValue::Get(val), context));
|
131
129
|
}
|
132
130
|
D_ASSERT(names.size() == return_types.size());
|
133
131
|
if (names.empty()) {
|
134
|
-
throw BinderException("read_json \"columns\" parameter needs at least one column");
|
132
|
+
throw BinderException("read_json \"columns\" parameter needs at least one column.");
|
135
133
|
}
|
136
|
-
bind_data
|
134
|
+
bind_data->names = names;
|
137
135
|
} else if (loption == "auto_detect") {
|
138
|
-
bind_data
|
136
|
+
bind_data->auto_detect = BooleanValue::Get(kv.second);
|
139
137
|
} else if (loption == "sample_size") {
|
140
138
|
auto arg = BigIntValue::Get(kv.second);
|
141
139
|
if (arg == -1) {
|
142
|
-
bind_data
|
140
|
+
bind_data->sample_size = NumericLimits<idx_t>::Maximum();
|
143
141
|
} else if (arg > 0) {
|
144
|
-
bind_data
|
142
|
+
bind_data->sample_size = arg;
|
145
143
|
} else {
|
146
144
|
throw BinderException(
|
147
|
-
"read_json \"sample_size\" parameter must be positive, or -1 to sample the entire file");
|
145
|
+
"read_json \"sample_size\" parameter must be positive, or -1 to sample the entire file.");
|
148
146
|
}
|
149
147
|
} else if (loption == "maximum_depth") {
|
150
148
|
auto arg = BigIntValue::Get(kv.second);
|
151
149
|
if (arg == -1) {
|
152
|
-
bind_data
|
150
|
+
bind_data->max_depth = NumericLimits<idx_t>::Maximum();
|
153
151
|
} else {
|
154
|
-
bind_data
|
152
|
+
bind_data->max_depth = arg;
|
155
153
|
}
|
156
154
|
} else if (loption == "dateformat" || loption == "date_format") {
|
157
155
|
auto format_string = StringValue::Get(kv.second);
|
158
156
|
if (StringUtil::Lower(format_string) == "iso") {
|
159
157
|
format_string = "%Y-%m-%d";
|
160
158
|
}
|
161
|
-
bind_data
|
159
|
+
bind_data->date_format = format_string;
|
162
160
|
|
163
161
|
StrpTimeFormat format;
|
164
162
|
auto error = StrTimeFormat::ParseFormatSpecifier(format_string, format);
|
165
163
|
if (!error.empty()) {
|
166
|
-
throw InvalidInputException("
|
164
|
+
throw InvalidInputException("read_json could not parse \"dateformat\": '%s'.", error.c_str());
|
167
165
|
}
|
168
166
|
} else if (loption == "timestampformat" || loption == "timestamp_format") {
|
169
167
|
auto format_string = StringValue::Get(kv.second);
|
170
168
|
if (StringUtil::Lower(format_string) == "iso") {
|
171
169
|
format_string = "%Y-%m-%dT%H:%M:%S.%fZ";
|
172
170
|
}
|
173
|
-
bind_data
|
171
|
+
bind_data->timestamp_format = format_string;
|
174
172
|
|
175
173
|
StrpTimeFormat format;
|
176
174
|
auto error = StrTimeFormat::ParseFormatSpecifier(format_string, format);
|
177
175
|
if (!error.empty()) {
|
178
|
-
throw InvalidInputException("
|
176
|
+
throw InvalidInputException("read_json could not parse \"timestampformat\": '%s'.", error.c_str());
|
179
177
|
}
|
180
|
-
} else if (loption == "
|
178
|
+
} else if (loption == "records") {
|
181
179
|
auto arg = StringValue::Get(kv.second);
|
182
|
-
if (arg == "
|
183
|
-
bind_data.record_type = JSONRecordType::
|
184
|
-
} else if (arg == "
|
185
|
-
bind_data.record_type = JSONRecordType::
|
186
|
-
} else if (arg == "
|
187
|
-
bind_data.record_type = JSONRecordType::
|
188
|
-
} else if (arg == "array_of_values") {
|
189
|
-
bind_data.record_type = JSONRecordType::ARRAY_OF_JSON;
|
190
|
-
} else if (arg == "auto") {
|
191
|
-
bind_data.record_type = JSONRecordType::AUTO;
|
180
|
+
if (arg == "auto") {
|
181
|
+
bind_data->options.record_type = JSONRecordType::AUTO_DETECT;
|
182
|
+
} else if (arg == "true") {
|
183
|
+
bind_data->options.record_type = JSONRecordType::RECORDS;
|
184
|
+
} else if (arg == "false") {
|
185
|
+
bind_data->options.record_type = JSONRecordType::VALUES;
|
192
186
|
} else {
|
193
|
-
throw InvalidInputException("\"
|
194
|
-
"'array_of_json', 'auto']");
|
187
|
+
throw InvalidInputException("read_json requires \"records\" to be one of ['auto', 'true', 'false'].");
|
195
188
|
}
|
196
189
|
}
|
197
190
|
}
|
198
|
-
}
|
199
|
-
|
200
|
-
unique_ptr<FunctionData> ReadJSONBind(ClientContext &context, TableFunctionBindInput &input,
|
201
|
-
vector<LogicalType> &return_types, vector<string> &names) {
|
202
|
-
// First bind default params
|
203
|
-
auto result = JSONScanData::Bind(context, input);
|
204
|
-
auto &bind_data = (JSONScanData &)*result;
|
205
191
|
|
206
|
-
|
192
|
+
// Specifying column names overrides auto-detect
|
193
|
+
if (!return_types.empty()) {
|
194
|
+
bind_data->auto_detect = false;
|
195
|
+
}
|
207
196
|
|
208
|
-
if (!bind_data
|
209
|
-
|
210
|
-
|
211
|
-
|
197
|
+
if (!bind_data->auto_detect) {
|
198
|
+
// Need to specify columns if RECORDS and not auto-detecting
|
199
|
+
if (return_types.empty()) {
|
200
|
+
throw BinderException("read_json requires columns to be specified through the \"columns\" parameter."
|
201
|
+
"\n Use read_json_auto or set auto_detect=true to automatically guess columns.");
|
202
|
+
}
|
203
|
+
// If we are reading VALUES, we can only have one column
|
204
|
+
if (bind_data->options.record_type == JSONRecordType::VALUES && return_types.size() != 1) {
|
205
|
+
throw BinderException("read_json requires a single column to be specified through the \"columns\" "
|
206
|
+
"parameter when \"records\" is set to 'false'.");
|
207
|
+
}
|
212
208
|
}
|
213
209
|
|
214
|
-
bind_data
|
210
|
+
bind_data->InitializeFormats();
|
215
211
|
|
216
|
-
if (bind_data
|
217
|
-
JSONScan::AutoDetect(context, bind_data, return_types, names);
|
218
|
-
bind_data
|
212
|
+
if (bind_data->auto_detect || bind_data->options.record_type == JSONRecordType::AUTO_DETECT) {
|
213
|
+
JSONScan::AutoDetect(context, *bind_data, return_types, names);
|
214
|
+
bind_data->names = names;
|
215
|
+
D_ASSERT(return_types.size() == names.size());
|
219
216
|
}
|
220
217
|
|
221
|
-
|
222
|
-
|
223
|
-
|
218
|
+
bind_data->reader_bind =
|
219
|
+
MultiFileReader::BindOptions(bind_data->options.file_options, bind_data->files, return_types, names);
|
220
|
+
|
221
|
+
auto &transform_options = bind_data->transform_options;
|
222
|
+
transform_options.strict_cast = !bind_data->ignore_errors;
|
223
|
+
transform_options.error_duplicate_key = !bind_data->ignore_errors;
|
224
224
|
transform_options.error_missing_key = false;
|
225
|
-
transform_options.error_unknown_key = bind_data
|
225
|
+
transform_options.error_unknown_key = bind_data->auto_detect && !bind_data->ignore_errors;
|
226
226
|
transform_options.delay_error = true;
|
227
227
|
|
228
|
-
return
|
228
|
+
return bind_data;
|
229
229
|
}
|
230
230
|
|
231
231
|
static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
|
232
|
-
auto &gstate =
|
233
|
-
auto &lstate =
|
232
|
+
auto &gstate = data_p.global_state->Cast<JSONGlobalTableFunctionState>().state;
|
233
|
+
auto &lstate = data_p.local_state->Cast<JSONLocalTableFunctionState>().state;
|
234
234
|
|
235
235
|
const auto count = lstate.ReadNext(gstate);
|
236
|
-
yyjson_val **values;
|
237
|
-
if (gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
|
238
|
-
gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) {
|
239
|
-
values = lstate.array_values;
|
240
|
-
} else {
|
241
|
-
D_ASSERT(gstate.bind_data.record_type != JSONRecordType::AUTO);
|
242
|
-
values = lstate.values;
|
243
|
-
}
|
236
|
+
yyjson_val **values = lstate.values;
|
244
237
|
output.SetCardinality(count);
|
245
238
|
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
239
|
+
if (!gstate.names.empty()) {
|
240
|
+
vector<Vector *> result_vectors;
|
241
|
+
result_vectors.reserve(gstate.column_indices.size());
|
242
|
+
for (const auto &col_idx : gstate.column_indices) {
|
243
|
+
result_vectors.emplace_back(&output.data[col_idx]);
|
244
|
+
}
|
245
|
+
|
246
|
+
D_ASSERT(gstate.bind_data.options.record_type != JSONRecordType::AUTO_DETECT);
|
247
|
+
bool success;
|
248
|
+
if (gstate.bind_data.options.record_type == JSONRecordType::RECORDS) {
|
249
|
+
success = JSONTransform::TransformObject(values, lstate.GetAllocator(), count, gstate.names, result_vectors,
|
250
|
+
lstate.transform_options);
|
251
|
+
} else {
|
252
|
+
D_ASSERT(gstate.bind_data.options.record_type == JSONRecordType::VALUES);
|
253
|
+
success = JSONTransform::Transform(values, lstate.GetAllocator(), *result_vectors[0], count,
|
254
|
+
lstate.transform_options);
|
255
|
+
}
|
256
|
+
|
257
|
+
if (!success) {
|
258
|
+
string hint =
|
259
|
+
gstate.bind_data.auto_detect
|
260
|
+
? "\nTry increasing 'sample_size', reducing 'maximum_depth', specifying 'columns', 'format' or "
|
261
|
+
"'records' manually, or setting 'ignore_errors' to true."
|
262
|
+
: "\nTry setting 'auto_detect' to true, specifying 'format' or 'records' manually, or setting "
|
263
|
+
"'ignore_errors' to true.";
|
264
|
+
lstate.ThrowTransformError(lstate.transform_options.object_index,
|
265
|
+
lstate.transform_options.error_message + hint);
|
266
|
+
}
|
262
267
|
}
|
263
268
|
|
264
|
-
if (
|
265
|
-
|
266
|
-
gstate.bind_data.auto_detect
|
267
|
-
? "\nTry increasing 'sample_size', reducing 'maximum_depth', specifying 'columns', 'lines' or "
|
268
|
-
"'json_format' manually, or setting 'ignore_errors' to true."
|
269
|
-
: "\nTry setting 'auto_detect' to true, specifying 'lines' or 'json_format' manually, or setting "
|
270
|
-
"'ignore_errors' to true.";
|
271
|
-
lstate.ThrowTransformError(lstate.transform_options.object_index,
|
272
|
-
lstate.transform_options.error_message + hint);
|
269
|
+
if (output.size() != 0) {
|
270
|
+
MultiFileReader::FinalizeChunk(gstate.bind_data.reader_bind, lstate.GetReaderData(), output);
|
273
271
|
}
|
274
272
|
}
|
275
273
|
|
276
274
|
TableFunction JSONFunctions::GetReadJSONTableFunction(shared_ptr<JSONScanInfo> function_info) {
|
277
275
|
TableFunction table_function({LogicalType::VARCHAR}, ReadJSONFunction, ReadJSONBind,
|
278
276
|
JSONGlobalTableFunctionState::Init, JSONLocalTableFunctionState::Init);
|
277
|
+
table_function.name = "read_json";
|
279
278
|
|
280
279
|
JSONScan::TableFunctionDefaults(table_function);
|
281
280
|
table_function.named_parameters["columns"] = LogicalType::ANY;
|
@@ -285,10 +284,9 @@ TableFunction JSONFunctions::GetReadJSONTableFunction(shared_ptr<JSONScanInfo> f
|
|
285
284
|
table_function.named_parameters["date_format"] = LogicalType::VARCHAR;
|
286
285
|
table_function.named_parameters["timestampformat"] = LogicalType::VARCHAR;
|
287
286
|
table_function.named_parameters["timestamp_format"] = LogicalType::VARCHAR;
|
288
|
-
table_function.named_parameters["
|
287
|
+
table_function.named_parameters["records"] = LogicalType::VARCHAR;
|
289
288
|
|
290
|
-
|
291
|
-
// TODO: might be able to do filter pushdown/prune too
|
289
|
+
// TODO: might be able to do filter pushdown/prune ?
|
292
290
|
|
293
291
|
table_function.function_info = std::move(function_info);
|
294
292
|
|
@@ -305,25 +303,25 @@ TableFunctionSet CreateJSONFunctionInfo(string name, shared_ptr<JSONScanInfo> in
|
|
305
303
|
}
|
306
304
|
|
307
305
|
TableFunctionSet JSONFunctions::GetReadJSONFunction() {
|
308
|
-
auto info =
|
309
|
-
make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::UNSTRUCTURED, JSONRecordType::RECORDS, false);
|
306
|
+
auto info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::ARRAY, JSONRecordType::RECORDS);
|
310
307
|
return CreateJSONFunctionInfo("read_json", std::move(info));
|
311
308
|
}
|
312
309
|
|
313
310
|
TableFunctionSet JSONFunctions::GetReadNDJSONFunction() {
|
314
|
-
auto info =
|
315
|
-
|
311
|
+
auto info =
|
312
|
+
make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED, JSONRecordType::RECORDS);
|
316
313
|
return CreateJSONFunctionInfo("read_ndjson", std::move(info));
|
317
314
|
}
|
318
315
|
|
319
316
|
TableFunctionSet JSONFunctions::GetReadJSONAutoFunction() {
|
320
|
-
auto info =
|
317
|
+
auto info =
|
318
|
+
make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::AUTO_DETECT, JSONRecordType::AUTO_DETECT, true);
|
321
319
|
return CreateJSONFunctionInfo("read_json_auto", std::move(info), true);
|
322
320
|
}
|
323
321
|
|
324
322
|
TableFunctionSet JSONFunctions::GetReadNDJSONAutoFunction() {
|
325
|
-
auto info =
|
326
|
-
|
323
|
+
auto info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::NEWLINE_DELIMITED,
|
324
|
+
JSONRecordType::AUTO_DETECT, true);
|
327
325
|
return CreateJSONFunctionInfo("read_ndjson_auto", std::move(info), true);
|
328
326
|
}
|
329
327
|
|
@@ -6,34 +6,46 @@ namespace duckdb {
|
|
6
6
|
|
7
7
|
unique_ptr<FunctionData> ReadJSONObjectsBind(ClientContext &context, TableFunctionBindInput &input,
|
8
8
|
vector<LogicalType> &return_types, vector<string> &names) {
|
9
|
+
auto bind_data = make_uniq<JSONScanData>();
|
10
|
+
bind_data->Bind(context, input);
|
11
|
+
|
12
|
+
bind_data->names.emplace_back("json");
|
9
13
|
return_types.push_back(JSONCommon::JSONType());
|
10
14
|
names.emplace_back("json");
|
11
|
-
|
15
|
+
|
16
|
+
bind_data->reader_bind =
|
17
|
+
MultiFileReader::BindOptions(bind_data->options.file_options, bind_data->files, return_types, names);
|
18
|
+
|
19
|
+
return bind_data;
|
12
20
|
}
|
13
21
|
|
14
22
|
static void ReadJSONObjectsFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
|
15
|
-
|
16
|
-
|
17
|
-
auto &gstate = ((JSONGlobalTableFunctionState &)*data_p.global_state).state;
|
18
|
-
auto &lstate = ((JSONLocalTableFunctionState &)*data_p.local_state).state;
|
23
|
+
auto &gstate = data_p.global_state->Cast<JSONGlobalTableFunctionState>().state;
|
24
|
+
auto &lstate = data_p.local_state->Cast<JSONLocalTableFunctionState>().state;
|
19
25
|
|
20
26
|
// Fetch next lines
|
21
27
|
const auto count = lstate.ReadNext(gstate);
|
22
|
-
const auto
|
28
|
+
const auto units = lstate.units;
|
23
29
|
const auto objects = lstate.values;
|
24
30
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
31
|
+
if (!gstate.names.empty()) {
|
32
|
+
// Create the strings without copying them
|
33
|
+
auto strings = FlatVector::GetData<string_t>(output.data[0]);
|
34
|
+
auto &validity = FlatVector::Validity(output.data[0]);
|
35
|
+
for (idx_t i = 0; i < count; i++) {
|
36
|
+
if (objects[i]) {
|
37
|
+
strings[i] = string_t(units[i].pointer, units[i].size);
|
38
|
+
} else {
|
39
|
+
validity.SetInvalid(i);
|
40
|
+
}
|
33
41
|
}
|
34
42
|
}
|
35
43
|
|
36
44
|
output.SetCardinality(count);
|
45
|
+
|
46
|
+
if (output.size() != 0) {
|
47
|
+
MultiFileReader::FinalizeChunk(gstate.bind_data.reader_bind, lstate.GetReaderData(), output);
|
48
|
+
}
|
37
49
|
}
|
38
50
|
|
39
51
|
TableFunction GetReadJSONObjectsTableFunction(bool list_parameter, shared_ptr<JSONScanInfo> function_info) {
|
@@ -49,7 +61,7 @@ TableFunction GetReadJSONObjectsTableFunction(bool list_parameter, shared_ptr<JS
|
|
49
61
|
TableFunctionSet JSONFunctions::GetReadJSONObjectsFunction() {
|
50
62
|
TableFunctionSet function_set("read_json_objects");
|
51
63
|
auto function_info =
|
52
|
-
make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::
|
64
|
+
make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::ARRAY, JSONRecordType::RECORDS);
|
53
65
|
function_set.AddFunction(GetReadJSONObjectsTableFunction(false, function_info));
|
54
66
|
function_set.AddFunction(GetReadJSONObjectsTableFunction(true, function_info));
|
55
67
|
return function_set;
|
@@ -57,8 +69,17 @@ TableFunctionSet JSONFunctions::GetReadJSONObjectsFunction() {
|
|
57
69
|
|
58
70
|
TableFunctionSet JSONFunctions::GetReadNDJSONObjectsFunction() {
|
59
71
|
TableFunctionSet function_set("read_ndjson_objects");
|
72
|
+
auto function_info = make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::NEWLINE_DELIMITED,
|
73
|
+
JSONRecordType::RECORDS);
|
74
|
+
function_set.AddFunction(GetReadJSONObjectsTableFunction(false, function_info));
|
75
|
+
function_set.AddFunction(GetReadJSONObjectsTableFunction(true, function_info));
|
76
|
+
return function_set;
|
77
|
+
}
|
78
|
+
|
79
|
+
TableFunctionSet JSONFunctions::GetReadJSONObjectsAutoFunction() {
|
80
|
+
TableFunctionSet function_set("read_json_objects_auto");
|
60
81
|
auto function_info =
|
61
|
-
make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::
|
82
|
+
make_shared<JSONScanInfo>(JSONScanType::READ_JSON_OBJECTS, JSONFormat::AUTO_DETECT, JSONRecordType::RECORDS);
|
62
83
|
function_set.AddFunction(GetReadJSONObjectsTableFunction(false, function_info));
|
63
84
|
function_set.AddFunction(GetReadJSONObjectsTableFunction(true, function_info));
|
64
85
|
return function_set;
|
@@ -1,5 +1,6 @@
|
|
1
1
|
#include "json_functions.hpp"
|
2
2
|
|
3
|
+
#include "duckdb/common/file_system.hpp"
|
3
4
|
#include "duckdb/execution/expression_executor.hpp"
|
4
5
|
#include "duckdb/function/cast/cast_function_set.hpp"
|
5
6
|
#include "duckdb/function/cast/default_casts.hpp"
|
@@ -50,7 +51,7 @@ bool JSONReadFunctionData::Equals(const FunctionData &other_p) const {
|
|
50
51
|
}
|
51
52
|
|
52
53
|
unique_ptr<FunctionData> JSONReadFunctionData::Bind(ClientContext &context, ScalarFunction &bound_function,
|
53
|
-
vector<
|
54
|
+
vector<unique_ptr<Expression>> &arguments) {
|
54
55
|
D_ASSERT(bound_function.arguments.size() == 2);
|
55
56
|
bool constant = false;
|
56
57
|
string path = "";
|
@@ -80,7 +81,7 @@ bool JSONReadManyFunctionData::Equals(const FunctionData &other_p) const {
|
|
80
81
|
}
|
81
82
|
|
82
83
|
unique_ptr<FunctionData> JSONReadManyFunctionData::Bind(ClientContext &context, ScalarFunction &bound_function,
|
83
|
-
vector<
|
84
|
+
vector<unique_ptr<Expression>> &arguments) {
|
84
85
|
D_ASSERT(bound_function.arguments.size() == 2);
|
85
86
|
if (arguments[1]->HasParameter()) {
|
86
87
|
throw ParameterNotResolvedException();
|
@@ -173,6 +174,7 @@ vector<TableFunctionSet> JSONFunctions::GetTableFunctions() {
|
|
173
174
|
// Reads JSON as string
|
174
175
|
functions.push_back(GetReadJSONObjectsFunction());
|
175
176
|
functions.push_back(GetReadNDJSONObjectsFunction());
|
177
|
+
functions.push_back(GetReadJSONObjectsAutoFunction());
|
176
178
|
|
177
179
|
// Read JSON as columnar data
|
178
180
|
functions.push_back(GetReadJSONFunction());
|
@@ -199,16 +201,21 @@ unique_ptr<TableRef> JSONFunctions::ReadJSONReplacement(ClientContext &context,
|
|
199
201
|
return nullptr;
|
200
202
|
}
|
201
203
|
auto table_function = make_uniq<TableFunctionRef>();
|
202
|
-
vector<
|
204
|
+
vector<unique_ptr<ParsedExpression>> children;
|
203
205
|
children.push_back(make_uniq<ConstantExpression>(Value(table_name)));
|
204
206
|
table_function->function = make_uniq<FunctionExpression>("read_json_auto", std::move(children));
|
207
|
+
|
208
|
+
if (!FileSystem::HasGlob(table_name)) {
|
209
|
+
table_function->alias = FileSystem::ExtractBaseName(table_name);
|
210
|
+
}
|
211
|
+
|
205
212
|
return std::move(table_function);
|
206
213
|
}
|
207
214
|
|
208
215
|
static bool CastVarcharToJSON(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
|
209
216
|
auto &lstate = parameters.local_state->Cast<JSONFunctionLocalState>();
|
210
217
|
lstate.json_allocator.Reset();
|
211
|
-
auto alc = lstate.json_allocator.
|
218
|
+
auto alc = lstate.json_allocator.GetYYAlc();
|
212
219
|
|
213
220
|
bool success = true;
|
214
221
|
UnaryExecutor::ExecuteWithNulls<string_t, string_t>(
|