duckdb 0.7.1-dev90.0 → 0.7.2-dev0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/binding.gyp +7 -7
- package/package.json +3 -3
- package/src/duckdb/extension/json/buffered_json_reader.cpp +50 -9
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +7 -2
- package/src/duckdb/extension/json/include/json_scan.hpp +45 -10
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +35 -22
- package/src/duckdb/extension/json/json_functions/json_create.cpp +8 -8
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +8 -3
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +54 -10
- package/src/duckdb/extension/json/json_functions/read_json.cpp +104 -49
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +5 -3
- package/src/duckdb/extension/json/json_functions.cpp +7 -0
- package/src/duckdb/extension/json/json_scan.cpp +144 -38
- package/src/duckdb/extension/parquet/column_reader.cpp +7 -0
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -0
- package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -10
- package/src/duckdb/src/catalog/catalog.cpp +62 -13
- package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +8 -7
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
- package/src/duckdb/src/catalog/default/default_views.cpp +1 -1
- package/src/duckdb/src/common/bind_helpers.cpp +55 -0
- package/src/duckdb/src/common/file_system.cpp +23 -9
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -0
- package/src/duckdb/src/common/local_file_system.cpp +4 -4
- package/src/duckdb/src/common/string_util.cpp +8 -4
- package/src/duckdb/src/common/types/partitioned_column_data.cpp +1 -0
- package/src/duckdb/src/common/types.cpp +37 -11
- package/src/duckdb/src/execution/column_binding_resolver.cpp +5 -2
- package/src/duckdb/src/execution/index/art/art.cpp +117 -67
- package/src/duckdb/src/execution/index/art/art_key.cpp +24 -12
- package/src/duckdb/src/execution/index/art/leaf.cpp +7 -8
- package/src/duckdb/src/execution/index/art/node.cpp +13 -27
- package/src/duckdb/src/execution/index/art/node16.cpp +5 -8
- package/src/duckdb/src/execution/index/art/node256.cpp +3 -5
- package/src/duckdb/src/execution/index/art/node4.cpp +4 -7
- package/src/duckdb/src/execution/index/art/node48.cpp +5 -8
- package/src/duckdb/src/execution/index/art/prefix.cpp +2 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +6 -27
- package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +1 -9
- package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -9
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +7 -9
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +9 -0
- package/src/duckdb/src/execution/physical_operator.cpp +6 -6
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +38 -11
- package/src/duckdb/src/function/scalar/generic/current_setting.cpp +2 -2
- package/src/duckdb/src/function/scalar/list/array_slice.cpp +2 -3
- package/src/duckdb/src/function/scalar/map/map.cpp +69 -21
- package/src/duckdb/src/function/scalar/string/like.cpp +6 -3
- package/src/duckdb/src/function/table/read_csv.cpp +16 -5
- package/src/duckdb/src/function/table/system/duckdb_temporary_files.cpp +59 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +3 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/bind_helpers.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +9 -1
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +4 -4
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +9 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +37 -41
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +8 -11
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/{extension_functions.hpp → extension_entries.hpp} +27 -5
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +11 -1
- package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +0 -7
- package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +3 -3
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +10 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +1 -2
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +8 -0
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +47 -38
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +7 -0
- package/src/duckdb/src/main/client_context.cpp +2 -0
- package/src/duckdb/src/main/config.cpp +1 -0
- package/src/duckdb/src/main/database.cpp +14 -5
- package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +15 -0
- package/src/duckdb/src/main/extension/extension_install.cpp +60 -16
- package/src/duckdb/src/main/extension/extension_load.cpp +62 -13
- package/src/duckdb/src/main/settings/settings.cpp +16 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +2 -6
- package/src/duckdb/src/parallel/pipeline_executor.cpp +1 -55
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +3 -0
- package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -13
- package/src/duckdb/src/parser/statement/delete_statement.cpp +3 -0
- package/src/duckdb/src/parser/statement/insert_statement.cpp +9 -0
- package/src/duckdb/src/parser/statement/update_statement.cpp +3 -0
- package/src/duckdb/src/parser/transform/expression/transform_case.cpp +3 -3
- package/src/duckdb/src/planner/bind_context.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +3 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +7 -14
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +22 -1
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +32 -1
- package/src/duckdb/src/planner/logical_operator.cpp +4 -1
- package/src/duckdb/src/storage/buffer_manager.cpp +105 -26
- package/src/duckdb/src/storage/compression/bitpacking.cpp +16 -7
- package/src/duckdb/src/storage/data_table.cpp +66 -3
- package/src/duckdb/src/storage/index.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +1 -2
- package/src/duckdb/src/storage/wal_replay.cpp +68 -0
- package/src/duckdb/src/storage/write_ahead_log.cpp +21 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -2
- package/src/duckdb/third_party/concurrentqueue/blockingconcurrentqueue.h +2 -2
- package/src/duckdb/third_party/fmt/include/fmt/core.h +1 -2
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/src/statement.cpp +46 -12
- package/test/arrow.test.ts +3 -3
- package/test/prepare.test.ts +39 -1
- package/test/typescript_decls.test.ts +1 -1
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
#include "json_scan.hpp"
|
|
2
2
|
|
|
3
3
|
#include "duckdb/main/database.hpp"
|
|
4
|
+
#include "duckdb/main/extension_helper.hpp"
|
|
4
5
|
#include "duckdb/parallel/task_scheduler.hpp"
|
|
5
6
|
#include "duckdb/storage/buffer_manager.hpp"
|
|
6
|
-
#include "duckdb/main/extension_helper.hpp"
|
|
7
7
|
|
|
8
8
|
namespace duckdb {
|
|
9
9
|
|
|
@@ -20,8 +20,9 @@ unique_ptr<FunctionData> JSONScanData::Bind(ClientContext &context, TableFunctio
|
|
|
20
20
|
auto &options = result->options;
|
|
21
21
|
|
|
22
22
|
auto &info = (JSONScanInfo &)*input.info;
|
|
23
|
-
options.format = info.format;
|
|
24
23
|
result->type = info.type;
|
|
24
|
+
options.format = info.format;
|
|
25
|
+
result->record_type = info.record_type;
|
|
25
26
|
result->auto_detect = info.auto_detect;
|
|
26
27
|
|
|
27
28
|
vector<string> patterns;
|
|
@@ -40,16 +41,16 @@ unique_ptr<FunctionData> JSONScanData::Bind(ClientContext &context, TableFunctio
|
|
|
40
41
|
result->ignore_errors = BooleanValue::Get(kv.second);
|
|
41
42
|
} else if (loption == "maximum_object_size") {
|
|
42
43
|
result->maximum_object_size = MaxValue<idx_t>(UIntegerValue::Get(kv.second), result->maximum_object_size);
|
|
43
|
-
} else if (loption == "
|
|
44
|
+
} else if (loption == "lines") {
|
|
44
45
|
auto format = StringUtil::Lower(StringValue::Get(kv.second));
|
|
45
46
|
if (format == "auto") {
|
|
46
47
|
options.format = JSONFormat::AUTO_DETECT;
|
|
47
|
-
} else if (format == "
|
|
48
|
+
} else if (format == "false") {
|
|
48
49
|
options.format = JSONFormat::UNSTRUCTURED;
|
|
49
|
-
} else if (format == "
|
|
50
|
+
} else if (format == "true") {
|
|
50
51
|
options.format = JSONFormat::NEWLINE_DELIMITED;
|
|
51
52
|
} else {
|
|
52
|
-
throw BinderException("
|
|
53
|
+
throw BinderException("\"lines\" must be one of ['auto', 'true', 'false']");
|
|
53
54
|
}
|
|
54
55
|
} else if (loption == "compression") {
|
|
55
56
|
auto compression = StringUtil::Lower(StringValue::Get(kv.second));
|
|
@@ -74,10 +75,7 @@ void JSONScanData::InitializeFilePaths(ClientContext &context, const vector<stri
|
|
|
74
75
|
vector<string> &file_paths) {
|
|
75
76
|
auto &fs = FileSystem::GetFileSystem(context);
|
|
76
77
|
for (auto &file_pattern : patterns) {
|
|
77
|
-
auto found_files = fs.
|
|
78
|
-
if (found_files.empty()) {
|
|
79
|
-
throw FileSystem::MissingFileException(file_pattern, context);
|
|
80
|
-
}
|
|
78
|
+
auto found_files = fs.GlobFiles(file_pattern, context);
|
|
81
79
|
file_paths.insert(file_paths.end(), found_files.begin(), found_files.end());
|
|
82
80
|
}
|
|
83
81
|
}
|
|
@@ -98,6 +96,27 @@ void JSONScanData::InitializeFormats() {
|
|
|
98
96
|
if (!timestamp_format.empty()) {
|
|
99
97
|
date_format_map.AddFormat(LogicalTypeId::TIMESTAMP, timestamp_format);
|
|
100
98
|
}
|
|
99
|
+
|
|
100
|
+
if (auto_detect) {
|
|
101
|
+
static const unordered_map<LogicalTypeId, vector<const char *>, LogicalTypeIdHash> FORMAT_TEMPLATES = {
|
|
102
|
+
{LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
|
|
103
|
+
{LogicalTypeId::TIMESTAMP,
|
|
104
|
+
{"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S",
|
|
105
|
+
"%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"}},
|
|
106
|
+
};
|
|
107
|
+
|
|
108
|
+
// Populate possible date/timestamp formats, assume this is consistent across columns
|
|
109
|
+
for (auto &kv : FORMAT_TEMPLATES) {
|
|
110
|
+
const auto &type = kv.first;
|
|
111
|
+
if (date_format_map.HasFormats(type)) {
|
|
112
|
+
continue; // Already populated
|
|
113
|
+
}
|
|
114
|
+
const auto &format_strings = kv.second;
|
|
115
|
+
for (auto &format_string : format_strings) {
|
|
116
|
+
date_format_map.AddFormat(type, format_string);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
101
120
|
}
|
|
102
121
|
|
|
103
122
|
void JSONScanData::Serialize(FieldWriter &writer) {
|
|
@@ -112,9 +131,17 @@ void JSONScanData::Serialize(FieldWriter &writer) {
|
|
|
112
131
|
writer.WriteList<string>(names);
|
|
113
132
|
writer.WriteList<idx_t>(valid_cols);
|
|
114
133
|
writer.WriteField<idx_t>(max_depth);
|
|
115
|
-
writer.WriteField<
|
|
116
|
-
|
|
117
|
-
|
|
134
|
+
writer.WriteField<JSONRecordType>(record_type);
|
|
135
|
+
if (!date_format.empty()) {
|
|
136
|
+
writer.WriteString(date_format);
|
|
137
|
+
} else {
|
|
138
|
+
writer.WriteString(date_format_map.GetFormat(LogicalTypeId::DATE).format_specifier);
|
|
139
|
+
}
|
|
140
|
+
if (!timestamp_format.empty()) {
|
|
141
|
+
writer.WriteString(timestamp_format);
|
|
142
|
+
} else {
|
|
143
|
+
writer.WriteString(date_format_map.GetFormat(LogicalTypeId::TIMESTAMP).format_specifier);
|
|
144
|
+
}
|
|
118
145
|
}
|
|
119
146
|
|
|
120
147
|
void JSONScanData::Deserialize(FieldReader &reader) {
|
|
@@ -129,9 +156,12 @@ void JSONScanData::Deserialize(FieldReader &reader) {
|
|
|
129
156
|
names = reader.ReadRequiredList<string>();
|
|
130
157
|
valid_cols = reader.ReadRequiredList<idx_t>();
|
|
131
158
|
max_depth = reader.ReadRequired<idx_t>();
|
|
132
|
-
|
|
159
|
+
record_type = reader.ReadRequired<JSONRecordType>();
|
|
133
160
|
date_format = reader.ReadRequired<string>();
|
|
134
161
|
timestamp_format = reader.ReadRequired<string>();
|
|
162
|
+
|
|
163
|
+
InitializeFormats();
|
|
164
|
+
transform_options.date_format_map = &date_format_map;
|
|
135
165
|
}
|
|
136
166
|
|
|
137
167
|
JSONScanGlobalState::JSONScanGlobalState(ClientContext &context, JSONScanData &bind_data_p)
|
|
@@ -150,11 +180,11 @@ JSONScanGlobalState::JSONScanGlobalState(ClientContext &context, JSONScanData &b
|
|
|
150
180
|
}
|
|
151
181
|
|
|
152
182
|
JSONScanLocalState::JSONScanLocalState(ClientContext &context, JSONScanGlobalState &gstate)
|
|
153
|
-
: batch_index(DConstants::INVALID_INDEX), bind_data(gstate.bind_data),
|
|
183
|
+
: scan_count(0), array_idx(0), array_offset(0), batch_index(DConstants::INVALID_INDEX), bind_data(gstate.bind_data),
|
|
154
184
|
json_allocator(BufferAllocator::Get(context)), current_reader(nullptr), current_buffer_handle(nullptr),
|
|
155
|
-
buffer_size(0), buffer_offset(0), prev_buffer_remainder(0) {
|
|
185
|
+
is_last(false), buffer_size(0), buffer_offset(0), prev_buffer_remainder(0) {
|
|
156
186
|
|
|
157
|
-
// Buffer to reconstruct JSON
|
|
187
|
+
// Buffer to reconstruct JSON values when they cross a buffer boundary
|
|
158
188
|
reconstruct_buffer = gstate.allocator.Allocate(gstate.bind_data.maximum_object_size + YYJSON_PADDING_SIZE);
|
|
159
189
|
|
|
160
190
|
// This is needed for JSONFormat::UNSTRUCTURED, to make use of YYJSON_READ_INSITU
|
|
@@ -174,11 +204,6 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
|
|
|
174
204
|
// Perform projection pushdown
|
|
175
205
|
if (bind_data.type == JSONScanType::READ_JSON) {
|
|
176
206
|
D_ASSERT(input.column_ids.size() <= bind_data.names.size()); // Can't project to have more columns
|
|
177
|
-
if (bind_data.auto_detect && input.column_ids.size() < bind_data.names.size()) {
|
|
178
|
-
// If we are auto-detecting, but don't need all columns present in the file,
|
|
179
|
-
// then we don't need to throw an error if we encounter an unseen column
|
|
180
|
-
bind_data.transform_options.error_unknown_key = false;
|
|
181
|
-
}
|
|
182
207
|
vector<string> names;
|
|
183
208
|
names.reserve(input.column_ids.size());
|
|
184
209
|
for (idx_t i = 0; i < input.column_ids.size(); i++) {
|
|
@@ -189,13 +214,37 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
|
|
|
189
214
|
names.push_back(std::move(bind_data.names[id]));
|
|
190
215
|
bind_data.valid_cols.push_back(i);
|
|
191
216
|
}
|
|
217
|
+
if (names.size() < bind_data.names.size()) {
|
|
218
|
+
// If we are auto-detecting, but don't need all columns present in the file,
|
|
219
|
+
// then we don't need to throw an error if we encounter an unseen column
|
|
220
|
+
bind_data.transform_options.error_unknown_key = false;
|
|
221
|
+
}
|
|
192
222
|
bind_data.names = std::move(names);
|
|
193
223
|
}
|
|
194
224
|
return result;
|
|
195
225
|
}
|
|
196
226
|
|
|
197
227
|
idx_t JSONGlobalTableFunctionState::MaxThreads() const {
|
|
198
|
-
|
|
228
|
+
auto &bind_data = state.bind_data;
|
|
229
|
+
|
|
230
|
+
auto num_files = bind_data.file_paths.size();
|
|
231
|
+
idx_t readers_per_file;
|
|
232
|
+
if (bind_data.options.format == JSONFormat::UNSTRUCTURED) {
|
|
233
|
+
// Unstructured necessitates single thread
|
|
234
|
+
readers_per_file = 1;
|
|
235
|
+
} else if (!state.json_readers.empty() && state.json_readers[0]->IsOpen()) {
|
|
236
|
+
auto &reader = *state.json_readers[0];
|
|
237
|
+
const auto &options = reader.GetOptions();
|
|
238
|
+
if (options.format == JSONFormat::UNSTRUCTURED || options.compression != FileCompressionType::UNCOMPRESSED) {
|
|
239
|
+
// Auto-detected unstructured - same story, compression also really limits parallelism
|
|
240
|
+
readers_per_file = 1;
|
|
241
|
+
} else {
|
|
242
|
+
return state.system_threads;
|
|
243
|
+
}
|
|
244
|
+
} else {
|
|
245
|
+
return state.system_threads;
|
|
246
|
+
}
|
|
247
|
+
return num_files * readers_per_file;
|
|
199
248
|
}
|
|
200
249
|
|
|
201
250
|
JSONLocalTableFunctionState::JSONLocalTableFunctionState(ClientContext &context, JSONScanGlobalState &gstate)
|
|
@@ -231,6 +280,12 @@ static inline void SkipWhitespace(const char *buffer_ptr, idx_t &buffer_offset,
|
|
|
231
280
|
idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) {
|
|
232
281
|
json_allocator.Reset();
|
|
233
282
|
|
|
283
|
+
if ((gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
|
|
284
|
+
gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) &&
|
|
285
|
+
array_idx < scan_count) {
|
|
286
|
+
return GetObjectsFromArray(gstate);
|
|
287
|
+
}
|
|
288
|
+
|
|
234
289
|
idx_t count = 0;
|
|
235
290
|
if (buffer_offset == buffer_size) {
|
|
236
291
|
if (!ReadNextBuffer(gstate)) {
|
|
@@ -254,10 +309,18 @@ idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) {
|
|
|
254
309
|
default:
|
|
255
310
|
throw InternalException("Unknown JSON format");
|
|
256
311
|
}
|
|
312
|
+
scan_count = count;
|
|
257
313
|
|
|
258
314
|
// Skip over any remaining whitespace for the next scan
|
|
259
315
|
SkipWhitespace(buffer_ptr, buffer_offset, buffer_size);
|
|
260
316
|
|
|
317
|
+
if (gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
|
|
318
|
+
gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) {
|
|
319
|
+
array_idx = 0;
|
|
320
|
+
array_offset = 0;
|
|
321
|
+
return GetObjectsFromArray(gstate);
|
|
322
|
+
}
|
|
323
|
+
|
|
261
324
|
return count;
|
|
262
325
|
}
|
|
263
326
|
|
|
@@ -332,10 +395,48 @@ yyjson_val *JSONScanLocalState::ParseLine(char *line_start, idx_t line_size, idx
|
|
|
332
395
|
}
|
|
333
396
|
}
|
|
334
397
|
|
|
398
|
+
idx_t JSONScanLocalState::GetObjectsFromArray(JSONScanGlobalState &gstate) {
|
|
399
|
+
idx_t arr_count = 0;
|
|
400
|
+
|
|
401
|
+
size_t idx, max;
|
|
402
|
+
yyjson_val *val;
|
|
403
|
+
for (; array_idx < scan_count; array_idx++, array_offset = 0) {
|
|
404
|
+
auto &value = values[array_idx];
|
|
405
|
+
if (!value) {
|
|
406
|
+
continue;
|
|
407
|
+
}
|
|
408
|
+
if (unsafe_yyjson_is_arr(value)) {
|
|
409
|
+
yyjson_arr_foreach(value, idx, max, val) {
|
|
410
|
+
if (idx < array_offset) {
|
|
411
|
+
continue;
|
|
412
|
+
}
|
|
413
|
+
array_values[arr_count++] = val;
|
|
414
|
+
if (arr_count == STANDARD_VECTOR_SIZE) {
|
|
415
|
+
break;
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
array_offset = idx + 1;
|
|
419
|
+
if (arr_count == STANDARD_VECTOR_SIZE) {
|
|
420
|
+
break;
|
|
421
|
+
}
|
|
422
|
+
} else if (!gstate.bind_data.ignore_errors) {
|
|
423
|
+
ThrowTransformError(
|
|
424
|
+
array_idx,
|
|
425
|
+
StringUtil::Format("Expected JSON ARRAY but got %s: %s\nTry setting json_format to 'records'",
|
|
426
|
+
JSONCommon::ValTypeToString(value), JSONCommon::ValToString(value, 50)));
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
return arr_count;
|
|
430
|
+
}
|
|
431
|
+
|
|
335
432
|
bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
|
|
336
433
|
if (current_reader) {
|
|
337
434
|
D_ASSERT(current_buffer_handle);
|
|
338
435
|
current_reader->SetBufferLineOrObjectCount(current_buffer_handle->buffer_index, lines_or_objects_in_buffer);
|
|
436
|
+
if (is_last && gstate.bind_data.type != JSONScanType::SAMPLE) {
|
|
437
|
+
// Close files that are done if we're not sampling
|
|
438
|
+
current_reader->CloseJSONFile();
|
|
439
|
+
}
|
|
339
440
|
}
|
|
340
441
|
|
|
341
442
|
AllocatedData buffer;
|
|
@@ -396,7 +497,9 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
|
|
|
396
497
|
// Unopened file
|
|
397
498
|
current_reader->OpenJSONFile();
|
|
398
499
|
batch_index = gstate.batch_index++;
|
|
399
|
-
if (options.format == JSONFormat::UNSTRUCTURED
|
|
500
|
+
if (options.format == JSONFormat::UNSTRUCTURED || (options.format == JSONFormat::NEWLINE_DELIMITED &&
|
|
501
|
+
options.compression != FileCompressionType::UNCOMPRESSED &&
|
|
502
|
+
gstate.file_index < gstate.json_readers.size())) {
|
|
400
503
|
gstate.file_index++; // UNSTRUCTURED necessitates single-threaded read
|
|
401
504
|
}
|
|
402
505
|
if (options.format != JSONFormat::AUTO_DETECT) {
|
|
@@ -450,9 +553,6 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
|
|
|
450
553
|
auto json_buffer_handle = make_unique<JSONBufferHandle>(buffer_index, readers, std::move(buffer), buffer_size);
|
|
451
554
|
current_buffer_handle = json_buffer_handle.get();
|
|
452
555
|
current_reader->InsertBuffer(buffer_index, std::move(json_buffer_handle));
|
|
453
|
-
if (!current_reader->GetFileHandle().PlainFileSource() && gstate.bind_data.type == JSONScanType::SAMPLE) {
|
|
454
|
-
// TODO: store buffer
|
|
455
|
-
}
|
|
456
556
|
|
|
457
557
|
buffer_offset = 0;
|
|
458
558
|
prev_buffer_remainder = 0;
|
|
@@ -508,16 +608,18 @@ void JSONScanLocalState::ReadNextBufferSeek(JSONScanGlobalState &gstate, idx_t &
|
|
|
508
608
|
}
|
|
509
609
|
|
|
510
610
|
void JSONScanLocalState::ReadNextBufferNoSeek(JSONScanGlobalState &gstate, idx_t &buffer_index) {
|
|
511
|
-
auto &file_handle = current_reader->GetFileHandle();
|
|
512
|
-
|
|
513
611
|
idx_t request_size = gstate.buffer_capacity - prev_buffer_remainder - YYJSON_PADDING_SIZE;
|
|
514
612
|
idx_t read_size;
|
|
515
613
|
{
|
|
516
614
|
lock_guard<mutex> reader_guard(current_reader->lock);
|
|
517
615
|
buffer_index = current_reader->GetBufferIndex();
|
|
518
616
|
|
|
519
|
-
|
|
520
|
-
|
|
617
|
+
if (current_reader->IsOpen()) {
|
|
618
|
+
read_size = current_reader->GetFileHandle().Read(buffer_ptr + prev_buffer_remainder, request_size,
|
|
619
|
+
gstate.bind_data.type == JSONScanType::SAMPLE);
|
|
620
|
+
} else {
|
|
621
|
+
read_size = 0;
|
|
622
|
+
}
|
|
521
623
|
is_last = read_size < request_size;
|
|
522
624
|
|
|
523
625
|
if (!gstate.bind_data.ignore_errors && read_size == 0 && prev_buffer_remainder != 0) {
|
|
@@ -579,10 +681,15 @@ void JSONScanLocalState::ReconstructFirstObject(JSONScanGlobalState &gstate) {
|
|
|
579
681
|
current_reader->RemoveBuffer(current_buffer_handle->buffer_index - 1);
|
|
580
682
|
}
|
|
581
683
|
|
|
582
|
-
|
|
684
|
+
values[0] = ParseLine((char *)reconstruct_ptr, line_size, line_size, lines[0]);
|
|
583
685
|
}
|
|
584
686
|
|
|
585
687
|
void JSONScanLocalState::ReadUnstructured(idx_t &count) {
|
|
688
|
+
// yyjson does not always return YYJSON_READ_ERROR_UNEXPECTED_END properly
|
|
689
|
+
// if a different error code happens within the last 50 bytes
|
|
690
|
+
// we assume it should be YYJSON_READ_ERROR_UNEXPECTED_END instead
|
|
691
|
+
static constexpr idx_t END_BOUND = 50;
|
|
692
|
+
|
|
586
693
|
const auto max_obj_size = reconstruct_buffer.GetSize();
|
|
587
694
|
yyjson_read_err error;
|
|
588
695
|
for (; count < STANDARD_VECTOR_SIZE; count++) {
|
|
@@ -608,8 +715,7 @@ void JSONScanLocalState::ReadUnstructured(idx_t &count) {
|
|
|
608
715
|
} else if (error.pos > max_obj_size) {
|
|
609
716
|
current_reader->ThrowParseError(current_buffer_handle->buffer_index, lines_or_objects_in_buffer, error,
|
|
610
717
|
"Try increasing \"maximum_object_size\".");
|
|
611
|
-
|
|
612
|
-
} else if (error.code == YYJSON_READ_ERROR_UNEXPECTED_END && !is_last) {
|
|
718
|
+
} else if (!is_last && (error.code == YYJSON_READ_ERROR_UNEXPECTED_END || remaining - error.pos < END_BOUND)) {
|
|
613
719
|
// Copy remaining to reconstruct_buffer
|
|
614
720
|
const auto reconstruct_ptr = reconstruct_buffer.get();
|
|
615
721
|
memcpy(reconstruct_ptr, obj_copy_start, remaining);
|
|
@@ -619,7 +725,7 @@ void JSONScanLocalState::ReadUnstructured(idx_t &count) {
|
|
|
619
725
|
} else {
|
|
620
726
|
current_reader->ThrowParseError(current_buffer_handle->buffer_index, lines_or_objects_in_buffer, error);
|
|
621
727
|
}
|
|
622
|
-
|
|
728
|
+
values[count] = read_doc->root;
|
|
623
729
|
}
|
|
624
730
|
}
|
|
625
731
|
|
|
@@ -645,7 +751,7 @@ void JSONScanLocalState::ReadNewlineDelimited(idx_t &count) {
|
|
|
645
751
|
}
|
|
646
752
|
idx_t line_size = line_end - line_start;
|
|
647
753
|
|
|
648
|
-
|
|
754
|
+
values[count] = ParseLine((char *)line_start, line_size, remaining, lines[count]);
|
|
649
755
|
|
|
650
756
|
buffer_offset += line_size;
|
|
651
757
|
SkipWhitespace(buffer_ptr, buffer_offset, buffer_size);
|
|
@@ -656,11 +762,11 @@ yyjson_alc *JSONScanLocalState::GetAllocator() {
|
|
|
656
762
|
return json_allocator.GetYYJSONAllocator();
|
|
657
763
|
}
|
|
658
764
|
|
|
659
|
-
void JSONScanLocalState::ThrowTransformError(idx_t
|
|
765
|
+
void JSONScanLocalState::ThrowTransformError(idx_t object_index, const string &error_message) {
|
|
660
766
|
D_ASSERT(current_reader);
|
|
661
767
|
D_ASSERT(current_buffer_handle);
|
|
662
768
|
D_ASSERT(object_index != DConstants::INVALID_INDEX);
|
|
663
|
-
auto line_or_object_in_buffer = lines_or_objects_in_buffer -
|
|
769
|
+
auto line_or_object_in_buffer = lines_or_objects_in_buffer - scan_count + object_index;
|
|
664
770
|
current_reader->ThrowTransformError(current_buffer_handle->buffer_index, line_or_object_in_buffer, error_message);
|
|
665
771
|
}
|
|
666
772
|
|
|
@@ -589,6 +589,7 @@ void StringColumnReader::PrepareDeltaLengthByteArray(ResizeableBuffer &buffer) {
|
|
|
589
589
|
}
|
|
590
590
|
auto length_data = (uint32_t *)length_buffer->ptr;
|
|
591
591
|
byte_array_data = make_unique<Vector>(LogicalType::VARCHAR, value_count);
|
|
592
|
+
byte_array_count = value_count;
|
|
592
593
|
auto string_data = FlatVector::GetData<string_t>(*byte_array_data);
|
|
593
594
|
for (idx_t i = 0; i < value_count; i++) {
|
|
594
595
|
auto str_len = length_data[i];
|
|
@@ -615,6 +616,7 @@ void StringColumnReader::PrepareDeltaByteArray(ResizeableBuffer &buffer) {
|
|
|
615
616
|
auto prefix_data = (uint32_t *)prefix_buffer->ptr;
|
|
616
617
|
auto suffix_data = (uint32_t *)suffix_buffer->ptr;
|
|
617
618
|
byte_array_data = make_unique<Vector>(LogicalType::VARCHAR, prefix_count);
|
|
619
|
+
byte_array_count = prefix_count;
|
|
618
620
|
auto string_data = FlatVector::GetData<string_t>(*byte_array_data);
|
|
619
621
|
for (idx_t i = 0; i < prefix_count; i++) {
|
|
620
622
|
auto str_len = prefix_data[i] + suffix_data[i];
|
|
@@ -646,6 +648,11 @@ void StringColumnReader::DeltaByteArray(uint8_t *defines, idx_t num_values, parq
|
|
|
646
648
|
continue;
|
|
647
649
|
}
|
|
648
650
|
if (filter[row_idx + result_offset]) {
|
|
651
|
+
if (delta_offset >= byte_array_count) {
|
|
652
|
+
throw IOException("DELTA_BYTE_ARRAY - length mismatch between values and byte array lengths (attempted "
|
|
653
|
+
"read of %d from %d entries) - corrupt file?",
|
|
654
|
+
delta_offset + 1, byte_array_count);
|
|
655
|
+
}
|
|
649
656
|
result_ptr[row_idx + result_offset] = string_data[delta_offset++];
|
|
650
657
|
} else {
|
|
651
658
|
delta_offset++;
|
|
@@ -221,10 +221,7 @@ public:
|
|
|
221
221
|
}
|
|
222
222
|
|
|
223
223
|
FileSystem &fs = FileSystem::GetFileSystem(context);
|
|
224
|
-
auto files = fs.
|
|
225
|
-
if (files.empty()) {
|
|
226
|
-
throw FileSystem::MissingFileException(info.file_path, context);
|
|
227
|
-
}
|
|
224
|
+
auto files = fs.GlobFiles(info.file_path, context);
|
|
228
225
|
|
|
229
226
|
// The most likely path (Parquet read without union by name option)
|
|
230
227
|
if (!parquet_options.union_by_name) {
|
|
@@ -362,12 +359,7 @@ public:
|
|
|
362
359
|
}
|
|
363
360
|
|
|
364
361
|
static vector<string> ParquetGlob(FileSystem &fs, const string &glob, ClientContext &context) {
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
if (files.empty()) {
|
|
368
|
-
throw FileSystem::MissingFileException(glob, context);
|
|
369
|
-
}
|
|
370
|
-
return files;
|
|
362
|
+
return fs.GlobFiles(glob, context);
|
|
371
363
|
}
|
|
372
364
|
|
|
373
365
|
static unique_ptr<FunctionData> ParquetScanBind(ClientContext &context, TableFunctionBindInput &input,
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
#include "duckdb/catalog/catalog_search_path.hpp"
|
|
4
4
|
#include "duckdb/catalog/catalog_entry/list.hpp"
|
|
5
|
+
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
|
5
6
|
#include "duckdb/catalog/catalog_set.hpp"
|
|
6
7
|
#include "duckdb/catalog/default/default_schemas.hpp"
|
|
7
8
|
#include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
|
|
@@ -26,7 +27,7 @@
|
|
|
26
27
|
#include "duckdb/planner/parsed_data/bound_create_table_info.hpp"
|
|
27
28
|
#include "duckdb/planner/binder.hpp"
|
|
28
29
|
#include "duckdb/catalog/default/default_types.hpp"
|
|
29
|
-
#include "duckdb/main/
|
|
30
|
+
#include "duckdb/main/extension_entries.hpp"
|
|
30
31
|
#include "duckdb/main/connection.hpp"
|
|
31
32
|
#include "duckdb/main/attached_database.hpp"
|
|
32
33
|
#include "duckdb/main/database_manager.hpp"
|
|
@@ -251,6 +252,20 @@ CatalogEntry *Catalog::CreateCollation(CatalogTransaction transaction, SchemaCat
|
|
|
251
252
|
return schema->CreateCollation(transaction, info);
|
|
252
253
|
}
|
|
253
254
|
|
|
255
|
+
//===--------------------------------------------------------------------===//
|
|
256
|
+
// Index
|
|
257
|
+
//===--------------------------------------------------------------------===//
|
|
258
|
+
CatalogEntry *Catalog::CreateIndex(CatalogTransaction transaction, CreateIndexInfo *info) {
|
|
259
|
+
auto &context = transaction.GetContext();
|
|
260
|
+
return CreateIndex(context, info);
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
CatalogEntry *Catalog::CreateIndex(ClientContext &context, CreateIndexInfo *info) {
|
|
264
|
+
auto schema = GetSchema(context, info->schema);
|
|
265
|
+
auto table = GetEntry<TableCatalogEntry>(context, schema->name, info->table->table_name);
|
|
266
|
+
return schema->CreateIndex(context, info, table);
|
|
267
|
+
}
|
|
268
|
+
|
|
254
269
|
//===--------------------------------------------------------------------===//
|
|
255
270
|
// Lookup Structures
|
|
256
271
|
//===--------------------------------------------------------------------===//
|
|
@@ -317,17 +332,26 @@ SimilarCatalogEntry Catalog::SimilarEntryInSchemas(ClientContext &context, const
|
|
|
317
332
|
return result;
|
|
318
333
|
}
|
|
319
334
|
|
|
320
|
-
string
|
|
321
|
-
auto
|
|
322
|
-
auto it = std::lower_bound(
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
if (it != EXTENSION_FUNCTIONS + size && it->function == function_name) {
|
|
335
|
+
string FindExtensionGeneric(const string &name, const ExtensionEntry entries[], idx_t size) {
|
|
336
|
+
auto lcase = StringUtil::Lower(name);
|
|
337
|
+
auto it = std::lower_bound(entries, entries + size, lcase,
|
|
338
|
+
[](const ExtensionEntry &element, const string &value) { return element.name < value; });
|
|
339
|
+
if (it != entries + size && it->name == lcase) {
|
|
326
340
|
return it->extension;
|
|
327
341
|
}
|
|
328
342
|
return "";
|
|
329
343
|
}
|
|
330
344
|
|
|
345
|
+
string FindExtensionForFunction(const string &name) {
|
|
346
|
+
idx_t size = sizeof(EXTENSION_FUNCTIONS) / sizeof(ExtensionEntry);
|
|
347
|
+
return FindExtensionGeneric(name, EXTENSION_FUNCTIONS, size);
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
string FindExtensionForSetting(const string &name) {
|
|
351
|
+
idx_t size = sizeof(EXTENSION_SETTINGS) / sizeof(ExtensionEntry);
|
|
352
|
+
return FindExtensionGeneric(name, EXTENSION_SETTINGS, size);
|
|
353
|
+
}
|
|
354
|
+
|
|
331
355
|
vector<CatalogSearchEntry> GetCatalogEntries(ClientContext &context, const string &catalog, const string &schema) {
|
|
332
356
|
vector<CatalogSearchEntry> entries;
|
|
333
357
|
auto &search_path = *context.client_data->catalog_search_path;
|
|
@@ -392,6 +416,26 @@ void FindMinimalQualification(ClientContext &context, const string &catalog_name
|
|
|
392
416
|
qualify_schema = true;
|
|
393
417
|
}
|
|
394
418
|
|
|
419
|
+
CatalogException Catalog::UnrecognizedConfigurationError(ClientContext &context, const string &name) {
|
|
420
|
+
// check if the setting exists in any extensions
|
|
421
|
+
auto extension_name = FindExtensionForSetting(name);
|
|
422
|
+
if (!extension_name.empty()) {
|
|
423
|
+
return CatalogException(
|
|
424
|
+
"Setting with name \"%s\" is not in the catalog, but it exists in the %s extension.\n\nTo "
|
|
425
|
+
"install and load the extension, run:\nINSTALL %s;\nLOAD %s;",
|
|
426
|
+
name, extension_name, extension_name, extension_name);
|
|
427
|
+
}
|
|
428
|
+
// the setting is not in an extension
|
|
429
|
+
// get a list of all options
|
|
430
|
+
vector<string> potential_names = DBConfig::GetOptionNames();
|
|
431
|
+
for (auto &entry : DBConfig::GetConfig(context).extension_parameters) {
|
|
432
|
+
potential_names.push_back(entry.first);
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
throw CatalogException("unrecognized configuration parameter \"%s\"\n%s", name,
|
|
436
|
+
StringUtil::CandidatesErrorMessage(potential_names, name, "Did you mean"));
|
|
437
|
+
}
|
|
438
|
+
|
|
395
439
|
CatalogException Catalog::CreateMissingEntryException(ClientContext &context, const string &entry_name,
|
|
396
440
|
CatalogType type,
|
|
397
441
|
const unordered_set<SchemaCatalogEntry *> &schemas,
|
|
@@ -408,13 +452,18 @@ CatalogException Catalog::CreateMissingEntryException(ClientContext &context, co
|
|
|
408
452
|
unseen_schemas.insert(current_schema);
|
|
409
453
|
}
|
|
410
454
|
}
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
455
|
+
// check if the entry exists in any extension
|
|
456
|
+
if (type == CatalogType::TABLE_FUNCTION_ENTRY || type == CatalogType::SCALAR_FUNCTION_ENTRY ||
|
|
457
|
+
type == CatalogType::AGGREGATE_FUNCTION_ENTRY) {
|
|
458
|
+
auto extension_name = FindExtensionForFunction(entry_name);
|
|
459
|
+
if (!extension_name.empty()) {
|
|
460
|
+
return CatalogException(
|
|
461
|
+
"Function with name \"%s\" is not in the catalog, but it exists in the %s extension.\n\nTo "
|
|
462
|
+
"install and load the extension, run:\nINSTALL %s;\nLOAD %s;",
|
|
463
|
+
entry_name, extension_name, extension_name, extension_name);
|
|
464
|
+
}
|
|
417
465
|
}
|
|
466
|
+
auto unseen_entry = SimilarEntryInSchemas(context, entry_name, type, unseen_schemas);
|
|
418
467
|
string did_you_mean;
|
|
419
468
|
if (unseen_entry.Found() && unseen_entry.distance < entry.distance) {
|
|
420
469
|
// the closest matching entry requires qualification as it is not in the default search path
|
|
@@ -19,10 +19,11 @@ string IndexCatalogEntry::ToSQL() {
|
|
|
19
19
|
return sql;
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
-
void IndexCatalogEntry::Serialize(
|
|
23
|
-
//
|
|
24
|
-
// schema name, table name, index name, sql, index type, index constraint type, expression list
|
|
25
|
-
//
|
|
22
|
+
void IndexCatalogEntry::Serialize(Serializer &serializer) {
|
|
23
|
+
// here we serialize the index metadata in the following order:
|
|
24
|
+
// schema name, table name, index name, sql, index type, index constraint type, expression list, parsed expressions,
|
|
25
|
+
// column IDs
|
|
26
|
+
|
|
26
27
|
FieldWriter writer(serializer);
|
|
27
28
|
writer.WriteString(GetSchemaName());
|
|
28
29
|
writer.WriteString(GetTableName());
|
|
@@ -37,9 +38,9 @@ void IndexCatalogEntry::Serialize(duckdb::MetaBlockWriter &serializer) {
|
|
|
37
38
|
}
|
|
38
39
|
|
|
39
40
|
unique_ptr<CreateIndexInfo> IndexCatalogEntry::Deserialize(Deserializer &source, ClientContext &context) {
|
|
40
|
-
//
|
|
41
|
-
//
|
|
42
|
-
// list
|
|
41
|
+
// here we deserialize the index metadata in the following order:
|
|
42
|
+
// schema name, table schema name, table name, index name, sql, index type, index constraint type, expression list,
|
|
43
|
+
// parsed expression list, column IDs
|
|
43
44
|
|
|
44
45
|
auto create_index_info = make_unique<CreateIndexInfo>();
|
|
45
46
|
|
|
@@ -24,7 +24,7 @@ SimilarCatalogEntry SchemaCatalogEntry::GetSimilarEntry(CatalogTransaction trans
|
|
|
24
24
|
const string &name) {
|
|
25
25
|
SimilarCatalogEntry result;
|
|
26
26
|
Scan(transaction.GetContext(), type, [&](CatalogEntry *entry) {
|
|
27
|
-
auto ldist = StringUtil::
|
|
27
|
+
auto ldist = StringUtil::SimilarityScore(entry->name, name);
|
|
28
28
|
if (ldist < result.distance) {
|
|
29
29
|
result.distance = ldist;
|
|
30
30
|
result.name = entry->name;
|
|
@@ -460,7 +460,7 @@ SimilarCatalogEntry CatalogSet::SimilarEntry(CatalogTransaction transaction, con
|
|
|
460
460
|
for (auto &kv : mapping) {
|
|
461
461
|
auto mapping_value = GetMapping(transaction, kv.first);
|
|
462
462
|
if (mapping_value && !mapping_value->deleted) {
|
|
463
|
-
auto ldist = StringUtil::
|
|
463
|
+
auto ldist = StringUtil::SimilarityScore(kv.first, name);
|
|
464
464
|
if (ldist < result.distance) {
|
|
465
465
|
result.distance = ldist;
|
|
466
466
|
result.name = kv.first;
|
|
@@ -93,6 +93,7 @@ static DefaultMacro internal_macros[] = {
|
|
|
93
93
|
{DEFAULT_SCHEMA, "fdiv", {"x", "y", nullptr}, "floor(x/y)"},
|
|
94
94
|
{DEFAULT_SCHEMA, "fmod", {"x", "y", nullptr}, "(x-y*floor(x/y))"},
|
|
95
95
|
{DEFAULT_SCHEMA, "count_if", {"l", nullptr}, "sum(if(l, 1, 0))"},
|
|
96
|
+
{DEFAULT_SCHEMA, "split_part", {"string", "delimiter", "position", nullptr}, "coalesce(string_split(string, delimiter)[position],'')"},
|
|
96
97
|
|
|
97
98
|
// algebraic list aggregates
|
|
98
99
|
{DEFAULT_SCHEMA, "list_avg", {"l", nullptr}, "list_aggr(l, 'avg')"},
|
|
@@ -48,7 +48,7 @@ static DefaultView internal_views[] = {
|
|
|
48
48
|
{"pg_catalog", "pg_views", "SELECT schema_name schemaname, view_name viewname, 'duckdb' viewowner, sql definition FROM duckdb_views()"},
|
|
49
49
|
{"information_schema", "columns", "SELECT database_name table_catalog, schema_name table_schema, table_name, column_name, column_index ordinal_position, column_default, CASE WHEN is_nullable THEN 'YES' ELSE 'NO' END is_nullable, data_type, character_maximum_length, NULL character_octet_length, numeric_precision, numeric_precision_radix, numeric_scale, NULL datetime_precision, NULL interval_type, NULL interval_precision, NULL character_set_catalog, NULL character_set_schema, NULL character_set_name, NULL collation_catalog, NULL collation_schema, NULL collation_name, NULL domain_catalog, NULL domain_schema, NULL domain_name, NULL udt_catalog, NULL udt_schema, NULL udt_name, NULL scope_catalog, NULL scope_schema, NULL scope_name, NULL maximum_cardinality, NULL dtd_identifier, NULL is_self_referencing, NULL is_identity, NULL identity_generation, NULL identity_start, NULL identity_increment, NULL identity_maximum, NULL identity_minimum, NULL identity_cycle, NULL is_generated, NULL generation_expression, NULL is_updatable FROM duckdb_columns;"},
|
|
50
50
|
{"information_schema", "schemata", "SELECT database_name catalog_name, schema_name, 'duckdb' schema_owner, NULL default_character_set_catalog, NULL default_character_set_schema, NULL default_character_set_name, sql sql_path FROM duckdb_schemas()"},
|
|
51
|
-
{"information_schema", "tables", "SELECT database_name table_catalog, schema_name table_schema, table_name, CASE WHEN temporary THEN 'LOCAL TEMPORARY' ELSE 'BASE TABLE' END table_type, NULL self_referencing_column_name, NULL reference_generation, NULL user_defined_type_catalog, NULL user_defined_type_schema, NULL user_defined_type_name, 'YES' is_insertable_into, 'NO' is_typed, CASE WHEN temporary THEN 'PRESERVE' ELSE NULL END commit_action FROM duckdb_tables() UNION ALL SELECT
|
|
51
|
+
{"information_schema", "tables", "SELECT database_name table_catalog, schema_name table_schema, table_name, CASE WHEN temporary THEN 'LOCAL TEMPORARY' ELSE 'BASE TABLE' END table_type, NULL self_referencing_column_name, NULL reference_generation, NULL user_defined_type_catalog, NULL user_defined_type_schema, NULL user_defined_type_name, 'YES' is_insertable_into, 'NO' is_typed, CASE WHEN temporary THEN 'PRESERVE' ELSE NULL END commit_action FROM duckdb_tables() UNION ALL SELECT database_name table_catalog, schema_name table_schema, view_name table_name, 'VIEW' table_type, NULL self_referencing_column_name, NULL reference_generation, NULL user_defined_type_catalog, NULL user_defined_type_schema, NULL user_defined_type_name, 'NO' is_insertable_into, 'NO' is_typed, NULL commit_action FROM duckdb_views;"},
|
|
52
52
|
{nullptr, nullptr, nullptr}};
|
|
53
53
|
|
|
54
54
|
static unique_ptr<CreateViewInfo> GetDefaultView(ClientContext &context, const string &input_schema, const string &input_name) {
|