duckdb 0.7.1-dev37.0 → 0.7.1-dev415.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/binding.gyp +7 -7
- package/package.json +3 -3
- package/src/duckdb/extension/json/buffered_json_reader.cpp +50 -9
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +7 -2
- package/src/duckdb/extension/json/include/json_scan.hpp +45 -10
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +35 -22
- package/src/duckdb/extension/json/json_functions/json_create.cpp +8 -8
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +8 -3
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +54 -10
- package/src/duckdb/extension/json/json_functions/read_json.cpp +104 -49
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +5 -3
- package/src/duckdb/extension/json/json_functions.cpp +7 -0
- package/src/duckdb/extension/json/json_scan.cpp +144 -37
- package/src/duckdb/extension/parquet/column_reader.cpp +7 -0
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -0
- package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -9
- package/src/duckdb/src/catalog/catalog.cpp +62 -13
- package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +8 -7
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_views.cpp +1 -1
- package/src/duckdb/src/common/bind_helpers.cpp +55 -0
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
- package/src/duckdb/src/common/file_system.cpp +28 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -0
- package/src/duckdb/src/common/local_file_system.cpp +4 -4
- package/src/duckdb/src/common/operator/cast_operators.cpp +10 -4
- package/src/duckdb/src/common/string_util.cpp +8 -4
- package/src/duckdb/src/common/types/partitioned_column_data.cpp +1 -0
- package/src/duckdb/src/common/types/time.cpp +1 -1
- package/src/duckdb/src/common/types/timestamp.cpp +35 -4
- package/src/duckdb/src/common/types.cpp +37 -11
- package/src/duckdb/src/execution/column_binding_resolver.cpp +5 -2
- package/src/duckdb/src/execution/index/art/art.cpp +117 -67
- package/src/duckdb/src/execution/index/art/art_key.cpp +24 -12
- package/src/duckdb/src/execution/index/art/leaf.cpp +7 -8
- package/src/duckdb/src/execution/index/art/node.cpp +13 -27
- package/src/duckdb/src/execution/index/art/node16.cpp +5 -8
- package/src/duckdb/src/execution/index/art/node256.cpp +3 -5
- package/src/duckdb/src/execution/index/art/node4.cpp +4 -7
- package/src/duckdb/src/execution/index/art/node48.cpp +5 -8
- package/src/duckdb/src/execution/index/art/prefix.cpp +2 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +6 -27
- package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +1 -9
- package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -9
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +7 -9
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +6 -11
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +13 -13
- package/src/duckdb/src/execution/operator/schema/physical_detach.cpp +37 -0
- package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +0 -5
- package/src/duckdb/src/execution/physical_operator.cpp +6 -6
- package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +4 -0
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -0
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +38 -11
- package/src/duckdb/src/function/scalar/generic/current_setting.cpp +2 -2
- package/src/duckdb/src/function/scalar/map/map.cpp +69 -21
- package/src/duckdb/src/function/table/read_csv.cpp +17 -5
- package/src/duckdb/src/function/table/system/duckdb_temporary_files.cpp +59 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +3 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/bind_helpers.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +9 -1
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +4 -4
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +9 -2
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +37 -41
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +8 -11
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_detach.hpp +32 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -3
- package/src/duckdb/src/include/duckdb/main/{extension_functions.hpp → extension_entries.hpp} +26 -5
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +0 -7
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_database_info.hpp +0 -4
- package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +32 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/statement/detach_statement.hpp +29 -0
- package/src/duckdb/src/include/duckdb/parser/statement/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +3 -3
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +10 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +1 -2
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +8 -0
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +47 -38
- package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +7 -0
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +7 -0
- package/src/duckdb/src/main/client_context.cpp +2 -0
- package/src/duckdb/src/main/config.cpp +1 -0
- package/src/duckdb/src/main/database.cpp +14 -5
- package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
- package/src/duckdb/src/main/extension/extension_install.cpp +43 -9
- package/src/duckdb/src/main/extension/extension_load.cpp +29 -5
- package/src/duckdb/src/main/settings/settings.cpp +16 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +2 -6
- package/src/duckdb/src/parallel/pipeline_executor.cpp +1 -55
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +3 -0
- package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -13
- package/src/duckdb/src/parser/statement/delete_statement.cpp +3 -0
- package/src/duckdb/src/parser/statement/detach_statement.cpp +15 -0
- package/src/duckdb/src/parser/statement/insert_statement.cpp +9 -0
- package/src/duckdb/src/parser/statement/update_statement.cpp +3 -0
- package/src/duckdb/src/parser/transform/expression/transform_case.cpp +3 -3
- package/src/duckdb/src/parser/transform/statement/transform_create_database.cpp +0 -1
- package/src/duckdb/src/parser/transform/statement/transform_detach.cpp +19 -0
- package/src/duckdb/src/parser/transformer.cpp +2 -0
- package/src/duckdb/src/planner/bind_context.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +3 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +7 -14
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +16 -14
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_detach.cpp +19 -0
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +29 -4
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +22 -1
- package/src/duckdb/src/planner/binder.cpp +2 -0
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +32 -1
- package/src/duckdb/src/planner/logical_operator.cpp +6 -1
- package/src/duckdb/src/planner/planner.cpp +1 -0
- package/src/duckdb/src/storage/buffer_manager.cpp +105 -26
- package/src/duckdb/src/storage/compression/bitpacking.cpp +16 -7
- package/src/duckdb/src/storage/data_table.cpp +66 -3
- package/src/duckdb/src/storage/index.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +1 -1
- package/src/duckdb/src/storage/table/column_data.cpp +4 -2
- package/src/duckdb/src/storage/table/update_segment.cpp +15 -0
- package/src/duckdb/src/storage/table_index_list.cpp +1 -2
- package/src/duckdb/src/storage/wal_replay.cpp +68 -0
- package/src/duckdb/src/storage/write_ahead_log.cpp +21 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -2
- package/src/duckdb/third_party/concurrentqueue/blockingconcurrentqueue.h +2 -2
- package/src/duckdb/third_party/fmt/include/fmt/core.h +1 -2
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +14 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +530 -1006
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +17659 -17626
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
- package/src/duckdb/ub_src_execution_operator_schema.cpp +2 -0
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/src/duckdb/ub_src_parser_statement.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_statement.cpp +2 -0
- package/src/statement.cpp +46 -12
- package/test/arrow.test.ts +3 -3
- package/test/prepare.test.ts +39 -1
- package/test/typescript_decls.test.ts +1 -1
- package/src/duckdb/src/include/duckdb/function/create_database_extension.hpp +0 -37
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#include "json_scan.hpp"
|
|
2
2
|
|
|
3
3
|
#include "duckdb/main/database.hpp"
|
|
4
|
+
#include "duckdb/main/extension_helper.hpp"
|
|
4
5
|
#include "duckdb/parallel/task_scheduler.hpp"
|
|
5
6
|
#include "duckdb/storage/buffer_manager.hpp"
|
|
6
7
|
|
|
@@ -19,8 +20,9 @@ unique_ptr<FunctionData> JSONScanData::Bind(ClientContext &context, TableFunctio
|
|
|
19
20
|
auto &options = result->options;
|
|
20
21
|
|
|
21
22
|
auto &info = (JSONScanInfo &)*input.info;
|
|
22
|
-
options.format = info.format;
|
|
23
23
|
result->type = info.type;
|
|
24
|
+
options.format = info.format;
|
|
25
|
+
result->record_type = info.record_type;
|
|
24
26
|
result->auto_detect = info.auto_detect;
|
|
25
27
|
|
|
26
28
|
vector<string> patterns;
|
|
@@ -39,16 +41,16 @@ unique_ptr<FunctionData> JSONScanData::Bind(ClientContext &context, TableFunctio
|
|
|
39
41
|
result->ignore_errors = BooleanValue::Get(kv.second);
|
|
40
42
|
} else if (loption == "maximum_object_size") {
|
|
41
43
|
result->maximum_object_size = MaxValue<idx_t>(UIntegerValue::Get(kv.second), result->maximum_object_size);
|
|
42
|
-
} else if (loption == "
|
|
44
|
+
} else if (loption == "lines") {
|
|
43
45
|
auto format = StringUtil::Lower(StringValue::Get(kv.second));
|
|
44
46
|
if (format == "auto") {
|
|
45
47
|
options.format = JSONFormat::AUTO_DETECT;
|
|
46
|
-
} else if (format == "
|
|
48
|
+
} else if (format == "false") {
|
|
47
49
|
options.format = JSONFormat::UNSTRUCTURED;
|
|
48
|
-
} else if (format == "
|
|
50
|
+
} else if (format == "true") {
|
|
49
51
|
options.format = JSONFormat::NEWLINE_DELIMITED;
|
|
50
52
|
} else {
|
|
51
|
-
throw BinderException("
|
|
53
|
+
throw BinderException("\"lines\" must be one of ['auto', 'true', 'false']");
|
|
52
54
|
}
|
|
53
55
|
} else if (loption == "compression") {
|
|
54
56
|
auto compression = StringUtil::Lower(StringValue::Get(kv.second));
|
|
@@ -73,10 +75,7 @@ void JSONScanData::InitializeFilePaths(ClientContext &context, const vector<stri
|
|
|
73
75
|
vector<string> &file_paths) {
|
|
74
76
|
auto &fs = FileSystem::GetFileSystem(context);
|
|
75
77
|
for (auto &file_pattern : patterns) {
|
|
76
|
-
auto found_files = fs.
|
|
77
|
-
if (found_files.empty()) {
|
|
78
|
-
throw IOException("No files found that match the pattern \"%s\"", file_pattern);
|
|
79
|
-
}
|
|
78
|
+
auto found_files = fs.GlobFiles(file_pattern, context);
|
|
80
79
|
file_paths.insert(file_paths.end(), found_files.begin(), found_files.end());
|
|
81
80
|
}
|
|
82
81
|
}
|
|
@@ -97,6 +96,27 @@ void JSONScanData::InitializeFormats() {
|
|
|
97
96
|
if (!timestamp_format.empty()) {
|
|
98
97
|
date_format_map.AddFormat(LogicalTypeId::TIMESTAMP, timestamp_format);
|
|
99
98
|
}
|
|
99
|
+
|
|
100
|
+
if (auto_detect) {
|
|
101
|
+
static const unordered_map<LogicalTypeId, vector<const char *>, LogicalTypeIdHash> FORMAT_TEMPLATES = {
|
|
102
|
+
{LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
|
|
103
|
+
{LogicalTypeId::TIMESTAMP,
|
|
104
|
+
{"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S",
|
|
105
|
+
"%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"}},
|
|
106
|
+
};
|
|
107
|
+
|
|
108
|
+
// Populate possible date/timestamp formats, assume this is consistent across columns
|
|
109
|
+
for (auto &kv : FORMAT_TEMPLATES) {
|
|
110
|
+
const auto &type = kv.first;
|
|
111
|
+
if (date_format_map.HasFormats(type)) {
|
|
112
|
+
continue; // Already populated
|
|
113
|
+
}
|
|
114
|
+
const auto &format_strings = kv.second;
|
|
115
|
+
for (auto &format_string : format_strings) {
|
|
116
|
+
date_format_map.AddFormat(type, format_string);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
100
120
|
}
|
|
101
121
|
|
|
102
122
|
void JSONScanData::Serialize(FieldWriter &writer) {
|
|
@@ -111,9 +131,17 @@ void JSONScanData::Serialize(FieldWriter &writer) {
|
|
|
111
131
|
writer.WriteList<string>(names);
|
|
112
132
|
writer.WriteList<idx_t>(valid_cols);
|
|
113
133
|
writer.WriteField<idx_t>(max_depth);
|
|
114
|
-
writer.WriteField<
|
|
115
|
-
|
|
116
|
-
|
|
134
|
+
writer.WriteField<JSONRecordType>(record_type);
|
|
135
|
+
if (!date_format.empty()) {
|
|
136
|
+
writer.WriteString(date_format);
|
|
137
|
+
} else {
|
|
138
|
+
writer.WriteString(date_format_map.GetFormat(LogicalTypeId::DATE).format_specifier);
|
|
139
|
+
}
|
|
140
|
+
if (!timestamp_format.empty()) {
|
|
141
|
+
writer.WriteString(timestamp_format);
|
|
142
|
+
} else {
|
|
143
|
+
writer.WriteString(date_format_map.GetFormat(LogicalTypeId::TIMESTAMP).format_specifier);
|
|
144
|
+
}
|
|
117
145
|
}
|
|
118
146
|
|
|
119
147
|
void JSONScanData::Deserialize(FieldReader &reader) {
|
|
@@ -128,9 +156,12 @@ void JSONScanData::Deserialize(FieldReader &reader) {
|
|
|
128
156
|
names = reader.ReadRequiredList<string>();
|
|
129
157
|
valid_cols = reader.ReadRequiredList<idx_t>();
|
|
130
158
|
max_depth = reader.ReadRequired<idx_t>();
|
|
131
|
-
|
|
159
|
+
record_type = reader.ReadRequired<JSONRecordType>();
|
|
132
160
|
date_format = reader.ReadRequired<string>();
|
|
133
161
|
timestamp_format = reader.ReadRequired<string>();
|
|
162
|
+
|
|
163
|
+
InitializeFormats();
|
|
164
|
+
transform_options.date_format_map = &date_format_map;
|
|
134
165
|
}
|
|
135
166
|
|
|
136
167
|
JSONScanGlobalState::JSONScanGlobalState(ClientContext &context, JSONScanData &bind_data_p)
|
|
@@ -149,11 +180,11 @@ JSONScanGlobalState::JSONScanGlobalState(ClientContext &context, JSONScanData &b
|
|
|
149
180
|
}
|
|
150
181
|
|
|
151
182
|
JSONScanLocalState::JSONScanLocalState(ClientContext &context, JSONScanGlobalState &gstate)
|
|
152
|
-
: batch_index(DConstants::INVALID_INDEX), bind_data(gstate.bind_data),
|
|
183
|
+
: scan_count(0), array_idx(0), array_offset(0), batch_index(DConstants::INVALID_INDEX), bind_data(gstate.bind_data),
|
|
153
184
|
json_allocator(BufferAllocator::Get(context)), current_reader(nullptr), current_buffer_handle(nullptr),
|
|
154
|
-
buffer_size(0), buffer_offset(0), prev_buffer_remainder(0) {
|
|
185
|
+
is_last(false), buffer_size(0), buffer_offset(0), prev_buffer_remainder(0) {
|
|
155
186
|
|
|
156
|
-
// Buffer to reconstruct JSON
|
|
187
|
+
// Buffer to reconstruct JSON values when they cross a buffer boundary
|
|
157
188
|
reconstruct_buffer = gstate.allocator.Allocate(gstate.bind_data.maximum_object_size + YYJSON_PADDING_SIZE);
|
|
158
189
|
|
|
159
190
|
// This is needed for JSONFormat::UNSTRUCTURED, to make use of YYJSON_READ_INSITU
|
|
@@ -173,11 +204,6 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
|
|
|
173
204
|
// Perform projection pushdown
|
|
174
205
|
if (bind_data.type == JSONScanType::READ_JSON) {
|
|
175
206
|
D_ASSERT(input.column_ids.size() <= bind_data.names.size()); // Can't project to have more columns
|
|
176
|
-
if (bind_data.auto_detect && input.column_ids.size() < bind_data.names.size()) {
|
|
177
|
-
// If we are auto-detecting, but don't need all columns present in the file,
|
|
178
|
-
// then we don't need to throw an error if we encounter an unseen column
|
|
179
|
-
bind_data.transform_options.error_unknown_key = false;
|
|
180
|
-
}
|
|
181
207
|
vector<string> names;
|
|
182
208
|
names.reserve(input.column_ids.size());
|
|
183
209
|
for (idx_t i = 0; i < input.column_ids.size(); i++) {
|
|
@@ -188,13 +214,37 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
|
|
|
188
214
|
names.push_back(std::move(bind_data.names[id]));
|
|
189
215
|
bind_data.valid_cols.push_back(i);
|
|
190
216
|
}
|
|
217
|
+
if (names.size() < bind_data.names.size()) {
|
|
218
|
+
// If we are auto-detecting, but don't need all columns present in the file,
|
|
219
|
+
// then we don't need to throw an error if we encounter an unseen column
|
|
220
|
+
bind_data.transform_options.error_unknown_key = false;
|
|
221
|
+
}
|
|
191
222
|
bind_data.names = std::move(names);
|
|
192
223
|
}
|
|
193
224
|
return result;
|
|
194
225
|
}
|
|
195
226
|
|
|
196
227
|
idx_t JSONGlobalTableFunctionState::MaxThreads() const {
|
|
197
|
-
|
|
228
|
+
auto &bind_data = state.bind_data;
|
|
229
|
+
|
|
230
|
+
auto num_files = bind_data.file_paths.size();
|
|
231
|
+
idx_t readers_per_file;
|
|
232
|
+
if (bind_data.options.format == JSONFormat::UNSTRUCTURED) {
|
|
233
|
+
// Unstructured necessitates single thread
|
|
234
|
+
readers_per_file = 1;
|
|
235
|
+
} else if (!state.json_readers.empty() && state.json_readers[0]->IsOpen()) {
|
|
236
|
+
auto &reader = *state.json_readers[0];
|
|
237
|
+
const auto &options = reader.GetOptions();
|
|
238
|
+
if (options.format == JSONFormat::UNSTRUCTURED || options.compression != FileCompressionType::UNCOMPRESSED) {
|
|
239
|
+
// Auto-detected unstructured - same story, compression also really limits parallelism
|
|
240
|
+
readers_per_file = 1;
|
|
241
|
+
} else {
|
|
242
|
+
return state.system_threads;
|
|
243
|
+
}
|
|
244
|
+
} else {
|
|
245
|
+
return state.system_threads;
|
|
246
|
+
}
|
|
247
|
+
return num_files * readers_per_file;
|
|
198
248
|
}
|
|
199
249
|
|
|
200
250
|
JSONLocalTableFunctionState::JSONLocalTableFunctionState(ClientContext &context, JSONScanGlobalState &gstate)
|
|
@@ -230,6 +280,12 @@ static inline void SkipWhitespace(const char *buffer_ptr, idx_t &buffer_offset,
|
|
|
230
280
|
idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) {
|
|
231
281
|
json_allocator.Reset();
|
|
232
282
|
|
|
283
|
+
if ((gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
|
|
284
|
+
gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) &&
|
|
285
|
+
array_idx < scan_count) {
|
|
286
|
+
return GetObjectsFromArray(gstate);
|
|
287
|
+
}
|
|
288
|
+
|
|
233
289
|
idx_t count = 0;
|
|
234
290
|
if (buffer_offset == buffer_size) {
|
|
235
291
|
if (!ReadNextBuffer(gstate)) {
|
|
@@ -253,10 +309,18 @@ idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) {
|
|
|
253
309
|
default:
|
|
254
310
|
throw InternalException("Unknown JSON format");
|
|
255
311
|
}
|
|
312
|
+
scan_count = count;
|
|
256
313
|
|
|
257
314
|
// Skip over any remaining whitespace for the next scan
|
|
258
315
|
SkipWhitespace(buffer_ptr, buffer_offset, buffer_size);
|
|
259
316
|
|
|
317
|
+
if (gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_RECORDS ||
|
|
318
|
+
gstate.bind_data.record_type == JSONRecordType::ARRAY_OF_JSON) {
|
|
319
|
+
array_idx = 0;
|
|
320
|
+
array_offset = 0;
|
|
321
|
+
return GetObjectsFromArray(gstate);
|
|
322
|
+
}
|
|
323
|
+
|
|
260
324
|
return count;
|
|
261
325
|
}
|
|
262
326
|
|
|
@@ -331,10 +395,48 @@ yyjson_val *JSONScanLocalState::ParseLine(char *line_start, idx_t line_size, idx
|
|
|
331
395
|
}
|
|
332
396
|
}
|
|
333
397
|
|
|
398
|
+
idx_t JSONScanLocalState::GetObjectsFromArray(JSONScanGlobalState &gstate) {
|
|
399
|
+
idx_t arr_count = 0;
|
|
400
|
+
|
|
401
|
+
size_t idx, max;
|
|
402
|
+
yyjson_val *val;
|
|
403
|
+
for (; array_idx < scan_count; array_idx++, array_offset = 0) {
|
|
404
|
+
auto &value = values[array_idx];
|
|
405
|
+
if (!value) {
|
|
406
|
+
continue;
|
|
407
|
+
}
|
|
408
|
+
if (unsafe_yyjson_is_arr(value)) {
|
|
409
|
+
yyjson_arr_foreach(value, idx, max, val) {
|
|
410
|
+
if (idx < array_offset) {
|
|
411
|
+
continue;
|
|
412
|
+
}
|
|
413
|
+
array_values[arr_count++] = val;
|
|
414
|
+
if (arr_count == STANDARD_VECTOR_SIZE) {
|
|
415
|
+
break;
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
array_offset = idx + 1;
|
|
419
|
+
if (arr_count == STANDARD_VECTOR_SIZE) {
|
|
420
|
+
break;
|
|
421
|
+
}
|
|
422
|
+
} else if (!gstate.bind_data.ignore_errors) {
|
|
423
|
+
ThrowTransformError(
|
|
424
|
+
array_idx,
|
|
425
|
+
StringUtil::Format("Expected JSON ARRAY but got %s: %s\nTry setting json_format to 'records'",
|
|
426
|
+
JSONCommon::ValTypeToString(value), JSONCommon::ValToString(value, 50)));
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
return arr_count;
|
|
430
|
+
}
|
|
431
|
+
|
|
334
432
|
bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
|
|
335
433
|
if (current_reader) {
|
|
336
434
|
D_ASSERT(current_buffer_handle);
|
|
337
435
|
current_reader->SetBufferLineOrObjectCount(current_buffer_handle->buffer_index, lines_or_objects_in_buffer);
|
|
436
|
+
if (is_last && gstate.bind_data.type != JSONScanType::SAMPLE) {
|
|
437
|
+
// Close files that are done if we're not sampling
|
|
438
|
+
current_reader->CloseJSONFile();
|
|
439
|
+
}
|
|
338
440
|
}
|
|
339
441
|
|
|
340
442
|
AllocatedData buffer;
|
|
@@ -395,7 +497,9 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
|
|
|
395
497
|
// Unopened file
|
|
396
498
|
current_reader->OpenJSONFile();
|
|
397
499
|
batch_index = gstate.batch_index++;
|
|
398
|
-
if (options.format == JSONFormat::UNSTRUCTURED
|
|
500
|
+
if (options.format == JSONFormat::UNSTRUCTURED || (options.format == JSONFormat::NEWLINE_DELIMITED &&
|
|
501
|
+
options.compression != FileCompressionType::UNCOMPRESSED &&
|
|
502
|
+
gstate.file_index < gstate.json_readers.size())) {
|
|
399
503
|
gstate.file_index++; // UNSTRUCTURED necessitates single-threaded read
|
|
400
504
|
}
|
|
401
505
|
if (options.format != JSONFormat::AUTO_DETECT) {
|
|
@@ -449,9 +553,6 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
|
|
|
449
553
|
auto json_buffer_handle = make_unique<JSONBufferHandle>(buffer_index, readers, std::move(buffer), buffer_size);
|
|
450
554
|
current_buffer_handle = json_buffer_handle.get();
|
|
451
555
|
current_reader->InsertBuffer(buffer_index, std::move(json_buffer_handle));
|
|
452
|
-
if (!current_reader->GetFileHandle().PlainFileSource() && gstate.bind_data.type == JSONScanType::SAMPLE) {
|
|
453
|
-
// TODO: store buffer
|
|
454
|
-
}
|
|
455
556
|
|
|
456
557
|
buffer_offset = 0;
|
|
457
558
|
prev_buffer_remainder = 0;
|
|
@@ -507,16 +608,18 @@ void JSONScanLocalState::ReadNextBufferSeek(JSONScanGlobalState &gstate, idx_t &
|
|
|
507
608
|
}
|
|
508
609
|
|
|
509
610
|
void JSONScanLocalState::ReadNextBufferNoSeek(JSONScanGlobalState &gstate, idx_t &buffer_index) {
|
|
510
|
-
auto &file_handle = current_reader->GetFileHandle();
|
|
511
|
-
|
|
512
611
|
idx_t request_size = gstate.buffer_capacity - prev_buffer_remainder - YYJSON_PADDING_SIZE;
|
|
513
612
|
idx_t read_size;
|
|
514
613
|
{
|
|
515
614
|
lock_guard<mutex> reader_guard(current_reader->lock);
|
|
516
615
|
buffer_index = current_reader->GetBufferIndex();
|
|
517
616
|
|
|
518
|
-
|
|
519
|
-
|
|
617
|
+
if (current_reader->IsOpen()) {
|
|
618
|
+
read_size = current_reader->GetFileHandle().Read(buffer_ptr + prev_buffer_remainder, request_size,
|
|
619
|
+
gstate.bind_data.type == JSONScanType::SAMPLE);
|
|
620
|
+
} else {
|
|
621
|
+
read_size = 0;
|
|
622
|
+
}
|
|
520
623
|
is_last = read_size < request_size;
|
|
521
624
|
|
|
522
625
|
if (!gstate.bind_data.ignore_errors && read_size == 0 && prev_buffer_remainder != 0) {
|
|
@@ -578,10 +681,15 @@ void JSONScanLocalState::ReconstructFirstObject(JSONScanGlobalState &gstate) {
|
|
|
578
681
|
current_reader->RemoveBuffer(current_buffer_handle->buffer_index - 1);
|
|
579
682
|
}
|
|
580
683
|
|
|
581
|
-
|
|
684
|
+
values[0] = ParseLine((char *)reconstruct_ptr, line_size, line_size, lines[0]);
|
|
582
685
|
}
|
|
583
686
|
|
|
584
687
|
void JSONScanLocalState::ReadUnstructured(idx_t &count) {
|
|
688
|
+
// yyjson does not always return YYJSON_READ_ERROR_UNEXPECTED_END properly
|
|
689
|
+
// if a different error code happens within the last 50 bytes
|
|
690
|
+
// we assume it should be YYJSON_READ_ERROR_UNEXPECTED_END instead
|
|
691
|
+
static constexpr idx_t END_BOUND = 50;
|
|
692
|
+
|
|
585
693
|
const auto max_obj_size = reconstruct_buffer.GetSize();
|
|
586
694
|
yyjson_read_err error;
|
|
587
695
|
for (; count < STANDARD_VECTOR_SIZE; count++) {
|
|
@@ -607,8 +715,7 @@ void JSONScanLocalState::ReadUnstructured(idx_t &count) {
|
|
|
607
715
|
} else if (error.pos > max_obj_size) {
|
|
608
716
|
current_reader->ThrowParseError(current_buffer_handle->buffer_index, lines_or_objects_in_buffer, error,
|
|
609
717
|
"Try increasing \"maximum_object_size\".");
|
|
610
|
-
|
|
611
|
-
} else if (error.code == YYJSON_READ_ERROR_UNEXPECTED_END && !is_last) {
|
|
718
|
+
} else if (!is_last && (error.code == YYJSON_READ_ERROR_UNEXPECTED_END || remaining - error.pos < END_BOUND)) {
|
|
612
719
|
// Copy remaining to reconstruct_buffer
|
|
613
720
|
const auto reconstruct_ptr = reconstruct_buffer.get();
|
|
614
721
|
memcpy(reconstruct_ptr, obj_copy_start, remaining);
|
|
@@ -618,7 +725,7 @@ void JSONScanLocalState::ReadUnstructured(idx_t &count) {
|
|
|
618
725
|
} else {
|
|
619
726
|
current_reader->ThrowParseError(current_buffer_handle->buffer_index, lines_or_objects_in_buffer, error);
|
|
620
727
|
}
|
|
621
|
-
|
|
728
|
+
values[count] = read_doc->root;
|
|
622
729
|
}
|
|
623
730
|
}
|
|
624
731
|
|
|
@@ -644,7 +751,7 @@ void JSONScanLocalState::ReadNewlineDelimited(idx_t &count) {
|
|
|
644
751
|
}
|
|
645
752
|
idx_t line_size = line_end - line_start;
|
|
646
753
|
|
|
647
|
-
|
|
754
|
+
values[count] = ParseLine((char *)line_start, line_size, remaining, lines[count]);
|
|
648
755
|
|
|
649
756
|
buffer_offset += line_size;
|
|
650
757
|
SkipWhitespace(buffer_ptr, buffer_offset, buffer_size);
|
|
@@ -655,11 +762,11 @@ yyjson_alc *JSONScanLocalState::GetAllocator() {
|
|
|
655
762
|
return json_allocator.GetYYJSONAllocator();
|
|
656
763
|
}
|
|
657
764
|
|
|
658
|
-
void JSONScanLocalState::ThrowTransformError(idx_t
|
|
765
|
+
void JSONScanLocalState::ThrowTransformError(idx_t object_index, const string &error_message) {
|
|
659
766
|
D_ASSERT(current_reader);
|
|
660
767
|
D_ASSERT(current_buffer_handle);
|
|
661
768
|
D_ASSERT(object_index != DConstants::INVALID_INDEX);
|
|
662
|
-
auto line_or_object_in_buffer = lines_or_objects_in_buffer -
|
|
769
|
+
auto line_or_object_in_buffer = lines_or_objects_in_buffer - scan_count + object_index;
|
|
663
770
|
current_reader->ThrowTransformError(current_buffer_handle->buffer_index, line_or_object_in_buffer, error_message);
|
|
664
771
|
}
|
|
665
772
|
|
|
@@ -589,6 +589,7 @@ void StringColumnReader::PrepareDeltaLengthByteArray(ResizeableBuffer &buffer) {
|
|
|
589
589
|
}
|
|
590
590
|
auto length_data = (uint32_t *)length_buffer->ptr;
|
|
591
591
|
byte_array_data = make_unique<Vector>(LogicalType::VARCHAR, value_count);
|
|
592
|
+
byte_array_count = value_count;
|
|
592
593
|
auto string_data = FlatVector::GetData<string_t>(*byte_array_data);
|
|
593
594
|
for (idx_t i = 0; i < value_count; i++) {
|
|
594
595
|
auto str_len = length_data[i];
|
|
@@ -615,6 +616,7 @@ void StringColumnReader::PrepareDeltaByteArray(ResizeableBuffer &buffer) {
|
|
|
615
616
|
auto prefix_data = (uint32_t *)prefix_buffer->ptr;
|
|
616
617
|
auto suffix_data = (uint32_t *)suffix_buffer->ptr;
|
|
617
618
|
byte_array_data = make_unique<Vector>(LogicalType::VARCHAR, prefix_count);
|
|
619
|
+
byte_array_count = prefix_count;
|
|
618
620
|
auto string_data = FlatVector::GetData<string_t>(*byte_array_data);
|
|
619
621
|
for (idx_t i = 0; i < prefix_count; i++) {
|
|
620
622
|
auto str_len = prefix_data[i] + suffix_data[i];
|
|
@@ -646,6 +648,11 @@ void StringColumnReader::DeltaByteArray(uint8_t *defines, idx_t num_values, parq
|
|
|
646
648
|
continue;
|
|
647
649
|
}
|
|
648
650
|
if (filter[row_idx + result_offset]) {
|
|
651
|
+
if (delta_offset >= byte_array_count) {
|
|
652
|
+
throw IOException("DELTA_BYTE_ARRAY - length mismatch between values and byte array lengths (attempted "
|
|
653
|
+
"read of %d from %d entries) - corrupt file?",
|
|
654
|
+
delta_offset + 1, byte_array_count);
|
|
655
|
+
}
|
|
649
656
|
result_ptr[row_idx + result_offset] = string_data[delta_offset++];
|
|
650
657
|
} else {
|
|
651
658
|
delta_offset++;
|
|
@@ -221,10 +221,7 @@ public:
|
|
|
221
221
|
}
|
|
222
222
|
|
|
223
223
|
FileSystem &fs = FileSystem::GetFileSystem(context);
|
|
224
|
-
auto files = fs.
|
|
225
|
-
if (files.empty()) {
|
|
226
|
-
throw IOException("No files found that match the pattern \"%s\"", info.file_path);
|
|
227
|
-
}
|
|
224
|
+
auto files = fs.GlobFiles(info.file_path, context);
|
|
228
225
|
|
|
229
226
|
// The most likely path (Parquet read without union by name option)
|
|
230
227
|
if (!parquet_options.union_by_name) {
|
|
@@ -362,11 +359,7 @@ public:
|
|
|
362
359
|
}
|
|
363
360
|
|
|
364
361
|
static vector<string> ParquetGlob(FileSystem &fs, const string &glob, ClientContext &context) {
|
|
365
|
-
|
|
366
|
-
if (files.empty()) {
|
|
367
|
-
throw IOException("No files found that match the pattern \"%s\"", glob);
|
|
368
|
-
}
|
|
369
|
-
return files;
|
|
362
|
+
return fs.GlobFiles(glob, context);
|
|
370
363
|
}
|
|
371
364
|
|
|
372
365
|
static unique_ptr<FunctionData> ParquetScanBind(ClientContext &context, TableFunctionBindInput &input,
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
#include "duckdb/catalog/catalog_search_path.hpp"
|
|
4
4
|
#include "duckdb/catalog/catalog_entry/list.hpp"
|
|
5
|
+
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
|
5
6
|
#include "duckdb/catalog/catalog_set.hpp"
|
|
6
7
|
#include "duckdb/catalog/default/default_schemas.hpp"
|
|
7
8
|
#include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
|
|
@@ -26,7 +27,7 @@
|
|
|
26
27
|
#include "duckdb/planner/parsed_data/bound_create_table_info.hpp"
|
|
27
28
|
#include "duckdb/planner/binder.hpp"
|
|
28
29
|
#include "duckdb/catalog/default/default_types.hpp"
|
|
29
|
-
#include "duckdb/main/
|
|
30
|
+
#include "duckdb/main/extension_entries.hpp"
|
|
30
31
|
#include "duckdb/main/connection.hpp"
|
|
31
32
|
#include "duckdb/main/attached_database.hpp"
|
|
32
33
|
#include "duckdb/main/database_manager.hpp"
|
|
@@ -251,6 +252,20 @@ CatalogEntry *Catalog::CreateCollation(CatalogTransaction transaction, SchemaCat
|
|
|
251
252
|
return schema->CreateCollation(transaction, info);
|
|
252
253
|
}
|
|
253
254
|
|
|
255
|
+
//===--------------------------------------------------------------------===//
|
|
256
|
+
// Index
|
|
257
|
+
//===--------------------------------------------------------------------===//
|
|
258
|
+
CatalogEntry *Catalog::CreateIndex(CatalogTransaction transaction, CreateIndexInfo *info) {
|
|
259
|
+
auto &context = transaction.GetContext();
|
|
260
|
+
return CreateIndex(context, info);
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
CatalogEntry *Catalog::CreateIndex(ClientContext &context, CreateIndexInfo *info) {
|
|
264
|
+
auto schema = GetSchema(context, info->schema);
|
|
265
|
+
auto table = GetEntry<TableCatalogEntry>(context, schema->name, info->table->table_name);
|
|
266
|
+
return schema->CreateIndex(context, info, table);
|
|
267
|
+
}
|
|
268
|
+
|
|
254
269
|
//===--------------------------------------------------------------------===//
|
|
255
270
|
// Lookup Structures
|
|
256
271
|
//===--------------------------------------------------------------------===//
|
|
@@ -317,17 +332,26 @@ SimilarCatalogEntry Catalog::SimilarEntryInSchemas(ClientContext &context, const
|
|
|
317
332
|
return result;
|
|
318
333
|
}
|
|
319
334
|
|
|
320
|
-
string
|
|
321
|
-
auto
|
|
322
|
-
auto it = std::lower_bound(
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
if (it != EXTENSION_FUNCTIONS + size && it->function == function_name) {
|
|
335
|
+
string FindExtensionGeneric(const string &name, const ExtensionEntry entries[], idx_t size) {
|
|
336
|
+
auto lcase = StringUtil::Lower(name);
|
|
337
|
+
auto it = std::lower_bound(entries, entries + size, lcase,
|
|
338
|
+
[](const ExtensionEntry &element, const string &value) { return element.name < value; });
|
|
339
|
+
if (it != entries + size && it->name == lcase) {
|
|
326
340
|
return it->extension;
|
|
327
341
|
}
|
|
328
342
|
return "";
|
|
329
343
|
}
|
|
330
344
|
|
|
345
|
+
string FindExtensionForFunction(const string &name) {
|
|
346
|
+
idx_t size = sizeof(EXTENSION_FUNCTIONS) / sizeof(ExtensionEntry);
|
|
347
|
+
return FindExtensionGeneric(name, EXTENSION_FUNCTIONS, size);
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
string FindExtensionForSetting(const string &name) {
|
|
351
|
+
idx_t size = sizeof(EXTENSION_SETTINGS) / sizeof(ExtensionEntry);
|
|
352
|
+
return FindExtensionGeneric(name, EXTENSION_SETTINGS, size);
|
|
353
|
+
}
|
|
354
|
+
|
|
331
355
|
vector<CatalogSearchEntry> GetCatalogEntries(ClientContext &context, const string &catalog, const string &schema) {
|
|
332
356
|
vector<CatalogSearchEntry> entries;
|
|
333
357
|
auto &search_path = *context.client_data->catalog_search_path;
|
|
@@ -392,6 +416,26 @@ void FindMinimalQualification(ClientContext &context, const string &catalog_name
|
|
|
392
416
|
qualify_schema = true;
|
|
393
417
|
}
|
|
394
418
|
|
|
419
|
+
CatalogException Catalog::UnrecognizedConfigurationError(ClientContext &context, const string &name) {
|
|
420
|
+
// check if the setting exists in any extensions
|
|
421
|
+
auto extension_name = FindExtensionForSetting(name);
|
|
422
|
+
if (!extension_name.empty()) {
|
|
423
|
+
return CatalogException(
|
|
424
|
+
"Setting with name \"%s\" is not in the catalog, but it exists in the %s extension.\n\nTo "
|
|
425
|
+
"install and load the extension, run:\nINSTALL %s;\nLOAD %s;",
|
|
426
|
+
name, extension_name, extension_name, extension_name);
|
|
427
|
+
}
|
|
428
|
+
// the setting is not in an extension
|
|
429
|
+
// get a list of all options
|
|
430
|
+
vector<string> potential_names = DBConfig::GetOptionNames();
|
|
431
|
+
for (auto &entry : DBConfig::GetConfig(context).extension_parameters) {
|
|
432
|
+
potential_names.push_back(entry.first);
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
throw CatalogException("unrecognized configuration parameter \"%s\"\n%s", name,
|
|
436
|
+
StringUtil::CandidatesErrorMessage(potential_names, name, "Did you mean"));
|
|
437
|
+
}
|
|
438
|
+
|
|
395
439
|
CatalogException Catalog::CreateMissingEntryException(ClientContext &context, const string &entry_name,
|
|
396
440
|
CatalogType type,
|
|
397
441
|
const unordered_set<SchemaCatalogEntry *> &schemas,
|
|
@@ -408,13 +452,18 @@ CatalogException Catalog::CreateMissingEntryException(ClientContext &context, co
|
|
|
408
452
|
unseen_schemas.insert(current_schema);
|
|
409
453
|
}
|
|
410
454
|
}
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
455
|
+
// check if the entry exists in any extension
|
|
456
|
+
if (type == CatalogType::TABLE_FUNCTION_ENTRY || type == CatalogType::SCALAR_FUNCTION_ENTRY ||
|
|
457
|
+
type == CatalogType::AGGREGATE_FUNCTION_ENTRY) {
|
|
458
|
+
auto extension_name = FindExtensionForFunction(entry_name);
|
|
459
|
+
if (!extension_name.empty()) {
|
|
460
|
+
return CatalogException(
|
|
461
|
+
"Function with name \"%s\" is not in the catalog, but it exists in the %s extension.\n\nTo "
|
|
462
|
+
"install and load the extension, run:\nINSTALL %s;\nLOAD %s;",
|
|
463
|
+
entry_name, extension_name, extension_name, extension_name);
|
|
464
|
+
}
|
|
417
465
|
}
|
|
466
|
+
auto unseen_entry = SimilarEntryInSchemas(context, entry_name, type, unseen_schemas);
|
|
418
467
|
string did_you_mean;
|
|
419
468
|
if (unseen_entry.Found() && unseen_entry.distance < entry.distance) {
|
|
420
469
|
// the closest matching entry requires qualification as it is not in the default search path
|
|
@@ -19,10 +19,11 @@ string IndexCatalogEntry::ToSQL() {
|
|
|
19
19
|
return sql;
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
-
void IndexCatalogEntry::Serialize(
|
|
23
|
-
//
|
|
24
|
-
// schema name, table name, index name, sql, index type, index constraint type, expression list
|
|
25
|
-
//
|
|
22
|
+
void IndexCatalogEntry::Serialize(Serializer &serializer) {
|
|
23
|
+
// here we serialize the index metadata in the following order:
|
|
24
|
+
// schema name, table name, index name, sql, index type, index constraint type, expression list, parsed expressions,
|
|
25
|
+
// column IDs
|
|
26
|
+
|
|
26
27
|
FieldWriter writer(serializer);
|
|
27
28
|
writer.WriteString(GetSchemaName());
|
|
28
29
|
writer.WriteString(GetTableName());
|
|
@@ -37,9 +38,9 @@ void IndexCatalogEntry::Serialize(duckdb::MetaBlockWriter &serializer) {
|
|
|
37
38
|
}
|
|
38
39
|
|
|
39
40
|
unique_ptr<CreateIndexInfo> IndexCatalogEntry::Deserialize(Deserializer &source, ClientContext &context) {
|
|
40
|
-
//
|
|
41
|
-
//
|
|
42
|
-
// list
|
|
41
|
+
// here we deserialize the index metadata in the following order:
|
|
42
|
+
// schema name, table schema name, table name, index name, sql, index type, index constraint type, expression list,
|
|
43
|
+
// parsed expression list, column IDs
|
|
43
44
|
|
|
44
45
|
auto create_index_info = make_unique<CreateIndexInfo>();
|
|
45
46
|
|
|
@@ -24,7 +24,7 @@ SimilarCatalogEntry SchemaCatalogEntry::GetSimilarEntry(CatalogTransaction trans
|
|
|
24
24
|
const string &name) {
|
|
25
25
|
SimilarCatalogEntry result;
|
|
26
26
|
Scan(transaction.GetContext(), type, [&](CatalogEntry *entry) {
|
|
27
|
-
auto ldist = StringUtil::
|
|
27
|
+
auto ldist = StringUtil::SimilarityScore(entry->name, name);
|
|
28
28
|
if (ldist < result.distance) {
|
|
29
29
|
result.distance = ldist;
|
|
30
30
|
result.name = entry->name;
|
|
@@ -460,7 +460,7 @@ SimilarCatalogEntry CatalogSet::SimilarEntry(CatalogTransaction transaction, con
|
|
|
460
460
|
for (auto &kv : mapping) {
|
|
461
461
|
auto mapping_value = GetMapping(transaction, kv.first);
|
|
462
462
|
if (mapping_value && !mapping_value->deleted) {
|
|
463
|
-
auto ldist = StringUtil::
|
|
463
|
+
auto ldist = StringUtil::SimilarityScore(kv.first, name);
|
|
464
464
|
if (ldist < result.distance) {
|
|
465
465
|
result.distance = ldist;
|
|
466
466
|
result.name = kv.first;
|
|
@@ -48,7 +48,7 @@ static DefaultView internal_views[] = {
|
|
|
48
48
|
{"pg_catalog", "pg_views", "SELECT schema_name schemaname, view_name viewname, 'duckdb' viewowner, sql definition FROM duckdb_views()"},
|
|
49
49
|
{"information_schema", "columns", "SELECT database_name table_catalog, schema_name table_schema, table_name, column_name, column_index ordinal_position, column_default, CASE WHEN is_nullable THEN 'YES' ELSE 'NO' END is_nullable, data_type, character_maximum_length, NULL character_octet_length, numeric_precision, numeric_precision_radix, numeric_scale, NULL datetime_precision, NULL interval_type, NULL interval_precision, NULL character_set_catalog, NULL character_set_schema, NULL character_set_name, NULL collation_catalog, NULL collation_schema, NULL collation_name, NULL domain_catalog, NULL domain_schema, NULL domain_name, NULL udt_catalog, NULL udt_schema, NULL udt_name, NULL scope_catalog, NULL scope_schema, NULL scope_name, NULL maximum_cardinality, NULL dtd_identifier, NULL is_self_referencing, NULL is_identity, NULL identity_generation, NULL identity_start, NULL identity_increment, NULL identity_maximum, NULL identity_minimum, NULL identity_cycle, NULL is_generated, NULL generation_expression, NULL is_updatable FROM duckdb_columns;"},
|
|
50
50
|
{"information_schema", "schemata", "SELECT database_name catalog_name, schema_name, 'duckdb' schema_owner, NULL default_character_set_catalog, NULL default_character_set_schema, NULL default_character_set_name, sql sql_path FROM duckdb_schemas()"},
|
|
51
|
-
{"information_schema", "tables", "SELECT database_name table_catalog, schema_name table_schema, table_name, CASE WHEN temporary THEN 'LOCAL TEMPORARY' ELSE 'BASE TABLE' END table_type, NULL self_referencing_column_name, NULL reference_generation, NULL user_defined_type_catalog, NULL user_defined_type_schema, NULL user_defined_type_name, 'YES' is_insertable_into, 'NO' is_typed, CASE WHEN temporary THEN 'PRESERVE' ELSE NULL END commit_action FROM duckdb_tables() UNION ALL SELECT
|
|
51
|
+
{"information_schema", "tables", "SELECT database_name table_catalog, schema_name table_schema, table_name, CASE WHEN temporary THEN 'LOCAL TEMPORARY' ELSE 'BASE TABLE' END table_type, NULL self_referencing_column_name, NULL reference_generation, NULL user_defined_type_catalog, NULL user_defined_type_schema, NULL user_defined_type_name, 'YES' is_insertable_into, 'NO' is_typed, CASE WHEN temporary THEN 'PRESERVE' ELSE NULL END commit_action FROM duckdb_tables() UNION ALL SELECT database_name table_catalog, schema_name table_schema, view_name table_name, 'VIEW' table_type, NULL self_referencing_column_name, NULL reference_generation, NULL user_defined_type_catalog, NULL user_defined_type_schema, NULL user_defined_type_name, 'NO' is_insertable_into, 'NO' is_typed, NULL commit_action FROM duckdb_views;"},
|
|
52
52
|
{nullptr, nullptr, nullptr}};
|
|
53
53
|
|
|
54
54
|
static unique_ptr<CreateViewInfo> GetDefaultView(ClientContext &context, const string &input_schema, const string &input_name) {
|