duckdb 0.9.1-dev67.0 → 0.9.1-dev86.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/core_functions/scalar/map/map.cpp +66 -32
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +19 -21
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +2 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +7 -0
package/package.json
CHANGED
@@ -87,11 +87,24 @@ static bool ListEntriesEqual(Vector &keys, Vector &values, idx_t count) {
|
|
87
87
|
return true;
|
88
88
|
}
|
89
89
|
|
90
|
+
static list_entry_t *GetBiggestList(Vector &key, Vector &value, idx_t &size) {
|
91
|
+
auto key_size = ListVector::GetListSize(key);
|
92
|
+
auto value_size = ListVector::GetListSize(value);
|
93
|
+
if (key_size > value_size) {
|
94
|
+
size = key_size;
|
95
|
+
return ListVector::GetData(key);
|
96
|
+
}
|
97
|
+
size = value_size;
|
98
|
+
return ListVector::GetData(value);
|
99
|
+
}
|
100
|
+
|
90
101
|
static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
91
102
|
D_ASSERT(result.GetType().id() == LogicalTypeId::MAP);
|
92
103
|
|
93
|
-
auto
|
94
|
-
|
104
|
+
auto count = args.size();
|
105
|
+
|
106
|
+
auto &map_key_vector = MapVector::GetKeys(result);
|
107
|
+
auto &map_value_vector = MapVector::GetValues(result);
|
95
108
|
auto result_data = ListVector::GetData(result);
|
96
109
|
|
97
110
|
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
@@ -99,52 +112,73 @@ static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result)
|
|
99
112
|
ListVector::SetListSize(result, 0);
|
100
113
|
result_data->offset = 0;
|
101
114
|
result_data->length = 0;
|
102
|
-
result.Verify(
|
115
|
+
result.Verify(count);
|
103
116
|
return;
|
104
117
|
}
|
105
118
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
119
|
+
D_ASSERT(args.ColumnCount() == 2);
|
120
|
+
auto &key_vector = args.data[0];
|
121
|
+
auto &value_vector = args.data[1];
|
122
|
+
|
123
|
+
if (args.AllConstant()) {
|
124
|
+
auto key_data = ListVector::GetData(key_vector);
|
125
|
+
auto value_data = ListVector::GetData(value_vector);
|
126
|
+
auto key_entry = key_data[0];
|
127
|
+
auto value_entry = value_data[0];
|
128
|
+
if (key_entry != value_entry) {
|
129
|
+
throw BinderException("Key and value list sizes don't match");
|
130
|
+
}
|
131
|
+
result_data[0] = key_entry;
|
132
|
+
ListVector::SetListSize(result, ListVector::GetListSize(key_vector));
|
133
|
+
map_key_vector.Reference(ListVector::GetEntry(key_vector));
|
134
|
+
map_value_vector.Reference(ListVector::GetEntry(value_vector));
|
135
|
+
MapVector::MapConversionVerify(result, count);
|
136
|
+
result.Verify(count);
|
137
|
+
return;
|
110
138
|
}
|
111
139
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
140
|
+
result.SetVectorType(VectorType::FLAT_VECTOR);
|
141
|
+
|
142
|
+
if (key_vector.GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
143
|
+
D_ASSERT(value_vector.GetVectorType() != VectorType::CONSTANT_VECTOR);
|
144
|
+
Vector expanded_const(ListType::GetChildType(key_vector.GetType()), count);
|
145
|
+
AlignVectorToReference(key_vector, value_vector, count, expanded_const);
|
146
|
+
map_key_vector.Reference(expanded_const);
|
147
|
+
|
148
|
+
value_vector.Flatten(count);
|
149
|
+
map_value_vector.Reference(ListVector::GetEntry(value_vector));
|
150
|
+
} else if (value_vector.GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
151
|
+
D_ASSERT(key_vector.GetVectorType() != VectorType::CONSTANT_VECTOR);
|
152
|
+
Vector expanded_const(ListType::GetChildType(value_vector.GetType()), count);
|
153
|
+
AlignVectorToReference(value_vector, key_vector, count, expanded_const);
|
154
|
+
map_value_vector.Reference(expanded_const);
|
155
|
+
|
156
|
+
key_vector.Flatten(count);
|
157
|
+
map_key_vector.Reference(ListVector::GetEntry(key_vector));
|
123
158
|
} else {
|
124
|
-
|
159
|
+
key_vector.Flatten(count);
|
160
|
+
value_vector.Flatten(count);
|
161
|
+
|
162
|
+
if (!ListEntriesEqual(key_vector, value_vector, count)) {
|
125
163
|
throw InvalidInputException("Error in MAP creation: key list and value list do not align. i.e. different "
|
126
164
|
"size or incompatible structure");
|
127
165
|
}
|
166
|
+
|
167
|
+
map_value_vector.Reference(ListVector::GetEntry(value_vector));
|
168
|
+
map_key_vector.Reference(ListVector::GetEntry(key_vector));
|
128
169
|
}
|
129
170
|
|
130
|
-
|
171
|
+
idx_t list_size;
|
172
|
+
auto src_data = GetBiggestList(key_vector, value_vector, list_size);
|
173
|
+
ListVector::SetListSize(result, list_size);
|
131
174
|
|
132
175
|
result_data = ListVector::GetData(result);
|
133
|
-
for (idx_t i = 0; i <
|
176
|
+
for (idx_t i = 0; i < count; i++) {
|
134
177
|
result_data[i] = src_data[i];
|
135
178
|
}
|
136
179
|
|
137
|
-
|
138
|
-
|
139
|
-
if (!(keys_are_const && !values_are_const)) {
|
140
|
-
key_vector.Reference(ListVector::GetEntry(args.data[0]));
|
141
|
-
}
|
142
|
-
if (!(values_are_const && !keys_are_const)) {
|
143
|
-
value_vector.Reference(ListVector::GetEntry(args.data[1]));
|
144
|
-
}
|
145
|
-
|
146
|
-
MapVector::MapConversionVerify(result, args.size());
|
147
|
-
result.Verify(args.size());
|
180
|
+
MapVector::MapConversionVerify(result, count);
|
181
|
+
result.Verify(count);
|
148
182
|
}
|
149
183
|
|
150
184
|
static unique_ptr<FunctionData> MapBind(ClientContext &context, ScalarFunction &bound_function,
|
@@ -22,39 +22,37 @@ CSVSniffer::CSVSniffer(CSVReaderOptions &options_p, shared_ptr<CSVBufferManager>
|
|
22
22
|
}
|
23
23
|
}
|
24
24
|
|
25
|
+
void CSVSniffer::SetResultOptions() {
|
26
|
+
options.dialect_options = best_candidate->dialect_options;
|
27
|
+
options.dialect_options.new_line = best_candidate->dialect_options.new_line;
|
28
|
+
options.has_header = best_candidate->dialect_options.header;
|
29
|
+
options.skip_rows_set = options.dialect_options.skip_rows > 0;
|
30
|
+
if (options.has_header) {
|
31
|
+
options.dialect_options.true_start = best_start_with_header;
|
32
|
+
} else {
|
33
|
+
options.dialect_options.true_start = best_start_without_header;
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
25
37
|
SnifferResult CSVSniffer::SniffCSV() {
|
26
38
|
// 1. Dialect Detection
|
27
39
|
DetectDialect();
|
28
|
-
if (explicit_set_columns) {
|
29
|
-
if (!candidates.empty()) {
|
30
|
-
options.dialect_options.state_machine_options = candidates[0]->dialect_options.state_machine_options;
|
31
|
-
options.dialect_options.new_line = candidates[0]->dialect_options.new_line;
|
32
|
-
}
|
33
|
-
// We do not need to run type and header detection as these were defined by the user
|
34
|
-
return SnifferResult(detected_types, names);
|
35
|
-
}
|
36
40
|
// 2. Type Detection
|
37
41
|
DetectTypes();
|
38
42
|
// 3. Header Detection
|
39
43
|
DetectHeader();
|
44
|
+
if (explicit_set_columns) {
|
45
|
+
SetResultOptions();
|
46
|
+
// We do not need to run type refinement, since the types have been given by the user
|
47
|
+
return SnifferResult({}, {});
|
48
|
+
}
|
40
49
|
D_ASSERT(best_sql_types_candidates_per_column_idx.size() == names.size());
|
41
50
|
// 4. Type Replacement
|
42
51
|
ReplaceTypes();
|
43
52
|
// 5. Type Refinement
|
44
53
|
RefineTypes();
|
45
|
-
// We are done,
|
46
|
-
|
47
|
-
// Set the CSV Options in the reference
|
48
|
-
options.dialect_options = best_candidate->dialect_options;
|
49
|
-
options.has_header = best_candidate->dialect_options.header;
|
50
|
-
options.skip_rows_set = options.dialect_options.skip_rows > 0;
|
51
|
-
if (options.has_header) {
|
52
|
-
options.dialect_options.true_start = best_start_with_header;
|
53
|
-
} else {
|
54
|
-
options.dialect_options.true_start = best_start_without_header;
|
55
|
-
}
|
56
|
-
|
57
|
-
// Return the types and names
|
54
|
+
// We are done, Set the CSV Options in the reference. Construct and return the result.
|
55
|
+
SetResultOptions();
|
58
56
|
return SnifferResult(detected_types, names);
|
59
57
|
}
|
60
58
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "v0.9.1-
|
2
|
+
#define DUCKDB_VERSION "v0.9.1-dev86"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "c0dfff9198"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -50,6 +50,8 @@ private:
|
|
50
50
|
CSVReaderOptions &options;
|
51
51
|
//! Buffer being used on sniffer
|
52
52
|
shared_ptr<CSVBufferManager> buffer_manager;
|
53
|
+
//! Sets the result options
|
54
|
+
void SetResultOptions();
|
53
55
|
|
54
56
|
//! ------------------------------------------------------//
|
55
57
|
//! ----------------- Dialect Detection ----------------- //
|
@@ -196,6 +196,9 @@ string ExtensionHelper::AddExtensionInstallHintToErrorMsg(ClientContext &context
|
|
196
196
|
}
|
197
197
|
|
198
198
|
bool ExtensionHelper::TryAutoLoadExtension(ClientContext &context, const string &extension_name) noexcept {
|
199
|
+
if (context.db->ExtensionIsLoaded(extension_name)) {
|
200
|
+
return true;
|
201
|
+
}
|
199
202
|
auto &dbconfig = DBConfig::GetConfig(context);
|
200
203
|
try {
|
201
204
|
if (dbconfig.options.autoinstall_known_extensions) {
|
@@ -211,6 +214,10 @@ bool ExtensionHelper::TryAutoLoadExtension(ClientContext &context, const string
|
|
211
214
|
}
|
212
215
|
|
213
216
|
void ExtensionHelper::AutoLoadExtension(ClientContext &context, const string &extension_name) {
|
217
|
+
if (context.db->ExtensionIsLoaded(extension_name)) {
|
218
|
+
// Avoid downloading again
|
219
|
+
return;
|
220
|
+
}
|
214
221
|
auto &dbconfig = DBConfig::GetConfig(context);
|
215
222
|
try {
|
216
223
|
#ifndef DUCKDB_WASM
|