duckdb 0.6.2-dev2115.0 → 0.6.2-dev2226.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -5
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +6 -1
- package/src/duckdb/extension/json/include/json_common.hpp +1 -0
- package/src/duckdb/extension/json/include/json_scan.hpp +7 -0
- package/src/duckdb/extension/json/include/json_transform.hpp +25 -10
- package/src/duckdb/extension/json/json_common.cpp +6 -2
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +47 -9
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +183 -106
- package/src/duckdb/extension/json/json_functions/read_json.cpp +35 -22
- package/src/duckdb/extension/json/json_scan.cpp +26 -5
- package/src/duckdb/extension/parquet/parquet-extension.cpp +1 -0
- package/src/duckdb/src/catalog/catalog.cpp +11 -12
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/common/box_renderer.cpp +9 -1
- package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
- package/src/duckdb/src/common/enums/relation_type.cpp +2 -0
- package/src/duckdb/src/common/gzip_file_system.cpp +1 -1
- package/src/duckdb/src/common/local_file_system.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +2 -2
- package/src/duckdb/src/common/types/column_data_allocator.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +7 -2
- package/src/duckdb/src/common/types/vector.cpp +3 -2
- package/src/duckdb/src/common/virtual_file_system.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +5 -5
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -5
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +2 -0
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +182 -123
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +22 -18
- package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +1 -1
- package/src/duckdb/src/function/aggregate/distributive/arg_min_max.cpp +2 -3
- package/src/duckdb/src/function/scalar/math/setseed.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/substring.cpp +8 -0
- package/src/duckdb/src/function/table/read_csv.cpp +1 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/http_stats.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/limits.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +1 -9
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_unnest.hpp +5 -1
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +0 -4
- package/src/duckdb/src/include/duckdb/main/database.hpp +6 -0
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +5 -5
- package/src/duckdb/src/include/duckdb/main/relation/write_csv_relation.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +34 -0
- package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +4 -3
- package/src/duckdb/src/include/duckdb.h +7 -0
- package/src/duckdb/src/main/capi/threading-c.cpp +8 -0
- package/src/duckdb/src/main/client_context.cpp +7 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +14 -0
- package/src/duckdb/src/main/database.cpp +57 -40
- package/src/duckdb/src/main/extension/extension_load.cpp +20 -28
- package/src/duckdb/src/main/relation/write_csv_relation.cpp +4 -2
- package/src/duckdb/src/main/relation/write_parquet_relation.cpp +37 -0
- package/src/duckdb/src/main/relation.cpp +12 -2
- package/src/duckdb/src/parallel/executor.cpp +4 -0
- package/src/duckdb/src/parser/statement/copy_statement.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_show.cpp +4 -3
- package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +24 -3
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -1
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +2 -0
- package/src/duckdb/src/storage/compression/bitpacking.cpp +2 -1
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +1 -1
- package/src/duckdb/src/storage/index.cpp +1 -1
- package/src/duckdb/src/storage/meta_block_writer.cpp +1 -1
- package/src/duckdb/src/storage/table/column_segment.cpp +3 -3
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +1 -2
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +539 -300
- package/src/duckdb/ub_src_main.cpp +0 -2
- package/src/duckdb/ub_src_main_relation.cpp +2 -0
- package/src/duckdb/src/include/duckdb/function/replacement_open.hpp +0 -54
- package/src/duckdb/src/include/duckdb/main/replacement_opens.hpp +0 -20
- package/src/duckdb/src/main/extension_prefix_opener.cpp +0 -55
|
@@ -11,7 +11,11 @@ namespace duckdb {
|
|
|
11
11
|
class UnnestOperatorState : public OperatorState {
|
|
12
12
|
public:
|
|
13
13
|
UnnestOperatorState(ClientContext &context, const vector<unique_ptr<Expression>> &select_list)
|
|
14
|
-
:
|
|
14
|
+
: current_row(0), list_position(0), longest_list_length(DConstants::INVALID_INDEX), first_fetch(true),
|
|
15
|
+
executor(context) {
|
|
16
|
+
|
|
17
|
+
// for each UNNEST in the select_list, we add the child expression to the expression executor
|
|
18
|
+
// and set the return type in the list_data chunk, which will contain the evaluated expression results
|
|
15
19
|
vector<LogicalType> list_data_types;
|
|
16
20
|
for (auto &exp : select_list) {
|
|
17
21
|
D_ASSERT(exp->type == ExpressionType::BOUND_UNNEST);
|
|
@@ -19,6 +23,7 @@ public:
|
|
|
19
23
|
list_data_types.push_back(bue->child->return_type);
|
|
20
24
|
executor.AddExpression(*bue->child.get());
|
|
21
25
|
}
|
|
26
|
+
|
|
22
27
|
auto &allocator = Allocator::Get(context);
|
|
23
28
|
list_data.Initialize(allocator, list_data_types);
|
|
24
29
|
|
|
@@ -26,18 +31,50 @@ public:
|
|
|
26
31
|
list_child_data.resize(list_data.ColumnCount());
|
|
27
32
|
}
|
|
28
33
|
|
|
29
|
-
idx_t
|
|
34
|
+
idx_t current_row;
|
|
30
35
|
idx_t list_position;
|
|
31
|
-
|
|
36
|
+
idx_t longest_list_length;
|
|
32
37
|
bool first_fetch;
|
|
33
38
|
|
|
34
39
|
ExpressionExecutor executor;
|
|
35
40
|
DataChunk list_data;
|
|
36
41
|
vector<UnifiedVectorFormat> list_vector_data;
|
|
37
42
|
vector<UnifiedVectorFormat> list_child_data;
|
|
43
|
+
|
|
44
|
+
public:
|
|
45
|
+
//! Reset the fields of the unnest operator state
|
|
46
|
+
void Reset();
|
|
47
|
+
//! Set the longest list's length for the current row
|
|
48
|
+
void SetLongestListLength();
|
|
38
49
|
};
|
|
39
50
|
|
|
40
|
-
|
|
51
|
+
void UnnestOperatorState::Reset() {
|
|
52
|
+
current_row = 0;
|
|
53
|
+
list_position = 0;
|
|
54
|
+
longest_list_length = DConstants::INVALID_INDEX;
|
|
55
|
+
first_fetch = true;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
void UnnestOperatorState::SetLongestListLength() {
|
|
59
|
+
|
|
60
|
+
longest_list_length = 0;
|
|
61
|
+
for (idx_t col_idx = 0; col_idx < list_data.ColumnCount(); col_idx++) {
|
|
62
|
+
|
|
63
|
+
auto &vector_data = list_vector_data[col_idx];
|
|
64
|
+
auto current_idx = vector_data.sel->get_index(current_row);
|
|
65
|
+
|
|
66
|
+
if (vector_data.validity.RowIsValid(current_idx)) {
|
|
67
|
+
|
|
68
|
+
// check if this list is longer
|
|
69
|
+
auto list_data = (list_entry_t *)vector_data.data;
|
|
70
|
+
auto list_entry = list_data[current_idx];
|
|
71
|
+
if (list_entry.length > longest_list_length) {
|
|
72
|
+
longest_list_length = list_entry.length;
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
41
78
|
PhysicalUnnest::PhysicalUnnest(vector<LogicalType> types, vector<unique_ptr<Expression>> select_list,
|
|
42
79
|
idx_t estimated_cardinality, PhysicalOperatorType type)
|
|
43
80
|
: PhysicalOperator(type, std::move(types), estimated_cardinality), select_list(std::move(select_list)) {
|
|
@@ -45,6 +82,8 @@ PhysicalUnnest::PhysicalUnnest(vector<LogicalType> types, vector<unique_ptr<Expr
|
|
|
45
82
|
}
|
|
46
83
|
|
|
47
84
|
static void UnnestNull(idx_t start, idx_t end, Vector &result) {
|
|
85
|
+
|
|
86
|
+
D_ASSERT(result.GetVectorType() == VectorType::FLAT_VECTOR);
|
|
48
87
|
auto &validity = FlatVector::Validity(result);
|
|
49
88
|
for (idx_t i = start; i < end; i++) {
|
|
50
89
|
validity.SetInvalid(i);
|
|
@@ -58,14 +97,17 @@ static void UnnestNull(idx_t start, idx_t end, Vector &result) {
|
|
|
58
97
|
}
|
|
59
98
|
|
|
60
99
|
template <class T>
|
|
61
|
-
static void TemplatedUnnest(UnifiedVectorFormat &
|
|
62
|
-
|
|
63
|
-
auto
|
|
100
|
+
static void TemplatedUnnest(UnifiedVectorFormat &vector_data, idx_t start, idx_t end, Vector &result) {
|
|
101
|
+
|
|
102
|
+
auto source_data = (T *)vector_data.data;
|
|
103
|
+
auto &source_mask = vector_data.validity;
|
|
104
|
+
|
|
105
|
+
D_ASSERT(result.GetVectorType() == VectorType::FLAT_VECTOR);
|
|
64
106
|
auto result_data = FlatVector::GetData<T>(result);
|
|
65
107
|
auto &result_mask = FlatVector::Validity(result);
|
|
66
108
|
|
|
67
109
|
for (idx_t i = start; i < end; i++) {
|
|
68
|
-
auto source_idx =
|
|
110
|
+
auto source_idx = vector_data.sel->get_index(i);
|
|
69
111
|
auto target_idx = i - start;
|
|
70
112
|
if (source_mask.RowIsValid(source_idx)) {
|
|
71
113
|
result_data[target_idx] = source_data[source_idx];
|
|
@@ -76,84 +118,131 @@ static void TemplatedUnnest(UnifiedVectorFormat &vdata, idx_t start, idx_t end,
|
|
|
76
118
|
}
|
|
77
119
|
}
|
|
78
120
|
|
|
79
|
-
static void UnnestValidity(UnifiedVectorFormat &
|
|
80
|
-
|
|
121
|
+
static void UnnestValidity(UnifiedVectorFormat &vector_data, idx_t start, idx_t end, Vector &result) {
|
|
122
|
+
|
|
123
|
+
auto &source_mask = vector_data.validity;
|
|
124
|
+
D_ASSERT(result.GetVectorType() == VectorType::FLAT_VECTOR);
|
|
81
125
|
auto &result_mask = FlatVector::Validity(result);
|
|
82
126
|
|
|
83
127
|
for (idx_t i = start; i < end; i++) {
|
|
84
|
-
auto source_idx =
|
|
128
|
+
auto source_idx = vector_data.sel->get_index(i);
|
|
85
129
|
auto target_idx = i - start;
|
|
86
130
|
result_mask.Set(target_idx, source_mask.RowIsValid(source_idx));
|
|
87
131
|
}
|
|
88
132
|
}
|
|
89
133
|
|
|
90
|
-
static void UnnestVector(UnifiedVectorFormat &
|
|
91
|
-
Vector &result) {
|
|
92
|
-
|
|
134
|
+
static void UnnestVector(UnifiedVectorFormat &child_vector_data, Vector &child_vector, idx_t list_size, idx_t start,
|
|
135
|
+
idx_t end, Vector &result) {
|
|
136
|
+
|
|
137
|
+
D_ASSERT(child_vector.GetType() == result.GetType());
|
|
93
138
|
switch (result.GetType().InternalType()) {
|
|
94
139
|
case PhysicalType::BOOL:
|
|
95
140
|
case PhysicalType::INT8:
|
|
96
|
-
TemplatedUnnest<int8_t>(
|
|
141
|
+
TemplatedUnnest<int8_t>(child_vector_data, start, end, result);
|
|
97
142
|
break;
|
|
98
143
|
case PhysicalType::INT16:
|
|
99
|
-
TemplatedUnnest<int16_t>(
|
|
144
|
+
TemplatedUnnest<int16_t>(child_vector_data, start, end, result);
|
|
100
145
|
break;
|
|
101
146
|
case PhysicalType::INT32:
|
|
102
|
-
TemplatedUnnest<int32_t>(
|
|
147
|
+
TemplatedUnnest<int32_t>(child_vector_data, start, end, result);
|
|
103
148
|
break;
|
|
104
149
|
case PhysicalType::INT64:
|
|
105
|
-
TemplatedUnnest<int64_t>(
|
|
150
|
+
TemplatedUnnest<int64_t>(child_vector_data, start, end, result);
|
|
106
151
|
break;
|
|
107
152
|
case PhysicalType::INT128:
|
|
108
|
-
TemplatedUnnest<hugeint_t>(
|
|
153
|
+
TemplatedUnnest<hugeint_t>(child_vector_data, start, end, result);
|
|
109
154
|
break;
|
|
110
155
|
case PhysicalType::UINT8:
|
|
111
|
-
TemplatedUnnest<uint8_t>(
|
|
156
|
+
TemplatedUnnest<uint8_t>(child_vector_data, start, end, result);
|
|
112
157
|
break;
|
|
113
158
|
case PhysicalType::UINT16:
|
|
114
|
-
TemplatedUnnest<uint16_t>(
|
|
159
|
+
TemplatedUnnest<uint16_t>(child_vector_data, start, end, result);
|
|
115
160
|
break;
|
|
116
161
|
case PhysicalType::UINT32:
|
|
117
|
-
TemplatedUnnest<uint32_t>(
|
|
162
|
+
TemplatedUnnest<uint32_t>(child_vector_data, start, end, result);
|
|
118
163
|
break;
|
|
119
164
|
case PhysicalType::UINT64:
|
|
120
|
-
TemplatedUnnest<uint64_t>(
|
|
165
|
+
TemplatedUnnest<uint64_t>(child_vector_data, start, end, result);
|
|
121
166
|
break;
|
|
122
167
|
case PhysicalType::FLOAT:
|
|
123
|
-
TemplatedUnnest<float>(
|
|
168
|
+
TemplatedUnnest<float>(child_vector_data, start, end, result);
|
|
124
169
|
break;
|
|
125
170
|
case PhysicalType::DOUBLE:
|
|
126
|
-
TemplatedUnnest<double>(
|
|
171
|
+
TemplatedUnnest<double>(child_vector_data, start, end, result);
|
|
127
172
|
break;
|
|
128
173
|
case PhysicalType::INTERVAL:
|
|
129
|
-
TemplatedUnnest<interval_t>(
|
|
174
|
+
TemplatedUnnest<interval_t>(child_vector_data, start, end, result);
|
|
130
175
|
break;
|
|
131
176
|
case PhysicalType::VARCHAR:
|
|
132
|
-
TemplatedUnnest<string_t>(
|
|
177
|
+
TemplatedUnnest<string_t>(child_vector_data, start, end, result);
|
|
133
178
|
break;
|
|
134
179
|
case PhysicalType::LIST: {
|
|
180
|
+
// the child vector of result now references the child vector source
|
|
181
|
+
// FIXME: only reference relevant children (start - end) instead of all
|
|
135
182
|
auto &target = ListVector::GetEntry(result);
|
|
136
|
-
target.Reference(ListVector::GetEntry(
|
|
137
|
-
ListVector::SetListSize(result, ListVector::GetListSize(
|
|
138
|
-
|
|
183
|
+
target.Reference(ListVector::GetEntry(child_vector));
|
|
184
|
+
ListVector::SetListSize(result, ListVector::GetListSize(child_vector));
|
|
185
|
+
// unnest
|
|
186
|
+
TemplatedUnnest<list_entry_t>(child_vector_data, start, end, result);
|
|
139
187
|
break;
|
|
140
188
|
}
|
|
141
189
|
case PhysicalType::STRUCT: {
|
|
142
|
-
auto &
|
|
143
|
-
auto &
|
|
144
|
-
|
|
145
|
-
for
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
190
|
+
auto &child_vector_entries = StructVector::GetEntries(child_vector);
|
|
191
|
+
auto &result_entries = StructVector::GetEntries(result);
|
|
192
|
+
|
|
193
|
+
// set the validity mask for the 'outer' struct vector before unnesting its children
|
|
194
|
+
UnnestValidity(child_vector_data, start, end, result);
|
|
195
|
+
|
|
196
|
+
for (idx_t i = 0; i < child_vector_entries.size(); i++) {
|
|
197
|
+
UnifiedVectorFormat child_vector_entries_data;
|
|
198
|
+
child_vector_entries[i]->ToUnifiedFormat(list_size, child_vector_entries_data);
|
|
199
|
+
UnnestVector(child_vector_entries_data, *child_vector_entries[i], list_size, start, end,
|
|
200
|
+
*result_entries[i]);
|
|
149
201
|
}
|
|
150
202
|
break;
|
|
151
203
|
}
|
|
152
204
|
default:
|
|
153
|
-
throw InternalException("Unimplemented type for UNNEST");
|
|
205
|
+
throw InternalException("Unimplemented type for UNNEST.");
|
|
154
206
|
}
|
|
155
207
|
}
|
|
156
208
|
|
|
209
|
+
static void PrepareInput(UnnestOperatorState &state, DataChunk &input,
|
|
210
|
+
const vector<unique_ptr<Expression>> &select_list) {
|
|
211
|
+
|
|
212
|
+
state.list_data.Reset();
|
|
213
|
+
// execute the expressions inside each UNNEST in the select_list to get the list data
|
|
214
|
+
// execution results (lists) are kept in state.list_data chunk
|
|
215
|
+
state.executor.Execute(input, state.list_data);
|
|
216
|
+
|
|
217
|
+
// verify incoming lists
|
|
218
|
+
state.list_data.Verify();
|
|
219
|
+
D_ASSERT(input.size() == state.list_data.size());
|
|
220
|
+
D_ASSERT(state.list_data.ColumnCount() == select_list.size());
|
|
221
|
+
D_ASSERT(state.list_vector_data.size() == state.list_data.ColumnCount());
|
|
222
|
+
D_ASSERT(state.list_child_data.size() == state.list_data.ColumnCount());
|
|
223
|
+
|
|
224
|
+
// get the UnifiedVectorFormat of each list_data vector (LIST vectors for the different UNNESTs)
|
|
225
|
+
// both for the vector itself and its child vector
|
|
226
|
+
for (idx_t col_idx = 0; col_idx < state.list_data.ColumnCount(); col_idx++) {
|
|
227
|
+
|
|
228
|
+
auto &list_vector = state.list_data.data[col_idx];
|
|
229
|
+
list_vector.ToUnifiedFormat(state.list_data.size(), state.list_vector_data[col_idx]);
|
|
230
|
+
|
|
231
|
+
if (list_vector.GetType() == LogicalType::SQLNULL) {
|
|
232
|
+
// UNNEST(NULL): SQLNULL vectors don't have child vectors, but we need to point to the child vector of
|
|
233
|
+
// each vector, so we just get the UnifiedVectorFormat of the vector itself
|
|
234
|
+
auto &child_vector = list_vector;
|
|
235
|
+
child_vector.ToUnifiedFormat(0, state.list_child_data[col_idx]);
|
|
236
|
+
} else {
|
|
237
|
+
auto list_size = ListVector::GetListSize(list_vector);
|
|
238
|
+
auto &child_vector = ListVector::GetEntry(list_vector);
|
|
239
|
+
child_vector.ToUnifiedFormat(list_size, state.list_child_data[col_idx]);
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
state.first_fetch = false;
|
|
244
|
+
}
|
|
245
|
+
|
|
157
246
|
unique_ptr<OperatorState> PhysicalUnnest::GetOperatorState(ExecutionContext &context) const {
|
|
158
247
|
return PhysicalUnnest::GetState(context, select_list);
|
|
159
248
|
}
|
|
@@ -167,137 +256,107 @@ OperatorResultType PhysicalUnnest::ExecuteInternal(ExecutionContext &context, Da
|
|
|
167
256
|
OperatorState &state_p,
|
|
168
257
|
const vector<unique_ptr<Expression>> &select_list,
|
|
169
258
|
bool include_input) {
|
|
259
|
+
|
|
170
260
|
auto &state = (UnnestOperatorState &)state_p;
|
|
261
|
+
|
|
171
262
|
do {
|
|
263
|
+
// prepare the input data by executing any expressions and getting the
|
|
264
|
+
// UnifiedVectorFormat of each LIST vector (list_vector_data) and its child vector (list_child_data)
|
|
172
265
|
if (state.first_fetch) {
|
|
173
|
-
|
|
174
|
-
state.list_data.Reset();
|
|
175
|
-
state.executor.Execute(input, state.list_data);
|
|
176
|
-
|
|
177
|
-
// paranoia aplenty
|
|
178
|
-
state.list_data.Verify();
|
|
179
|
-
D_ASSERT(input.size() == state.list_data.size());
|
|
180
|
-
D_ASSERT(state.list_data.ColumnCount() == select_list.size());
|
|
181
|
-
D_ASSERT(state.list_vector_data.size() == state.list_data.ColumnCount());
|
|
182
|
-
D_ASSERT(state.list_child_data.size() == state.list_data.ColumnCount());
|
|
183
|
-
|
|
184
|
-
// initialize UnifiedVectorFormat object so the nullmask can accessed
|
|
185
|
-
for (idx_t col_idx = 0; col_idx < state.list_data.ColumnCount(); col_idx++) {
|
|
186
|
-
auto &list_vector = state.list_data.data[col_idx];
|
|
187
|
-
list_vector.ToUnifiedFormat(state.list_data.size(), state.list_vector_data[col_idx]);
|
|
188
|
-
|
|
189
|
-
if (list_vector.GetType() == LogicalType::SQLNULL) {
|
|
190
|
-
// UNNEST(NULL)
|
|
191
|
-
auto &child_vector = list_vector;
|
|
192
|
-
child_vector.ToUnifiedFormat(0, state.list_child_data[col_idx]);
|
|
193
|
-
} else {
|
|
194
|
-
auto list_size = ListVector::GetListSize(list_vector);
|
|
195
|
-
auto &child_vector = ListVector::GetEntry(list_vector);
|
|
196
|
-
child_vector.ToUnifiedFormat(list_size, state.list_child_data[col_idx]);
|
|
197
|
-
}
|
|
198
|
-
}
|
|
199
|
-
state.first_fetch = false;
|
|
266
|
+
PrepareInput(state, input, select_list);
|
|
200
267
|
}
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
state.
|
|
205
|
-
state.list_length = -1;
|
|
206
|
-
state.first_fetch = true;
|
|
268
|
+
|
|
269
|
+
// finished with all rows of this input chunk, reset
|
|
270
|
+
if (state.current_row >= input.size()) {
|
|
271
|
+
state.Reset();
|
|
207
272
|
return OperatorResultType::NEED_MORE_INPUT;
|
|
208
273
|
}
|
|
209
274
|
|
|
210
|
-
//
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
auto current_idx = vdata.sel->get_index(state.parent_position);
|
|
215
|
-
|
|
216
|
-
int64_t list_length;
|
|
217
|
-
// deal with NULL values
|
|
218
|
-
if (!vdata.validity.RowIsValid(current_idx)) {
|
|
219
|
-
list_length = 0;
|
|
220
|
-
} else {
|
|
221
|
-
auto list_data = (list_entry_t *)vdata.data;
|
|
222
|
-
auto list_entry = list_data[current_idx];
|
|
223
|
-
list_length = (int64_t)list_entry.length;
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
if (list_length > state.list_length) {
|
|
227
|
-
state.list_length = list_length;
|
|
228
|
-
}
|
|
229
|
-
}
|
|
275
|
+
// each UNNEST in the select_list contains a list (or NULL) for this row, find longest list
|
|
276
|
+
// because this length determines how many times we need to repeat for the current row
|
|
277
|
+
if (state.longest_list_length == DConstants::INVALID_INDEX) {
|
|
278
|
+
state.SetLongestListLength();
|
|
230
279
|
}
|
|
280
|
+
D_ASSERT(state.longest_list_length != DConstants::INVALID_INDEX);
|
|
231
281
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
auto this_chunk_len = MinValue<idx_t>(STANDARD_VECTOR_SIZE, state.list_length - state.list_position);
|
|
235
|
-
|
|
236
|
-
// first cols are from child, last n cols from unnest
|
|
282
|
+
// we emit chunks of either STANDARD_VECTOR_SIZE or smaller
|
|
283
|
+
auto this_chunk_len = MinValue<idx_t>(STANDARD_VECTOR_SIZE, state.longest_list_length - state.list_position);
|
|
237
284
|
chunk.SetCardinality(this_chunk_len);
|
|
238
285
|
|
|
239
|
-
|
|
286
|
+
// if we include other projection input columns, e.g. SELECT 1, UNNEST([1, 2]);, then
|
|
287
|
+
// we need to add them as a constant vector to the resulting chunk
|
|
288
|
+
// FIXME: emit multiple unnested rows. Currently, we never emit a chunk containing multiple unnested input rows,
|
|
289
|
+
// so setting a constant vector for the value at state.current_row is fine
|
|
290
|
+
idx_t col_offset = 0;
|
|
240
291
|
if (include_input) {
|
|
241
292
|
for (idx_t col_idx = 0; col_idx < input.ColumnCount(); col_idx++) {
|
|
242
|
-
ConstantVector::Reference(chunk.data[col_idx], input.data[col_idx], state.
|
|
243
|
-
input.size());
|
|
293
|
+
ConstantVector::Reference(chunk.data[col_idx], input.data[col_idx], state.current_row, input.size());
|
|
244
294
|
}
|
|
245
|
-
|
|
295
|
+
col_offset = input.ColumnCount();
|
|
246
296
|
}
|
|
247
297
|
|
|
298
|
+
// unnest the lists
|
|
248
299
|
for (idx_t col_idx = 0; col_idx < state.list_data.ColumnCount(); col_idx++) {
|
|
249
|
-
|
|
300
|
+
|
|
301
|
+
auto &result_vector = chunk.data[col_idx + col_offset];
|
|
250
302
|
|
|
251
303
|
if (state.list_data.data[col_idx].GetType() == LogicalType::SQLNULL) {
|
|
252
304
|
// UNNEST(NULL)
|
|
253
305
|
chunk.SetCardinality(0);
|
|
306
|
+
break;
|
|
307
|
+
|
|
254
308
|
} else {
|
|
255
|
-
auto &vdata = state.list_vector_data[col_idx];
|
|
256
|
-
auto &child_data = state.list_child_data[col_idx];
|
|
257
|
-
auto current_idx = vdata.sel->get_index(state.parent_position);
|
|
258
309
|
|
|
259
|
-
auto
|
|
260
|
-
auto
|
|
310
|
+
auto &vector_data = state.list_vector_data[col_idx];
|
|
311
|
+
auto current_idx = vector_data.sel->get_index(state.current_row);
|
|
312
|
+
|
|
313
|
+
if (!vector_data.validity.RowIsValid(current_idx)) {
|
|
314
|
+
UnnestNull(0, this_chunk_len, result_vector);
|
|
261
315
|
|
|
262
|
-
idx_t list_count;
|
|
263
|
-
if (state.list_position >= list_entry.length) {
|
|
264
|
-
list_count = 0;
|
|
265
316
|
} else {
|
|
266
|
-
list_count = MinValue<idx_t>(this_chunk_len, list_entry.length - state.list_position);
|
|
267
|
-
}
|
|
268
317
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
318
|
+
auto list_data = (list_entry_t *)vector_data.data;
|
|
319
|
+
auto list_entry = list_data[current_idx];
|
|
320
|
+
|
|
321
|
+
idx_t list_count = 0;
|
|
322
|
+
if (state.list_position < list_entry.length) {
|
|
323
|
+
// there are still list_count elements to unnest
|
|
324
|
+
list_count = MinValue<idx_t>(this_chunk_len, list_entry.length - state.list_position);
|
|
325
|
+
|
|
273
326
|
auto &list_vector = state.list_data.data[col_idx];
|
|
274
327
|
auto &child_vector = ListVector::GetEntry(list_vector);
|
|
275
328
|
auto list_size = ListVector::GetListSize(list_vector);
|
|
329
|
+
auto &child_vector_data = state.list_child_data[col_idx];
|
|
276
330
|
|
|
277
331
|
auto base_offset = list_entry.offset + state.list_position;
|
|
278
|
-
UnnestVector(
|
|
332
|
+
UnnestVector(child_vector_data, child_vector, list_size, base_offset, base_offset + list_count,
|
|
279
333
|
result_vector);
|
|
280
334
|
}
|
|
281
|
-
}
|
|
282
335
|
|
|
283
|
-
|
|
336
|
+
// fill the rest with NULLs
|
|
337
|
+
if (list_count != this_chunk_len) {
|
|
338
|
+
UnnestNull(list_count, this_chunk_len, result_vector);
|
|
339
|
+
}
|
|
340
|
+
}
|
|
284
341
|
}
|
|
285
342
|
}
|
|
286
343
|
|
|
344
|
+
chunk.Verify();
|
|
345
|
+
|
|
287
346
|
state.list_position += this_chunk_len;
|
|
288
|
-
if (
|
|
289
|
-
state.
|
|
290
|
-
state.
|
|
347
|
+
if (state.list_position == state.longest_list_length) {
|
|
348
|
+
state.current_row++;
|
|
349
|
+
state.longest_list_length = DConstants::INVALID_INDEX;
|
|
291
350
|
state.list_position = 0;
|
|
292
351
|
}
|
|
293
352
|
|
|
294
|
-
|
|
353
|
+
// we only emit one unnested row (that contains data) at a time
|
|
295
354
|
} while (chunk.size() == 0);
|
|
296
355
|
return OperatorResultType::HAVE_MORE_OUTPUT;
|
|
297
356
|
}
|
|
298
357
|
|
|
299
358
|
OperatorResultType PhysicalUnnest::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
|
|
300
|
-
GlobalOperatorState
|
|
359
|
+
GlobalOperatorState &, OperatorState &state) const {
|
|
301
360
|
return ExecuteInternal(context, input, chunk, state, select_list);
|
|
302
361
|
}
|
|
303
362
|
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
#include "duckdb/main/attached_database.hpp"
|
|
6
6
|
#include "duckdb/main/database.hpp"
|
|
7
7
|
#include "duckdb/storage/storage_extension.hpp"
|
|
8
|
+
#include "duckdb/main/extension_helper.hpp"
|
|
8
9
|
|
|
9
10
|
namespace duckdb {
|
|
10
11
|
|
|
@@ -55,11 +56,29 @@ void PhysicalAttach::GetData(ExecutionContext &context, DataChunk &chunk, Global
|
|
|
55
56
|
unrecognized_option = entry.first;
|
|
56
57
|
}
|
|
57
58
|
}
|
|
59
|
+
auto &db = DatabaseInstance::GetDatabase(context.client);
|
|
60
|
+
if (type.empty()) {
|
|
61
|
+
// try to extract type from path
|
|
62
|
+
type = db.ExtractDatabaseType(info->path);
|
|
63
|
+
}
|
|
64
|
+
if (!type.empty()) {
|
|
65
|
+
type = ExtensionHelper::ApplyExtensionAlias(type);
|
|
66
|
+
}
|
|
67
|
+
if (type.empty() && !unrecognized_option.empty()) {
|
|
68
|
+
throw BinderException("Unrecognized option for attach \"%s\"", unrecognized_option);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// if we are loading a database type from an extension - check if that extension is loaded
|
|
72
|
+
if (!type.empty()) {
|
|
73
|
+
if (!db.ExtensionIsLoaded(type)) {
|
|
74
|
+
ExtensionHelper::LoadExternalExtension(context.client, type);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
58
77
|
|
|
59
78
|
// attach the database
|
|
60
|
-
auto name = info->name;
|
|
79
|
+
auto &name = info->name;
|
|
61
80
|
const auto &path = info->path;
|
|
62
|
-
|
|
81
|
+
|
|
63
82
|
if (name.empty()) {
|
|
64
83
|
name = AttachedDatabase::ExtractDatabaseName(path);
|
|
65
84
|
}
|
|
@@ -68,22 +87,7 @@ void PhysicalAttach::GetData(ExecutionContext &context, DataChunk &chunk, Global
|
|
|
68
87
|
if (existing_db) {
|
|
69
88
|
throw BinderException("Database \"%s\" is already attached with alias \"%s\"", path, existing_db->GetName());
|
|
70
89
|
}
|
|
71
|
-
|
|
72
|
-
unique_ptr<AttachedDatabase> new_db;
|
|
73
|
-
if (type.empty()) {
|
|
74
|
-
if (!unrecognized_option.empty()) {
|
|
75
|
-
throw BinderException("Unrecognized option for attach \"%s\"", unrecognized_option);
|
|
76
|
-
}
|
|
77
|
-
new_db = make_unique<AttachedDatabase>(db, Catalog::GetSystemCatalog(db), name, path, access_mode);
|
|
78
|
-
} else {
|
|
79
|
-
// attach an extension database
|
|
80
|
-
auto entry = config.storage_extensions.find(type);
|
|
81
|
-
if (entry == config.storage_extensions.end()) {
|
|
82
|
-
throw BinderException("Unrecognized storage type \"%s\"", type);
|
|
83
|
-
}
|
|
84
|
-
new_db =
|
|
85
|
-
make_unique<AttachedDatabase>(db, Catalog::GetSystemCatalog(db), *entry->second, name, *info, access_mode);
|
|
86
|
-
}
|
|
90
|
+
auto new_db = db.CreateAttachedDatabase(*info, type, access_mode);
|
|
87
91
|
new_db->Initialize();
|
|
88
92
|
|
|
89
93
|
db_manager.AddDatabase(context.client, std::move(new_db));
|
|
@@ -34,7 +34,7 @@ unique_ptr<PhysicalOperator> DuckCatalog::PlanCreateTableAs(ClientContext &conte
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateTable &op) {
|
|
37
|
-
auto &create_info = (CreateTableInfo &)*op.info->base;
|
|
37
|
+
const auto &create_info = (CreateTableInfo &)*op.info->base;
|
|
38
38
|
auto &catalog = *op.info->schema->catalog;
|
|
39
39
|
auto existing_entry = catalog.GetEntry<TableCatalogEntry>(context, create_info.schema, create_info.table, true);
|
|
40
40
|
bool replace = op.info->Base().on_conflict == OnCreateConflict::REPLACE_ON_CONFLICT;
|
|
@@ -183,7 +183,6 @@ struct VectorArgMinMaxBase : ArgMinMaxBase<COMPARATOR> {
|
|
|
183
183
|
|
|
184
184
|
auto states = (STATE **)sdata.data;
|
|
185
185
|
for (idx_t i = 0; i < count; i++) {
|
|
186
|
-
const auto aidx = adata.sel->get_index(i);
|
|
187
186
|
const auto bidx = bdata.sel->get_index(i);
|
|
188
187
|
if (!bdata.validity.RowIsValid(bidx)) {
|
|
189
188
|
continue;
|
|
@@ -194,12 +193,12 @@ struct VectorArgMinMaxBase : ArgMinMaxBase<COMPARATOR> {
|
|
|
194
193
|
auto state = states[sidx];
|
|
195
194
|
if (!state->is_initialized) {
|
|
196
195
|
STATE::template AssignValue<BY_TYPE>(state->value, bval, false);
|
|
197
|
-
AssignVector(state, arg,
|
|
196
|
+
AssignVector(state, arg, i);
|
|
198
197
|
state->is_initialized = true;
|
|
199
198
|
|
|
200
199
|
} else if (COMPARATOR::template Operation<BY_TYPE>(bval, state->value)) {
|
|
201
200
|
STATE::template AssignValue<BY_TYPE>(state->value, bval, true);
|
|
202
|
-
AssignVector(state, arg,
|
|
201
|
+
AssignVector(state, arg, i);
|
|
203
202
|
}
|
|
204
203
|
}
|
|
205
204
|
}
|
|
@@ -36,7 +36,7 @@ static void SetSeedFunction(DataChunk &args, ExpressionState &state, Vector &res
|
|
|
36
36
|
|
|
37
37
|
auto &random_engine = RandomEngine::Get(info.context);
|
|
38
38
|
for (idx_t i = 0; i < args.size(); i++) {
|
|
39
|
-
if (input_seeds[i] < -1.0 || input_seeds[i] > 1.0) {
|
|
39
|
+
if (input_seeds[i] < -1.0 || input_seeds[i] > 1.0 || Value::IsNan(input_seeds[i])) {
|
|
40
40
|
throw Exception("SETSEED accepts seed values between -1.0 and 1.0, inclusive");
|
|
41
41
|
}
|
|
42
42
|
uint32_t norm_seed = (input_seeds[i] + 1.0) * half_max;
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
#include "duckdb/storage/statistics/string_statistics.hpp"
|
|
8
8
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
|
9
9
|
#include "utf8proc.hpp"
|
|
10
|
+
#include "duckdb/common/types/blob.hpp"
|
|
10
11
|
|
|
11
12
|
namespace duckdb {
|
|
12
13
|
|
|
@@ -138,6 +139,13 @@ string_t SubstringFun::SubstringUnicode(Vector &result, string_t input, int64_t
|
|
|
138
139
|
}
|
|
139
140
|
}
|
|
140
141
|
}
|
|
142
|
+
while (!LengthFun::IsCharacter(input_data[start_pos])) {
|
|
143
|
+
start_pos++;
|
|
144
|
+
}
|
|
145
|
+
while (end_pos < input_size && !LengthFun::IsCharacter(input_data[end_pos])) {
|
|
146
|
+
end_pos++;
|
|
147
|
+
}
|
|
148
|
+
|
|
141
149
|
if (end_pos == DConstants::INVALID_INDEX) {
|
|
142
150
|
return SubstringEmptyString(result);
|
|
143
151
|
}
|
|
@@ -257,7 +257,7 @@ public:
|
|
|
257
257
|
first_file_size = file_size;
|
|
258
258
|
bytes_read = 0;
|
|
259
259
|
if (buffer_size < file_size) {
|
|
260
|
-
bytes_per_local_state = buffer_size / MaxThreads();
|
|
260
|
+
bytes_per_local_state = buffer_size / ParallelCSVGlobalState::MaxThreads();
|
|
261
261
|
} else {
|
|
262
262
|
bytes_per_local_state = file_size / MaxThreads();
|
|
263
263
|
}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
|
2
|
-
#define DUCKDB_VERSION "0.6.2-
|
|
2
|
+
#define DUCKDB_VERSION "0.6.2-dev2226"
|
|
3
3
|
#endif
|
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
|
5
|
+
#define DUCKDB_SOURCE_ID "6e71048a0d"
|
|
6
6
|
#endif
|
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
|
8
8
|
#include "duckdb/main/database.hpp"
|
|
@@ -196,6 +196,8 @@ public:
|
|
|
196
196
|
DUCKDB_API CatalogEntry *GetEntry(ClientContext &context, const string &schema, const string &name);
|
|
197
197
|
|
|
198
198
|
//! Fetches a logical type from the catalog
|
|
199
|
+
DUCKDB_API LogicalType GetType(ClientContext &context, const string &schema, const string &names, bool if_exists);
|
|
200
|
+
|
|
199
201
|
DUCKDB_API static LogicalType GetType(ClientContext &context, const string &catalog_name, const string &schema,
|
|
200
202
|
const string &name);
|
|
201
203
|
|
|
@@ -22,7 +22,11 @@ enum class ValueRenderAlignment { LEFT, MIDDLE, RIGHT };
|
|
|
22
22
|
struct BoxRendererConfig {
|
|
23
23
|
// a max_width of 0 means we default to the terminal width
|
|
24
24
|
idx_t max_width = 0;
|
|
25
|
+
// the maximum amount of rows to render
|
|
25
26
|
idx_t max_rows = 20;
|
|
27
|
+
// the limit that is applied prior to rendering
|
|
28
|
+
// if we are rendering exactly "limit" rows then a question mark is rendered instead
|
|
29
|
+
idx_t limit = 0;
|
|
26
30
|
// the max col width determines the maximum size of a single column
|
|
27
31
|
// note that the max col width is only used if the result does not fit on the screen
|
|
28
32
|
idx_t max_col_width = 20;
|
|
@@ -25,6 +25,8 @@ public:
|
|
|
25
25
|
virtual ClientContext *TryGetClientContext() = 0;
|
|
26
26
|
|
|
27
27
|
DUCKDB_API static FileOpener *Get(ClientContext &context);
|
|
28
|
+
DUCKDB_API static ClientContext *TryGetClientContext(FileOpener *opener);
|
|
29
|
+
DUCKDB_API static bool TryGetCurrentSetting(FileOpener *opener, const string &key, Value &result);
|
|
28
30
|
};
|
|
29
31
|
|
|
30
32
|
} // namespace duckdb
|
|
@@ -35,7 +35,7 @@ public:
|
|
|
35
35
|
|
|
36
36
|
//! helper function to get the HTTP
|
|
37
37
|
static HTTPStats *TryGetStats(FileOpener *opener) {
|
|
38
|
-
auto client_context =
|
|
38
|
+
auto client_context = FileOpener::TryGetClientContext(opener);
|
|
39
39
|
if (client_context) {
|
|
40
40
|
return client_context->client_data->http_stats.get();
|
|
41
41
|
}
|