duckdb 0.6.2-dev716.0 → 0.6.2-dev735.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +54 -4
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +1 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +5 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/field_writer.hpp +15 -0
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +4 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +2 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +3 -0
- package/src/duckdb/src/parallel/pipeline_executor.cpp +10 -4
- package/src/duckdb/src/parser/transform/tableref/transform_table_function.cpp +0 -3
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +6 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +23 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +15 -4
package/package.json
CHANGED
package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp
CHANGED

@@ -4,10 +4,13 @@ namespace duckdb {
 
 class TableInOutLocalState : public OperatorState {
 public:
-	TableInOutLocalState() {
+	TableInOutLocalState() : row_index(0), new_row(true) {
 	}
 
 	unique_ptr<LocalTableFunctionState> local_state;
+	idx_t row_index;
+	bool new_row;
+	DataChunk input_chunk;
 };
 
 class TableInOutGlobalState : public GlobalOperatorState {
@@ -20,9 +23,11 @@ public:
 
 PhysicalTableInOutFunction::PhysicalTableInOutFunction(vector<LogicalType> types, TableFunction function_p,
                                                        unique_ptr<FunctionData> bind_data_p,
-                                                       vector<column_t> column_ids_p, idx_t estimated_cardinality)
+                                                       vector<column_t> column_ids_p, idx_t estimated_cardinality,
+                                                       vector<column_t> project_input_p)
     : PhysicalOperator(PhysicalOperatorType::INOUT_FUNCTION, move(types), estimated_cardinality),
-      function(move(function_p)), bind_data(move(bind_data_p)), column_ids(move(column_ids_p)) {
+      function(move(function_p)), bind_data(move(bind_data_p)), column_ids(move(column_ids_p)),
+      projected_input(move(project_input_p)) {
 }
 
 unique_ptr<OperatorState> PhysicalTableInOutFunction::GetOperatorState(ExecutionContext &context) const {
@@ -32,6 +37,9 @@ unique_ptr<OperatorState> PhysicalTableInOutFunction::GetOperatorState(Execution
 		TableFunctionInitInput input(bind_data.get(), column_ids, vector<idx_t>(), nullptr);
 		result->local_state = function.init_local(context, input, gstate.global_state.get());
 	}
+	if (!projected_input.empty()) {
+		result->input_chunk.Initialize(context.client, children[0]->types);
+	}
 	return move(result);
 }
 
@@ -49,7 +57,46 @@ OperatorResultType PhysicalTableInOutFunction::Execute(ExecutionContext &context
 	auto &gstate = (TableInOutGlobalState &)gstate_p;
 	auto &state = (TableInOutLocalState &)state_p;
 	TableFunctionInput data(bind_data.get(), state.local_state.get(), gstate.global_state.get());
-	return function.in_out_function(context, data, input, chunk);
+	if (projected_input.empty()) {
+		// straightforward case - no need to project input
+		return function.in_out_function(context, data, input, chunk);
+	}
+	// when project_input is set we execute the input function row-by-row
+	if (state.new_row) {
+		if (state.row_index >= input.size()) {
+			// finished processing this chunk
+			state.new_row = true;
+			state.row_index = 0;
+			return OperatorResultType::NEED_MORE_INPUT;
+		}
+		// we are processing a new row: fetch the data for the current row
+		D_ASSERT(input.ColumnCount() == state.input_chunk.ColumnCount());
+		// set up the input data to the table in-out function
+		for (idx_t col_idx = 0; col_idx < input.ColumnCount(); col_idx++) {
+			ConstantVector::Reference(state.input_chunk.data[col_idx], input.data[col_idx], state.row_index, 1);
+		}
+		state.input_chunk.SetCardinality(1);
+		state.row_index++;
+		state.new_row = false;
+	}
+	// set up the output data in "chunk"
+	D_ASSERT(chunk.ColumnCount() > projected_input.size());
+	D_ASSERT(state.row_index > 0);
+	idx_t base_idx = chunk.ColumnCount() - projected_input.size();
+	for (idx_t project_idx = 0; project_idx < projected_input.size(); project_idx++) {
+		auto source_idx = projected_input[project_idx];
+		auto target_idx = base_idx + project_idx;
+		ConstantVector::Reference(chunk.data[target_idx], input.data[source_idx], state.row_index - 1, 1);
+	}
+	auto result = function.in_out_function(context, data, state.input_chunk, chunk);
+	if (result == OperatorResultType::FINISHED) {
+		return result;
+	}
+	if (result == OperatorResultType::NEED_MORE_INPUT) {
+		// we finished processing this row: move to the next row
+		state.new_row = true;
+	}
+	return OperatorResultType::HAVE_MORE_OUTPUT;
 }
 
 OperatorFinalizeResultType PhysicalTableInOutFunction::FinalExecute(ExecutionContext &context, DataChunk &chunk,
@@ -57,6 +104,9 @@ OperatorFinalizeResultType PhysicalTableInOutFunction::FinalExecute(ExecutionCon
                                                                     OperatorState &state_p) const {
 	auto &gstate = (TableInOutGlobalState &)gstate_p;
 	auto &state = (TableInOutLocalState &)state_p;
+	if (!projected_input.empty()) {
+		throw InternalException("FinalExecute not supported for project_input");
+	}
 	TableFunctionInput data(bind_data.get(), state.local_state.get(), gstate.global_state.get());
 	return function.in_out_function_final(context, data, chunk);
 }
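Note: when projected_input is set, the Execute change above effectively turns the operator into a row-at-a-time state machine: each input row is wrapped into a one-row chunk, handed to the in-out function, and the requested input columns are echoed into the tail of the output chunk. The following is a hedged, standalone sketch of that control flow only; Row, Chunk, ResultType and ExecuteRowByRow are illustrative stand-ins, not DuckDB types.

#include <cstddef>
#include <functional>
#include <vector>

// Illustrative stand-ins (not DuckDB types): a Chunk is a row-major matrix.
using Row = std::vector<double>;
using Chunk = std::vector<Row>;

enum class ResultType { NEED_MORE_INPUT, HAVE_MORE_OUTPUT, FINISHED };

struct RowByRowState {
	std::size_t row_index = 0; // next input row to process
	bool new_row = true;       // true when the current row is exhausted
	Chunk input_chunk;         // one-row slice handed to the function
};

ResultType ExecuteRowByRow(const Chunk &input, Chunk &output, RowByRowState &state,
                           const std::vector<std::size_t> &projected_input,
                           const std::function<ResultType(const Chunk &, Chunk &)> &in_out_function) {
	if (state.new_row) {
		if (state.row_index >= input.size()) {
			// finished this input chunk: reset and ask the pipeline for more input
			state.row_index = 0;
			return ResultType::NEED_MORE_INPUT;
		}
		// slice the current input row into a one-row chunk
		state.input_chunk = {input[state.row_index]};
		state.row_index++;
		state.new_row = false;
	}
	output.clear();
	auto result = in_out_function(state.input_chunk, output);
	// echo the projected input columns behind the function's own output columns
	for (auto &out_row : output) {
		for (auto col : projected_input) {
			out_row.push_back(input[state.row_index - 1][col]);
		}
	}
	if (result == ResultType::FINISHED) {
		return result;
	}
	if (result == ResultType::NEED_MORE_INPUT) {
		// the function is done with this row: advance to the next row on the next call
		state.new_row = true;
	}
	return ResultType::HAVE_MORE_OUTPUT;
}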
package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp
CHANGED

@@ -89,6 +89,7 @@ static void UnnestValidity(UnifiedVectorFormat &vdata, idx_t start, idx_t end, V
 
 static void UnnestVector(UnifiedVectorFormat &vdata, Vector &source, idx_t list_size, idx_t start, idx_t end,
                          Vector &result) {
+	D_ASSERT(source.GetType() == result.GetType());
 	switch (result.GetType().InternalType()) {
 	case PhysicalType::BOOL:
 	case PhysicalType::INT8:
package/src/duckdb/src/execution/physical_plan/plan_get.cpp
CHANGED

@@ -33,11 +33,14 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalGet &op) {
 	if (!op.children.empty()) {
 		// this is for table producing functions that consume subquery results
 		D_ASSERT(op.children.size() == 1);
-		auto node = make_unique<PhysicalTableInOutFunction>(op.types, op.function, move(op.bind_data), op.column_ids,
-		                                                    op.estimated_cardinality);
+		auto node = make_unique<PhysicalTableInOutFunction>(op.types, op.function, move(op.bind_data), op.column_ids,
+		                                                    op.estimated_cardinality, move(op.projected_input));
 		node->children.push_back(CreatePlan(move(op.children[0])));
 		return move(node);
 	}
+	if (!op.projected_input.empty()) {
+		throw InternalException("LogicalGet::project_input can only be set for table-in-out functions");
+	}
 
 	unique_ptr<TableFilterSet> table_filters;
 	if (!op.table_filters.filters.empty()) {
package/src/duckdb/src/function/table/version/pragma_version.cpp
CHANGED

@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.6.2-dev716"
+#define DUCKDB_VERSION "0.6.2-dev735"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "
+#define DUCKDB_SOURCE_ID "b85fb31ebf"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/field_writer.hpp
CHANGED

@@ -203,6 +203,21 @@ public:
 		return source.Read<T>();
 	}
 
+	template <class T, class CONTAINER_TYPE = vector<T>>
+	bool ReadList(CONTAINER_TYPE &result) {
+		if (field_count >= max_field_count) {
+			// field is not there, return false and leave the result empty
+			return false;
+		}
+		AddField();
+		auto result_count = source.Read<uint32_t>();
+		result.reserve(result_count);
+		for (idx_t i = 0; i < result_count; i++) {
+			result.push_back(source.Read<T>());
+		}
+		return true;
+	}
+
 	template <class T, class CONTAINER_TYPE = vector<T>>
 	CONTAINER_TYPE ReadRequiredList() {
 		if (field_count >= max_field_count) {
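Note: ReadList above returns false and leaves the container untouched when the field was never written (field_count >= max_field_count), which presumably keeps plans serialized before projected_input existed deserializable (see the reader.ReadList<column_t>(projected_input) call in logical_get.cpp further down). Below is a minimal standalone sketch of that optional-trailing-field pattern, using a hypothetical ByteReader rather than DuckDB's FieldReader.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical stand-in: a flat byte buffer with a read cursor (no bounds checks, illustration only).
struct ByteReader {
	const uint8_t *data;
	std::size_t size;
	std::size_t offset = 0;

	template <class T>
	T Read() {
		T value{};
		std::memcpy(&value, data + offset, sizeof(T));
		offset += sizeof(T);
		return value;
	}
	bool AtEnd() const {
		return offset >= size;
	}
};

// Returns false and leaves 'result' empty when the trailing field was never written,
// mirroring the read-if-present behaviour of ReadList for older serialized plans.
template <class T>
bool ReadOptionalList(ByteReader &reader, std::vector<T> &result) {
	if (reader.AtEnd()) {
		return false;
	}
	auto count = reader.Read<uint32_t>();
	result.reserve(count);
	for (uint32_t i = 0; i < count; i++) {
		result.push_back(reader.Read<T>());
	}
	return true;
}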
package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp
CHANGED

@@ -19,7 +19,7 @@ class PhysicalTableInOutFunction : public PhysicalOperator {
 public:
 	PhysicalTableInOutFunction(vector<LogicalType> types, TableFunction function_p,
 	                           unique_ptr<FunctionData> bind_data_p, vector<column_t> column_ids_p,
-	                           idx_t estimated_cardinality);
+	                           idx_t estimated_cardinality, vector<column_t> projected_input);
 
 public:
 	unique_ptr<OperatorState> GetOperatorState(ExecutionContext &context) const override;
@@ -42,8 +42,10 @@ private:
 	TableFunction function;
 	//! Bind data of the function
 	unique_ptr<FunctionData> bind_data;
-
+	//! The set of column ids to fetch
 	vector<column_t> column_ids;
+	//! The set of input columns to project out
+	vector<column_t> projected_input;
 };
 
 } // namespace duckdb
package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp
CHANGED

@@ -44,6 +44,8 @@ public:
 	vector<LogicalType> input_table_types;
 	//! The set of named input table names for the table-in table-out function
 	vector<string> input_table_names;
+	//! For a table-in-out function, the set of projected input columns
+	vector<column_t> projected_input;
 
 	string GetName() const override;
 	string ParamsToString() const override;
package/src/duckdb/src/optimizer/remove_unused_columns.cpp
CHANGED

@@ -215,6 +215,9 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
 		LogicalOperatorVisitor::VisitOperatorExpressions(op);
 		if (!everything_referenced) {
 			auto &get = (LogicalGet &)op;
+			if (!get.function.projection_pushdown) {
+				return;
+			}
 
 			// Create "selection vector" of all column ids
 			vector<idx_t> proj_sel;
package/src/duckdb/src/parallel/pipeline_executor.cpp
CHANGED

@@ -133,8 +133,11 @@ void PipelineExecutor::FlushCachingOperatorsPull(DataChunk &result) {
 			finalize_result = cached_final_execute_result;
 		} else {
 			// Flush the current operator
-
-
+			auto current_operator = pipeline.operators[op_idx];
+			StartOperator(current_operator);
+			finalize_result = current_operator->FinalExecute(context, curr_chunk, *current_operator->op_state,
+			                                                 *intermediate_states[op_idx]);
+			EndOperator(current_operator, &curr_chunk);
 		}
 
 		auto execute_result = Execute(curr_chunk, result, op_idx + 1);
@@ -171,8 +174,11 @@ void PipelineExecutor::FlushCachingOperatorsPush() {
 		do {
 			auto &curr_chunk =
 			    op_idx + 1 >= intermediate_chunks.size() ? final_chunk : *intermediate_chunks[op_idx + 1];
-
-
+			auto current_operator = pipeline.operators[op_idx];
+			StartOperator(current_operator);
+			finalize_result = current_operator->FinalExecute(context, curr_chunk, *current_operator->op_state,
+			                                                 *intermediate_states[op_idx]);
+			EndOperator(current_operator, &curr_chunk);
 			push_result = ExecutePushInternal(curr_chunk, op_idx + 1);
 		} while (finalize_result != OperatorFinalizeResultType::FINISHED &&
 		         push_result != OperatorResultType::FINISHED);
@@ -5,9 +5,6 @@
|
|
|
5
5
|
namespace duckdb {
|
|
6
6
|
|
|
7
7
|
unique_ptr<TableRef> Transformer::TransformRangeFunction(duckdb_libpgquery::PGRangeFunction *root) {
|
|
8
|
-
if (root->lateral) {
|
|
9
|
-
throw NotImplementedException("LATERAL not implemented");
|
|
10
|
-
}
|
|
11
8
|
if (root->ordinality) {
|
|
12
9
|
throw NotImplementedException("WITH ORDINALITY not implemented");
|
|
13
10
|
}
|
|
package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp
CHANGED

@@ -156,6 +156,12 @@ Binder::BindTableFunctionInternal(TableFunction &table_function, const string &f
 	get->named_parameters = named_parameters;
 	get->input_table_types = input_table_types;
 	get->input_table_names = input_table_names;
+	if (table_function.in_out_function && !table_function.projection_pushdown) {
+		get->column_ids.reserve(return_types.size());
+		for (idx_t i = 0; i < return_types.size(); i++) {
+			get->column_ids.push_back(i);
+		}
+	}
 	// now add the table function to the bind context so its columns can be bound
 	bind_context.AddTableFunction(bind_index, function_name, return_names, return_types, get->column_ids,
 	                              get->GetTable());
package/src/duckdb/src/planner/operator/logical_get.cpp
CHANGED

@@ -54,6 +54,16 @@ vector<ColumnBinding> LogicalGet::GetColumnBindings() {
 			result.emplace_back(table_index, proj_id);
 		}
 	}
+	if (!projected_input.empty()) {
+		if (children.size() != 1) {
+			throw InternalException("LogicalGet::project_input can only be set for table-in-out functions");
+		}
+		auto child_bindings = children[0]->GetColumnBindings();
+		for (auto entry : projected_input) {
+			D_ASSERT(entry < child_bindings.size());
+			result.emplace_back(child_bindings[entry]);
+		}
+	}
 	return result;
 }
 
@@ -80,6 +90,15 @@ void LogicalGet::ResolveTypes() {
 			}
 		}
 	}
+	if (!projected_input.empty()) {
+		if (children.size() != 1) {
+			throw InternalException("LogicalGet::project_input can only be set for table-in-out functions");
+		}
+		for (auto entry : projected_input) {
+			D_ASSERT(entry < children[0]->types.size());
+			types.push_back(children[0]->types[entry]);
+		}
+	}
 }
 
 idx_t LogicalGet::EstimateCardinality(ClientContext &context) {
@@ -113,6 +132,7 @@ void LogicalGet::Serialize(FieldWriter &writer) const {
 		writer.WriteRegularSerializableList(input_table_types);
 		writer.WriteList<string>(input_table_names);
 	}
+	writer.WriteList<column_t>(projected_input);
 }
 
 unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState &state, FieldReader &reader) {
@@ -162,6 +182,8 @@ unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState
 			    "Table function deserialization failure - bind returned different returned names than were serialized");
 		}
 	}
+	vector<column_t> projected_input;
+	reader.ReadList<column_t>(projected_input);
 
 	auto result = make_unique<LogicalGet>(table_index, function, move(bind_data), returned_types, returned_names);
 	result->column_ids = move(column_ids);
@@ -171,6 +193,7 @@ unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState
 	result->named_parameters = move(named_parameters);
 	result->input_table_types = input_table_types;
 	result->input_table_names = input_table_names;
+	result->projected_input = move(projected_input);
 	return move(result);
 }
 
package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp
CHANGED

@@ -82,8 +82,8 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
 		auto left_columns = plan->GetColumnBindings().size();
 		auto delim_index = binder.GenerateTableIndex();
 		this->base_binding = ColumnBinding(delim_index, 0);
-		this->delim_offset =
-		this->data_offset =
+		this->delim_offset = left_columns;
+		this->data_offset = 0;
 		auto delim_scan = make_unique<LogicalDelimGet>(delim_index, delim_types);
 		return LogicalCrossProduct::Create(move(plan), move(delim_scan));
 	}
@@ -505,8 +505,19 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
 	case LogicalOperatorType::LOGICAL_ORDER_BY:
 		plan->children[0] = PushDownDependentJoin(move(plan->children[0]));
 		return plan;
-	case LogicalOperatorType::LOGICAL_GET:
-
+	case LogicalOperatorType::LOGICAL_GET: {
+		auto &get = (LogicalGet &)*plan;
+		if (get.children.size() != 1) {
+			throw InternalException("Flatten dependent joins - logical get encountered without children");
+		}
+		plan->children[0] = PushDownDependentJoin(move(plan->children[0]));
+		for (idx_t i = 0; i < (perform_delim ? correlated_columns.size() : 1); i++) {
+			get.projected_input.push_back(this->delim_offset + i);
+		}
+		this->delim_offset = get.returned_types.size();
+		this->data_offset = 0;
+		return plan;
+	}
 	case LogicalOperatorType::LOGICAL_RECURSIVE_CTE: {
 		throw BinderException("Recursive CTEs not supported in correlated subquery");
 	}
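Note on the index bookkeeping in the LOGICAL_GET case above (a hedged worked example; the numbers are illustrative, not taken from the diff): after the child has been rewritten by PushDownDependentJoin, the delim columns sit in the child's output starting at delim_offset. If the subquery has two correlated columns and delim_offset is 3, the loop records projected_input = {3, 4}, so PhysicalTableInOutFunction::Execute re-emits those child columns behind the function's own output columns. Because they now follow the function's returned types, delim_offset is reset to returned_types.size() and data_offset to 0.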