duckdb 0.6.2-dev712.0 → 0.6.2-dev733.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.6.2-dev712.0",
5
+ "version": "0.6.2-dev733.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -4,10 +4,13 @@ namespace duckdb {
4
4
 
5
5
  class TableInOutLocalState : public OperatorState {
6
6
  public:
7
- TableInOutLocalState() {
7
+ TableInOutLocalState() : row_index(0), new_row(true) {
8
8
  }
9
9
 
10
10
  unique_ptr<LocalTableFunctionState> local_state;
11
+ idx_t row_index;
12
+ bool new_row;
13
+ DataChunk input_chunk;
11
14
  };
12
15
 
13
16
  class TableInOutGlobalState : public GlobalOperatorState {
@@ -20,9 +23,11 @@ public:
20
23
 
21
24
  PhysicalTableInOutFunction::PhysicalTableInOutFunction(vector<LogicalType> types, TableFunction function_p,
22
25
  unique_ptr<FunctionData> bind_data_p,
23
- vector<column_t> column_ids_p, idx_t estimated_cardinality)
26
+ vector<column_t> column_ids_p, idx_t estimated_cardinality,
27
+ vector<column_t> project_input_p)
24
28
  : PhysicalOperator(PhysicalOperatorType::INOUT_FUNCTION, move(types), estimated_cardinality),
25
- function(move(function_p)), bind_data(move(bind_data_p)), column_ids(move(column_ids_p)) {
29
+ function(move(function_p)), bind_data(move(bind_data_p)), column_ids(move(column_ids_p)),
30
+ projected_input(move(project_input_p)) {
26
31
  }
27
32
 
28
33
  unique_ptr<OperatorState> PhysicalTableInOutFunction::GetOperatorState(ExecutionContext &context) const {
@@ -32,6 +37,9 @@ unique_ptr<OperatorState> PhysicalTableInOutFunction::GetOperatorState(Execution
32
37
  TableFunctionInitInput input(bind_data.get(), column_ids, vector<idx_t>(), nullptr);
33
38
  result->local_state = function.init_local(context, input, gstate.global_state.get());
34
39
  }
40
+ if (!projected_input.empty()) {
41
+ result->input_chunk.Initialize(context.client, children[0]->types);
42
+ }
35
43
  return move(result);
36
44
  }
37
45
 
@@ -49,7 +57,46 @@ OperatorResultType PhysicalTableInOutFunction::Execute(ExecutionContext &context
49
57
  auto &gstate = (TableInOutGlobalState &)gstate_p;
50
58
  auto &state = (TableInOutLocalState &)state_p;
51
59
  TableFunctionInput data(bind_data.get(), state.local_state.get(), gstate.global_state.get());
52
- return function.in_out_function(context, data, input, chunk);
60
+ if (projected_input.empty()) {
61
+ // straightforward case - no need to project input
62
+ return function.in_out_function(context, data, input, chunk);
63
+ }
64
+ // when projected_input is set we execute the table in-out function row-by-row
65
+ if (state.new_row) {
66
+ if (state.row_index >= input.size()) {
67
+ // finished processing this chunk
68
+ state.new_row = true;
69
+ state.row_index = 0;
70
+ return OperatorResultType::NEED_MORE_INPUT;
71
+ }
72
+ // we are processing a new row: fetch the data for the current row
73
+ D_ASSERT(input.ColumnCount() == state.input_chunk.ColumnCount());
74
+ // set up the input data to the table in-out function
75
+ for (idx_t col_idx = 0; col_idx < input.ColumnCount(); col_idx++) {
76
+ ConstantVector::Reference(state.input_chunk.data[col_idx], input.data[col_idx], state.row_index, 1);
77
+ }
78
+ state.input_chunk.SetCardinality(1);
79
+ state.row_index++;
80
+ state.new_row = false;
81
+ }
82
+ // set up the output data in "chunk"
83
+ D_ASSERT(chunk.ColumnCount() > projected_input.size());
84
+ D_ASSERT(state.row_index > 0);
85
+ idx_t base_idx = chunk.ColumnCount() - projected_input.size();
86
+ for (idx_t project_idx = 0; project_idx < projected_input.size(); project_idx++) {
87
+ auto source_idx = projected_input[project_idx];
88
+ auto target_idx = base_idx + project_idx;
89
+ ConstantVector::Reference(chunk.data[target_idx], input.data[source_idx], state.row_index - 1, 1);
90
+ }
91
+ auto result = function.in_out_function(context, data, state.input_chunk, chunk);
92
+ if (result == OperatorResultType::FINISHED) {
93
+ return result;
94
+ }
95
+ if (result == OperatorResultType::NEED_MORE_INPUT) {
96
+ // we finished processing this row: move to the next row
97
+ state.new_row = true;
98
+ }
99
+ return OperatorResultType::HAVE_MORE_OUTPUT;
53
100
  }
54
101
 
55
102
  OperatorFinalizeResultType PhysicalTableInOutFunction::FinalExecute(ExecutionContext &context, DataChunk &chunk,
@@ -57,6 +104,9 @@ OperatorFinalizeResultType PhysicalTableInOutFunction::FinalExecute(ExecutionCon
57
104
  OperatorState &state_p) const {
58
105
  auto &gstate = (TableInOutGlobalState &)gstate_p;
59
106
  auto &state = (TableInOutLocalState &)state_p;
107
+ if (!projected_input.empty()) {
108
+ throw InternalException("FinalExecute not supported for project_input");
109
+ }
60
110
  TableFunctionInput data(bind_data.get(), state.local_state.get(), gstate.global_state.get());
61
111
  return function.in_out_function_final(context, data, chunk);
62
112
  }
@@ -89,6 +89,7 @@ static void UnnestValidity(UnifiedVectorFormat &vdata, idx_t start, idx_t end, V
89
89
 
90
90
  static void UnnestVector(UnifiedVectorFormat &vdata, Vector &source, idx_t list_size, idx_t start, idx_t end,
91
91
  Vector &result) {
92
+ D_ASSERT(source.GetType() == result.GetType());
92
93
  switch (result.GetType().InternalType()) {
93
94
  case PhysicalType::BOOL:
94
95
  case PhysicalType::INT8:
@@ -33,11 +33,14 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalGet &op) {
33
33
  if (!op.children.empty()) {
34
34
  // this is for table producing functions that consume subquery results
35
35
  D_ASSERT(op.children.size() == 1);
36
- auto node = make_unique<PhysicalTableInOutFunction>(op.returned_types, op.function, move(op.bind_data),
37
- op.column_ids, op.estimated_cardinality);
36
+ auto node = make_unique<PhysicalTableInOutFunction>(op.types, op.function, move(op.bind_data), op.column_ids,
37
+ op.estimated_cardinality, move(op.projected_input));
38
38
  node->children.push_back(CreatePlan(move(op.children[0])));
39
39
  return move(node);
40
40
  }
41
+ if (!op.projected_input.empty()) {
42
+ throw InternalException("LogicalGet::project_input can only be set for table-in-out functions");
43
+ }
41
44
 
42
45
  unique_ptr<TableFilterSet> table_filters;
43
46
  if (!op.table_filters.filters.empty()) {
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.6.2-dev712"
2
+ #define DUCKDB_VERSION "0.6.2-dev733"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "4db88c5e6b"
5
+ #define DUCKDB_SOURCE_ID "cbe7a7fdc7"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -203,6 +203,21 @@ public:
203
203
  return source.Read<T>();
204
204
  }
205
205
 
206
+ template <class T, class CONTAINER_TYPE = vector<T>>
207
+ bool ReadList(CONTAINER_TYPE &result) {
208
+ if (field_count >= max_field_count) {
209
+ // field is not there, return false and leave the result untouched
210
+ return false;
211
+ }
212
+ AddField();
213
+ auto result_count = source.Read<uint32_t>();
214
+ result.reserve(result_count);
215
+ for (idx_t i = 0; i < result_count; i++) {
216
+ result.push_back(source.Read<T>());
217
+ }
218
+ return true;
219
+ }
220
+
206
221
  template <class T, class CONTAINER_TYPE = vector<T>>
207
222
  CONTAINER_TYPE ReadRequiredList() {
208
223
  if (field_count >= max_field_count) {
@@ -19,7 +19,7 @@ class PhysicalTableInOutFunction : public PhysicalOperator {
19
19
  public:
20
20
  PhysicalTableInOutFunction(vector<LogicalType> types, TableFunction function_p,
21
21
  unique_ptr<FunctionData> bind_data_p, vector<column_t> column_ids_p,
22
- idx_t estimated_cardinality);
22
+ idx_t estimated_cardinality, vector<column_t> projected_input);
23
23
 
24
24
  public:
25
25
  unique_ptr<OperatorState> GetOperatorState(ExecutionContext &context) const override;
@@ -42,8 +42,10 @@ private:
42
42
  TableFunction function;
43
43
  //! Bind data of the function
44
44
  unique_ptr<FunctionData> bind_data;
45
-
45
+ //! The set of column ids to fetch
46
46
  vector<column_t> column_ids;
47
+ //! The set of input columns to project out
48
+ vector<column_t> projected_input;
47
49
  };
48
50
 
49
51
  } // namespace duckdb
@@ -57,6 +57,9 @@ public:
57
57
  static void ReplacementOpenPost(ClientContext &context, const string &extension, DatabaseInstance &instance,
58
58
  ReplacementOpenData *open_data);
59
59
 
60
+ // Returns the extension prefix of the path (the part before the first ':'), or an empty string if the path is not a replacement open path
61
+ static string ExtractExtensionPrefixFromPath(const string &path);
62
+
60
63
  private:
61
64
  static const vector<string> PathComponents();
62
65
  static ExtensionInitResult InitialLoad(DBConfig &context, FileOpener *opener, const string &extension);
@@ -44,6 +44,8 @@ public:
44
44
  vector<LogicalType> input_table_types;
45
45
  //! The set of named input table names for the table-in table-out function
46
46
  vector<string> input_table_names;
47
+ //! For a table-in-out function, the set of projected input columns
48
+ vector<column_t> projected_input;
47
49
 
48
50
  string GetName() const override;
49
51
  string ParamsToString() const override;
@@ -1,5 +1,5 @@
1
1
  #include "duckdb/main/db_instance_cache.hpp"
2
-
2
+ #include "duckdb/main/extension_helper.hpp"
3
3
  namespace duckdb {
4
4
 
5
5
  string GetDBAbsolutePath(const string &database) {
@@ -10,6 +10,10 @@ string GetDBAbsolutePath(const string &database) {
10
10
  // this is a memory db, just return it.
11
11
  return database;
12
12
  }
13
+ if (!ExtensionHelper::ExtractExtensionPrefixFromPath(database).empty()) {
14
+ // this database path is handled by a replacement open and is not a file path
15
+ return database;
16
+ }
13
17
  if (FileSystem::IsPathAbsolute(database)) {
14
18
  return database;
15
19
  }
@@ -176,4 +176,20 @@ void ExtensionHelper::ReplacementOpenPost(ClientContext &context, const string &
176
176
  }
177
177
  }
178
178
 
179
+ string ExtensionHelper::ExtractExtensionPrefixFromPath(const string &path) {
180
+ auto first_colon = path.find(':');
181
+ if (first_colon == string::npos || first_colon < 2) { // needs to be at least two characters because windows c: ...
182
+ return "";
183
+ }
184
+ auto extension = path.substr(0, first_colon);
185
+ D_ASSERT(extension.size() > 1);
186
+ // needs to be alphanumeric (underscores are also allowed)
187
+ for (auto &ch : extension) {
188
+ if (!isalnum(ch) && ch != '_') {
189
+ return "";
190
+ }
191
+ }
192
+ return extension;
193
+ }
194
+
179
195
  } // namespace duckdb
@@ -16,18 +16,10 @@ struct ExtensionPrefixOpenData : public ReplacementOpenData {
16
16
 
17
17
  static unique_ptr<ReplacementOpenData> ExtensionPrefixPreOpen(DBConfig &config, ReplacementOpenStaticData *) {
18
18
  auto path = config.options.database_path;
19
- auto first_colon = path.find(':');
20
- if (first_colon == string::npos || first_colon < 2) { // needs to be at least two characters because windows c: ...
19
+ string extension = ExtensionHelper::ExtractExtensionPrefixFromPath(path);
20
+ if (extension.empty()) {
21
21
  return nullptr;
22
22
  }
23
- auto extension = path.substr(0, first_colon);
24
- D_ASSERT(extension.size() > 1);
25
- // needs to be alphanumeric
26
- for (auto &ch : extension) {
27
- if (!isalnum(ch) && ch != '_') {
28
- return nullptr;
29
- }
30
- }
31
23
  auto extension_data = ExtensionHelper::ReplacementOpenPre(extension, config);
32
24
  if (extension_data) {
33
25
  return make_unique<ExtensionPrefixOpenData>(extension, path, move(extension_data));
@@ -215,6 +215,9 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
215
215
  LogicalOperatorVisitor::VisitOperatorExpressions(op);
216
216
  if (!everything_referenced) {
217
217
  auto &get = (LogicalGet &)op;
218
+ if (!get.function.projection_pushdown) {
219
+ return;
220
+ }
218
221
 
219
222
  // Create "selection vector" of all column ids
220
223
  vector<idx_t> proj_sel;
@@ -133,8 +133,11 @@ void PipelineExecutor::FlushCachingOperatorsPull(DataChunk &result) {
133
133
  finalize_result = cached_final_execute_result;
134
134
  } else {
135
135
  // Flush the current operator
136
- finalize_result = pipeline.operators[op_idx]->FinalExecute(
137
- context, curr_chunk, *pipeline.operators[op_idx]->op_state, *intermediate_states[op_idx]);
136
+ auto current_operator = pipeline.operators[op_idx];
137
+ StartOperator(current_operator);
138
+ finalize_result = current_operator->FinalExecute(context, curr_chunk, *current_operator->op_state,
139
+ *intermediate_states[op_idx]);
140
+ EndOperator(current_operator, &curr_chunk);
138
141
  }
139
142
 
140
143
  auto execute_result = Execute(curr_chunk, result, op_idx + 1);
@@ -171,8 +174,11 @@ void PipelineExecutor::FlushCachingOperatorsPush() {
171
174
  do {
172
175
  auto &curr_chunk =
173
176
  op_idx + 1 >= intermediate_chunks.size() ? final_chunk : *intermediate_chunks[op_idx + 1];
174
- finalize_result = pipeline.operators[op_idx]->FinalExecute(
175
- context, curr_chunk, *pipeline.operators[op_idx]->op_state, *intermediate_states[op_idx]);
177
+ auto current_operator = pipeline.operators[op_idx];
178
+ StartOperator(current_operator);
179
+ finalize_result = current_operator->FinalExecute(context, curr_chunk, *current_operator->op_state,
180
+ *intermediate_states[op_idx]);
181
+ EndOperator(current_operator, &curr_chunk);
176
182
  push_result = ExecutePushInternal(curr_chunk, op_idx + 1);
177
183
  } while (finalize_result != OperatorFinalizeResultType::FINISHED &&
178
184
  push_result != OperatorResultType::FINISHED);
@@ -5,9 +5,6 @@
5
5
  namespace duckdb {
6
6
 
7
7
  unique_ptr<TableRef> Transformer::TransformRangeFunction(duckdb_libpgquery::PGRangeFunction *root) {
8
- if (root->lateral) {
9
- throw NotImplementedException("LATERAL not implemented");
10
- }
11
8
  if (root->ordinality) {
12
9
  throw NotImplementedException("WITH ORDINALITY not implemented");
13
10
  }
@@ -156,6 +156,12 @@ Binder::BindTableFunctionInternal(TableFunction &table_function, const string &f
156
156
  get->named_parameters = named_parameters;
157
157
  get->input_table_types = input_table_types;
158
158
  get->input_table_names = input_table_names;
159
+ if (table_function.in_out_function && !table_function.projection_pushdown) {
160
+ get->column_ids.reserve(return_types.size());
161
+ for (idx_t i = 0; i < return_types.size(); i++) {
162
+ get->column_ids.push_back(i);
163
+ }
164
+ }
159
165
  // now add the table function to the bind context so its columns can be bound
160
166
  bind_context.AddTableFunction(bind_index, function_name, return_names, return_types, get->column_ids,
161
167
  get->GetTable());
@@ -54,6 +54,16 @@ vector<ColumnBinding> LogicalGet::GetColumnBindings() {
54
54
  result.emplace_back(table_index, proj_id);
55
55
  }
56
56
  }
57
+ if (!projected_input.empty()) {
58
+ if (children.size() != 1) {
59
+ throw InternalException("LogicalGet::project_input can only be set for table-in-out functions");
60
+ }
61
+ auto child_bindings = children[0]->GetColumnBindings();
62
+ for (auto entry : projected_input) {
63
+ D_ASSERT(entry < child_bindings.size());
64
+ result.emplace_back(child_bindings[entry]);
65
+ }
66
+ }
57
67
  return result;
58
68
  }
59
69
 
@@ -80,6 +90,15 @@ void LogicalGet::ResolveTypes() {
80
90
  }
81
91
  }
82
92
  }
93
+ if (!projected_input.empty()) {
94
+ if (children.size() != 1) {
95
+ throw InternalException("LogicalGet::project_input can only be set for table-in-out functions");
96
+ }
97
+ for (auto entry : projected_input) {
98
+ D_ASSERT(entry < children[0]->types.size());
99
+ types.push_back(children[0]->types[entry]);
100
+ }
101
+ }
83
102
  }
84
103
 
85
104
  idx_t LogicalGet::EstimateCardinality(ClientContext &context) {
@@ -113,6 +132,7 @@ void LogicalGet::Serialize(FieldWriter &writer) const {
113
132
  writer.WriteRegularSerializableList(input_table_types);
114
133
  writer.WriteList<string>(input_table_names);
115
134
  }
135
+ writer.WriteList<column_t>(projected_input);
116
136
  }
117
137
 
118
138
  unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState &state, FieldReader &reader) {
@@ -162,6 +182,8 @@ unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState
162
182
  "Table function deserialization failure - bind returned different returned names than were serialized");
163
183
  }
164
184
  }
185
+ vector<column_t> projected_input;
186
+ reader.ReadList<column_t>(projected_input);
165
187
 
166
188
  auto result = make_unique<LogicalGet>(table_index, function, move(bind_data), returned_types, returned_names);
167
189
  result->column_ids = move(column_ids);
@@ -171,6 +193,7 @@ unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState
171
193
  result->named_parameters = move(named_parameters);
172
194
  result->input_table_types = input_table_types;
173
195
  result->input_table_names = input_table_names;
196
+ result->projected_input = move(projected_input);
174
197
  return move(result);
175
198
  }
176
199
 
@@ -82,8 +82,8 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
82
82
  auto left_columns = plan->GetColumnBindings().size();
83
83
  auto delim_index = binder.GenerateTableIndex();
84
84
  this->base_binding = ColumnBinding(delim_index, 0);
85
- this->delim_offset = 0;
86
- this->data_offset = left_columns;
85
+ this->delim_offset = left_columns;
86
+ this->data_offset = 0;
87
87
  auto delim_scan = make_unique<LogicalDelimGet>(delim_index, delim_types);
88
88
  return LogicalCrossProduct::Create(move(plan), move(delim_scan));
89
89
  }
@@ -505,8 +505,19 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
505
505
  case LogicalOperatorType::LOGICAL_ORDER_BY:
506
506
  plan->children[0] = PushDownDependentJoin(move(plan->children[0]));
507
507
  return plan;
508
- case LogicalOperatorType::LOGICAL_GET:
509
- throw BinderException("Table-in table-out functions not (yet) supported in correlated subqueries");
508
+ case LogicalOperatorType::LOGICAL_GET: {
509
+ auto &get = (LogicalGet &)*plan;
510
+ if (get.children.size() != 1) {
511
+ throw InternalException("Flatten dependent joins - logical get encountered without children");
512
+ }
513
+ plan->children[0] = PushDownDependentJoin(move(plan->children[0]));
514
+ for (idx_t i = 0; i < (perform_delim ? correlated_columns.size() : 1); i++) {
515
+ get.projected_input.push_back(this->delim_offset + i);
516
+ }
517
+ this->delim_offset = get.returned_types.size();
518
+ this->data_offset = 0;
519
+ return plan;
520
+ }
510
521
  case LogicalOperatorType::LOGICAL_RECURSIVE_CTE: {
511
522
  throw BinderException("Recursive CTEs not supported in correlated subquery");
512
523
  }