duckdb 0.5.2-dev494.0 → 0.5.2-dev512.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.2-dev494.0",
4
+ "version": "0.5.2-dev512.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -42766,7 +42766,7 @@ void DataChunk::Fuse(DataChunk &other) {
42766
42766
  other.Destroy();
42767
42767
  }
42768
42768
 
42769
- void DataChunk::ReferenceColumns(DataChunk &other, vector<column_t> column_ids) {
42769
+ void DataChunk::ReferenceColumns(DataChunk &other, const vector<column_t> &column_ids) {
42770
42770
  D_ASSERT(ColumnCount() == column_ids.size());
42771
42771
  Reset();
42772
42772
  for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) {
@@ -72428,16 +72428,25 @@ namespace duckdb {
72428
72428
  //! Represents a scan of a base table
72429
72429
  class PhysicalTableScan : public PhysicalOperator {
72430
72430
  public:
72431
+ //! Regular Table Scan
72431
72432
  PhysicalTableScan(vector<LogicalType> types, TableFunction function, unique_ptr<FunctionData> bind_data,
72432
72433
  vector<column_t> column_ids, vector<string> names, unique_ptr<TableFilterSet> table_filters,
72433
72434
  idx_t estimated_cardinality);
72435
+ //! Table scan that immediately projects out filter columns that are unused in the remainder of the query plan
72436
+ PhysicalTableScan(vector<LogicalType> types, TableFunction function, unique_ptr<FunctionData> bind_data,
72437
+ vector<LogicalType> returned_types, vector<column_t> column_ids, vector<idx_t> projection_ids,
72438
+ vector<string> names, unique_ptr<TableFilterSet> table_filters, idx_t estimated_cardinality);
72434
72439
 
72435
72440
  //! The table function
72436
72441
  TableFunction function;
72437
72442
  //! Bind data of the function
72438
72443
  unique_ptr<FunctionData> bind_data;
72439
- //! The projected-out column ids
72444
+ //! The types of ALL columns that can be returned by the table function
72445
+ vector<LogicalType> returned_types;
72446
+ //! The column ids used within the table function
72440
72447
  vector<column_t> column_ids;
72448
+ //! The projected-out column ids
72449
+ vector<idx_t> projection_ids;
72441
72450
  //! The names of the columns
72442
72451
  vector<string> names;
72443
72452
  //! The table filters
@@ -74665,11 +74674,7 @@ SinkResultType PhysicalOrder::Sink(ExecutionContext &context, GlobalSinkState &g
74665
74674
  lstate.key_executor.Execute(input, keys);
74666
74675
 
74667
74676
  auto &payload = lstate.payload;
74668
- payload.Reset();
74669
- for (idx_t col_idx = 0; col_idx < projections.size(); col_idx++) {
74670
- payload.data[col_idx].Reference(input.data[projections[col_idx]]);
74671
- }
74672
- payload.SetCardinality(input.size());
74677
+ payload.ReferenceColumns(input, projections);
74673
74678
 
74674
74679
  // Sink the data into the local sort state
74675
74680
  keys.Verify();
@@ -78642,7 +78647,7 @@ unique_ptr<OperatorState> PhysicalTableInOutFunction::GetOperatorState(Execution
78642
78647
  auto &gstate = (TableInOutGlobalState &)*op_state;
78643
78648
  auto result = make_unique<TableInOutLocalState>();
78644
78649
  if (function.init_local) {
78645
- TableFunctionInitInput input(bind_data.get(), column_ids, nullptr);
78650
+ TableFunctionInitInput input(bind_data.get(), column_ids, vector<idx_t>(), nullptr);
78646
78651
  result->local_state = function.init_local(context, input, gstate.global_state.get());
78647
78652
  }
78648
78653
  return move(result);
@@ -78651,7 +78656,7 @@ unique_ptr<OperatorState> PhysicalTableInOutFunction::GetOperatorState(Execution
78651
78656
  unique_ptr<GlobalOperatorState> PhysicalTableInOutFunction::GetGlobalOperatorState(ClientContext &context) const {
78652
78657
  auto result = make_unique<TableInOutGlobalState>();
78653
78658
  if (function.init_global) {
78654
- TableFunctionInitInput input(bind_data.get(), column_ids, nullptr);
78659
+ TableFunctionInitInput input(bind_data.get(), column_ids, vector<idx_t>(), nullptr);
78655
78660
  result->global_state = function.init_global(context, input);
78656
78661
  }
78657
78662
  return move(result);
@@ -79299,11 +79304,22 @@ PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction fu
79299
79304
  table_filters(move(table_filters_p)) {
79300
79305
  }
79301
79306
 
79307
+ PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction function_p,
79308
+ unique_ptr<FunctionData> bind_data_p, vector<LogicalType> returned_types_p,
79309
+ vector<column_t> column_ids_p, vector<idx_t> projection_ids_p,
79310
+ vector<string> names_p, unique_ptr<TableFilterSet> table_filters_p,
79311
+ idx_t estimated_cardinality)
79312
+ : PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, move(types), estimated_cardinality),
79313
+ function(move(function_p)), bind_data(move(bind_data_p)), returned_types(move(returned_types_p)),
79314
+ column_ids(move(column_ids_p)), projection_ids(move(projection_ids_p)), names(move(names_p)),
79315
+ table_filters(move(table_filters_p)) {
79316
+ }
79317
+
79302
79318
  class TableScanGlobalSourceState : public GlobalSourceState {
79303
79319
  public:
79304
79320
  TableScanGlobalSourceState(ClientContext &context, const PhysicalTableScan &op) {
79305
79321
  if (op.function.init_global) {
79306
- TableFunctionInitInput input(op.bind_data.get(), op.column_ids, op.table_filters.get());
79322
+ TableFunctionInitInput input(op.bind_data.get(), op.column_ids, op.projection_ids, op.table_filters.get());
79307
79323
  global_state = op.function.init_global(context, input);
79308
79324
  if (global_state) {
79309
79325
  max_threads = global_state->MaxThreads();
@@ -79326,7 +79342,7 @@ public:
79326
79342
  TableScanLocalSourceState(ExecutionContext &context, TableScanGlobalSourceState &gstate,
79327
79343
  const PhysicalTableScan &op) {
79328
79344
  if (op.function.init_local) {
79329
- TableFunctionInitInput input(op.bind_data.get(), op.column_ids, op.table_filters.get());
79345
+ TableFunctionInitInput input(op.bind_data.get(), op.column_ids, op.projection_ids, op.table_filters.get());
79330
79346
  local_state = op.function.init_local(context, input, gstate.global_state.get());
79331
79347
  }
79332
79348
  }
@@ -79383,12 +79399,13 @@ string PhysicalTableScan::ParamsToString() const {
79383
79399
  result += "\n[INFOSEPARATOR]\n";
79384
79400
  }
79385
79401
  if (function.projection_pushdown) {
79386
- for (idx_t i = 0; i < column_ids.size(); i++) {
79387
- if (column_ids[i] < names.size()) {
79402
+ for (idx_t i = 0; i < projection_ids.size(); i++) {
79403
+ const auto &column_id = column_ids[projection_ids[i]];
79404
+ if (column_id < names.size()) {
79388
79405
  if (i > 0) {
79389
79406
  result += "\n";
79390
79407
  }
79391
- result += names[column_ids[i]];
79408
+ result += names[column_id];
79392
79409
  }
79393
79410
  }
79394
79411
  }
@@ -83671,7 +83688,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalFilter &op
83671
83688
 
83672
83689
 
83673
83690
 
83674
-
83675
83691
  namespace duckdb {
83676
83692
 
83677
83693
  unique_ptr<TableFilterSet> CreateTableFilterSet(TableFilterSet &table_filters, vector<column_t> &column_ids) {
@@ -83715,8 +83731,9 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalGet &op) {
83715
83731
  // create the table scan node
83716
83732
  if (!op.function.projection_pushdown) {
83717
83733
  // function does not support projection pushdown
83718
- auto node = make_unique<PhysicalTableScan>(op.returned_types, op.function, move(op.bind_data), op.column_ids,
83719
- op.names, move(table_filters), op.estimated_cardinality);
83734
+ auto node = make_unique<PhysicalTableScan>(op.returned_types, op.function, move(op.bind_data),
83735
+ op.returned_types, op.column_ids, vector<column_t>(), op.names,
83736
+ move(table_filters), op.estimated_cardinality);
83720
83737
  // first check if an additional projection is necessary
83721
83738
  if (op.column_ids.size() == op.returned_types.size()) {
83722
83739
  bool projection_necessary = false;
@@ -83751,8 +83768,9 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalGet &op) {
83751
83768
  projection->children.push_back(move(node));
83752
83769
  return move(projection);
83753
83770
  } else {
83754
- return make_unique<PhysicalTableScan>(op.types, op.function, move(op.bind_data), op.column_ids, op.names,
83755
- move(table_filters), op.estimated_cardinality);
83771
+ return make_unique<PhysicalTableScan>(op.types, op.function, move(op.bind_data), op.returned_types,
83772
+ op.column_ids, op.projection_ids, op.names, move(table_filters),
83773
+ op.estimated_cardinality);
83756
83774
  }
83757
83775
  }
83758
83776
 
@@ -116769,6 +116787,7 @@ string ScalarMacroFunction::ToSQL(const string &schema, const string &name) {
116769
116787
 
116770
116788
 
116771
116789
 
116790
+
116772
116791
  //===----------------------------------------------------------------------===//
116773
116792
  // DuckDB
116774
116793
  //
@@ -116852,6 +116871,7 @@ struct ArrowScanFunctionData : public PyTableFunctionData {
116852
116871
  }
116853
116872
  //! This holds the original list type (col_idx, [ArrowListType,size])
116854
116873
  unordered_map<idx_t, unique_ptr<ArrowConvertData>> arrow_convert_data;
116874
+ vector<LogicalType> all_types;
116855
116875
  atomic<idx_t> lines_read;
116856
116876
  ArrowSchemaWrapper schema_root;
116857
116877
  idx_t rows_per_thread;
@@ -116872,6 +116892,8 @@ struct ArrowScanLocalState : public LocalTableFunctionState {
116872
116892
  //! Store child vectors for Arrow Dictionary Vectors (col-idx,vector)
116873
116893
  unordered_map<idx_t, unique_ptr<Vector>> arrow_dictionary_vectors;
116874
116894
  TableFilterSet *filters = nullptr;
116895
+ //! The DataChunk containing all read columns (even filter columns that are immediately removed)
116896
+ DataChunk all_columns;
116875
116897
  };
116876
116898
 
116877
116899
  struct ArrowScanGlobalState : public GlobalTableFunctionState {
@@ -116880,9 +116902,16 @@ struct ArrowScanGlobalState : public GlobalTableFunctionState {
116880
116902
  idx_t max_threads = 1;
116881
116903
  bool done = false;
116882
116904
 
116905
+ vector<idx_t> projection_ids;
116906
+ vector<LogicalType> scanned_types;
116907
+
116883
116908
  idx_t MaxThreads() const override {
116884
116909
  return max_threads;
116885
116910
  }
116911
+
116912
+ bool CanRemoveFilterColumns() const {
116913
+ return !projection_ids.empty();
116914
+ }
116886
116915
  };
116887
116916
 
116888
116917
  struct ArrowTableFunction {
@@ -116927,7 +116956,6 @@ private:
116927
116956
 
116928
116957
 
116929
116958
 
116930
-
116931
116959
  namespace duckdb {
116932
116960
 
116933
116961
  LogicalType GetArrowLogicalType(ArrowSchema &schema,
@@ -117139,6 +117167,7 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
117139
117167
  names.push_back(name);
117140
117168
  }
117141
117169
  RenameArrowColumns(names);
117170
+ res->all_types = return_types;
117142
117171
  return move(res);
117143
117172
  }
117144
117173
 
@@ -117190,6 +117219,16 @@ unique_ptr<GlobalTableFunctionState> ArrowTableFunction::ArrowScanInitGlobal(Cli
117190
117219
  auto result = make_unique<ArrowScanGlobalState>();
117191
117220
  result->stream = ProduceArrowScan(bind_data, input.column_ids, input.filters);
117192
117221
  result->max_threads = ArrowScanMaxThreads(context, input.bind_data);
117222
+ if (input.CanRemoveFilterColumns()) {
117223
+ result->projection_ids = input.projection_ids;
117224
+ for (const auto &col_idx : input.column_ids) {
117225
+ if (col_idx == COLUMN_IDENTIFIER_ROW_ID) {
117226
+ result->scanned_types.emplace_back(LogicalType::ROW_TYPE);
117227
+ } else {
117228
+ result->scanned_types.push_back(bind_data.all_types[col_idx]);
117229
+ }
117230
+ }
117231
+ }
117193
117232
  return move(result);
117194
117233
  }
117195
117234
 
@@ -117201,6 +117240,10 @@ unique_ptr<LocalTableFunctionState> ArrowTableFunction::ArrowScanInitLocal(Execu
117201
117240
  auto result = make_unique<ArrowScanLocalState>(move(current_chunk));
117202
117241
  result->column_ids = input.column_ids;
117203
117242
  result->filters = input.filters;
117243
+ if (input.CanRemoveFilterColumns()) {
117244
+ auto &asgs = (ArrowScanGlobalState &)*global_state_p;
117245
+ result->all_columns.Initialize(context.client, asgs.scanned_types);
117246
+ }
117204
117247
  if (!ArrowScanParallelStateNext(context.client, input.bind_data, *result, global_state)) {
117205
117248
  return nullptr;
117206
117249
  }
@@ -117223,8 +117266,16 @@ void ArrowTableFunction::ArrowScanFunction(ClientContext &context, TableFunction
117223
117266
  }
117224
117267
  int64_t output_size = MinValue<int64_t>(STANDARD_VECTOR_SIZE, state.chunk->arrow_array.length - state.chunk_offset);
117225
117268
  data.lines_read += output_size;
117226
- output.SetCardinality(output_size);
117227
- ArrowToDuckDB(state, data.arrow_convert_data, output, data.lines_read - output_size);
117269
+ if (global_state.CanRemoveFilterColumns()) {
117270
+ state.all_columns.Reset();
117271
+ state.all_columns.SetCardinality(output_size);
117272
+ ArrowToDuckDB(state, data.arrow_convert_data, state.all_columns, data.lines_read - output_size);
117273
+ output.ReferenceColumns(state.all_columns, global_state.projection_ids);
117274
+ } else {
117275
+ output.SetCardinality(output_size);
117276
+ ArrowToDuckDB(state, data.arrow_convert_data, output, data.lines_read - output_size);
117277
+ }
117278
+
117228
117279
  output.Verify();
117229
117280
  state.chunk_offset += output.size();
117230
117281
  }
@@ -117239,6 +117290,7 @@ void ArrowTableFunction::RegisterFunction(BuiltinFunctions &set) {
117239
117290
  arrow.cardinality = ArrowScanCardinality;
117240
117291
  arrow.projection_pushdown = true;
117241
117292
  arrow.filter_pushdown = true;
117293
+ arrow.filter_prune = true;
117242
117294
  set.AddFunction(arrow);
117243
117295
  }
117244
117296
 
@@ -123436,10 +123488,6 @@ void BuiltinFunctions::RegisterSQLiteFunctions() {
123436
123488
 
123437
123489
 
123438
123490
 
123439
-
123440
-
123441
-
123442
-
123443
123491
  namespace duckdb {
123444
123492
 
123445
123493
  //===--------------------------------------------------------------------===//
@@ -123451,6 +123499,8 @@ bool TableScanParallelStateNext(ClientContext &context, const FunctionData *bind
123451
123499
  struct TableScanLocalState : public LocalTableFunctionState {
123452
123500
  //! The current position in the scan
123453
123501
  TableScanState scan_state;
123502
+ //! The DataChunk containing all read columns (even filter columns that are immediately removed)
123503
+ DataChunk all_columns;
123454
123504
  };
123455
123505
 
123456
123506
  static storage_t GetStorageIndex(TableCatalogEntry &table, column_t column_id) {
@@ -123472,9 +123522,16 @@ struct TableScanGlobalState : public GlobalTableFunctionState {
123472
123522
  mutex lock;
123473
123523
  idx_t max_threads;
123474
123524
 
123525
+ vector<idx_t> projection_ids;
123526
+ vector<LogicalType> scanned_types;
123527
+
123475
123528
  idx_t MaxThreads() const override {
123476
123529
  return max_threads;
123477
123530
  }
123531
+
123532
+ bool CanRemoveFilterColumns() const {
123533
+ return !projection_ids.empty();
123534
+ }
123478
123535
  };
123479
123536
 
123480
123537
  static unique_ptr<LocalTableFunctionState> TableScanInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
@@ -123488,6 +123545,10 @@ static unique_ptr<LocalTableFunctionState> TableScanInitLocal(ExecutionContext &
123488
123545
  }
123489
123546
  result->scan_state.Initialize(move(column_ids), input.filters);
123490
123547
  TableScanParallelStateNext(context.client, input.bind_data, result.get(), gstate);
123548
+ if (input.CanRemoveFilterColumns()) {
123549
+ auto &tsgs = (TableScanGlobalState &)*gstate;
123550
+ result->all_columns.Initialize(context.client, tsgs.scanned_types);
123551
+ }
123491
123552
  return move(result);
123492
123553
  }
123493
123554
 
@@ -123496,8 +123557,18 @@ unique_ptr<GlobalTableFunctionState> TableScanInitGlobal(ClientContext &context,
123496
123557
  D_ASSERT(input.bind_data);
123497
123558
  auto &bind_data = (const TableScanBindData &)*input.bind_data;
123498
123559
  auto result = make_unique<TableScanGlobalState>(context, input.bind_data);
123499
-
123500
123560
  bind_data.table->storage->InitializeParallelScan(context, result->state);
123561
+ if (input.CanRemoveFilterColumns()) {
123562
+ result->projection_ids = input.projection_ids;
123563
+ const auto &columns = bind_data.table->columns;
123564
+ for (const auto &col_idx : input.column_ids) {
123565
+ if (col_idx == COLUMN_IDENTIFIER_ROW_ID) {
123566
+ result->scanned_types.emplace_back(LogicalType::ROW_TYPE);
123567
+ } else {
123568
+ result->scanned_types.push_back(columns[col_idx].Type());
123569
+ }
123570
+ }
123571
+ }
123501
123572
  return move(result);
123502
123573
  }
123503
123574
 
@@ -123514,12 +123585,17 @@ static unique_ptr<BaseStatistics> TableScanStatistics(ClientContext &context, co
123514
123585
 
123515
123586
  static void TableScanFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
123516
123587
  auto &bind_data = (TableScanBindData &)*data_p.bind_data;
123588
+ auto &gstate = (TableScanGlobalState &)*data_p.global_state;
123517
123589
  auto &state = (TableScanLocalState &)*data_p.local_state;
123518
123590
  auto &transaction = Transaction::GetTransaction(context);
123519
123591
  do {
123520
123592
  if (bind_data.is_create_index) {
123521
123593
  bind_data.table->storage->CreateIndexScan(
123522
123594
  state.scan_state, output, TableScanType::TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED);
123595
+ } else if (gstate.CanRemoveFilterColumns()) {
123596
+ state.all_columns.Reset();
123597
+ bind_data.table->storage->Scan(transaction, state.all_columns, state.scan_state);
123598
+ output.ReferenceColumns(state.all_columns, gstate.projection_ids);
123523
123599
  } else {
123524
123600
  bind_data.table->storage->Scan(transaction, output, state.scan_state);
123525
123601
  }
@@ -123851,6 +123927,7 @@ TableFunction TableScanFunction::GetFunction() {
123851
123927
  scan_function.get_batch_index = TableScanGetBatchIndex;
123852
123928
  scan_function.projection_pushdown = true;
123853
123929
  scan_function.filter_pushdown = true;
123930
+ scan_function.filter_prune = true;
123854
123931
  scan_function.serialize = TableScanSerialize;
123855
123932
  scan_function.deserialize = TableScanDeserialize;
123856
123933
  return scan_function;
@@ -124067,7 +124144,7 @@ TableFunction::TableFunction(string name, vector<LogicalType> arguments, table_f
124067
124144
  init_local(init_local), function(function), in_out_function(nullptr), statistics(nullptr), dependency(nullptr),
124068
124145
  cardinality(nullptr), pushdown_complex_filter(nullptr), to_string(nullptr), table_scan_progress(nullptr),
124069
124146
  get_batch_index(nullptr), serialize(nullptr), deserialize(nullptr), projection_pushdown(false),
124070
- filter_pushdown(false) {
124147
+ filter_pushdown(false), filter_prune(false) {
124071
124148
  }
124072
124149
 
124073
124150
  TableFunction::TableFunction(const vector<LogicalType> &arguments, table_function_t function,
@@ -124079,7 +124156,8 @@ TableFunction::TableFunction()
124079
124156
  : SimpleNamedParameterFunction("", {}), bind(nullptr), init_global(nullptr), init_local(nullptr), function(nullptr),
124080
124157
  in_out_function(nullptr), statistics(nullptr), dependency(nullptr), cardinality(nullptr),
124081
124158
  pushdown_complex_filter(nullptr), to_string(nullptr), table_scan_progress(nullptr), get_batch_index(nullptr),
124082
- serialize(nullptr), deserialize(nullptr), projection_pushdown(false), filter_pushdown(false) {
124159
+ serialize(nullptr), deserialize(nullptr), projection_pushdown(false), filter_pushdown(false),
124160
+ filter_prune(false) {
124083
124161
  }
124084
124162
 
124085
124163
  } // namespace duckdb
@@ -149270,7 +149348,7 @@ private:
149270
149348
 
149271
149349
  private:
149272
149350
  template <class T>
149273
- void ClearUnusedExpressions(vector<T> &list, idx_t table_idx);
149351
+ void ClearUnusedExpressions(vector<T> &list, idx_t table_idx, bool replace = true);
149274
149352
 
149275
149353
  //! Perform a replacement of the ColumnBinding, iterating over all the currently found column references and
149276
149354
  //! replacing the bindings
@@ -150793,7 +150871,7 @@ void RemoveUnusedColumns::ReplaceBinding(ColumnBinding current_binding, ColumnBi
150793
150871
  }
150794
150872
 
150795
150873
  template <class T>
150796
- void RemoveUnusedColumns::ClearUnusedExpressions(vector<T> &list, idx_t table_idx) {
150874
+ void RemoveUnusedColumns::ClearUnusedExpressions(vector<T> &list, idx_t table_idx, bool replace) {
150797
150875
  idx_t offset = 0;
150798
150876
  for (idx_t col_idx = 0; col_idx < list.size(); col_idx++) {
150799
150877
  auto current_binding = ColumnBinding(table_idx, col_idx + offset);
@@ -150803,7 +150881,7 @@ void RemoveUnusedColumns::ClearUnusedExpressions(vector<T> &list, idx_t table_id
150803
150881
  list.erase(list.begin() + col_idx);
150804
150882
  offset++;
150805
150883
  col_idx--;
150806
- } else if (offset > 0) {
150884
+ } else if (offset > 0 && replace) {
150807
150885
  // column is used but the ColumnBinding has changed because of removed columns
150808
150886
  ReplaceBinding(current_binding, ColumnBinding(table_idx, col_idx));
150809
150887
  }
@@ -150976,6 +151054,17 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
150976
151054
  LogicalOperatorVisitor::VisitOperatorExpressions(op);
150977
151055
  if (!everything_referenced) {
150978
151056
  auto &get = (LogicalGet &)op;
151057
+
151058
+ // Create "selection vector" of all column ids
151059
+ vector<idx_t> proj_sel;
151060
+ for (idx_t col_idx = 0; col_idx < get.column_ids.size(); col_idx++) {
151061
+ proj_sel.push_back(col_idx);
151062
+ }
151063
+ // Create a copy that we can use to match ids later
151064
+ auto col_sel = proj_sel;
151065
+ // Clear unused ids, exclude filter columns that are projected out immediately
151066
+ ClearUnusedExpressions(proj_sel, get.table_index, false);
151067
+
150979
151068
  // for every table filter, push a column binding into the column references map to prevent the column from
150980
151069
  // being projected out
150981
151070
  for (auto &filter : get.table_filters.filters) {
@@ -150994,8 +151083,31 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
150994
151083
  column_references.insert(make_pair(filter_binding, vector<BoundColumnRefExpression *>()));
150995
151084
  }
150996
151085
  }
150997
- // table scan: figure out which columns are referenced
150998
- ClearUnusedExpressions(get.column_ids, get.table_index);
151086
+
151087
+ // Clear unused ids, include filter columns that are projected out immediately
151088
+ ClearUnusedExpressions(col_sel, get.table_index);
151089
+
151090
+ // Now set the column ids in the LogicalGet using the "selection vector"
151091
+ vector<column_t> column_ids;
151092
+ column_ids.reserve(col_sel.size());
151093
+ for (auto col_sel_idx : col_sel) {
151094
+ column_ids.push_back(get.column_ids[col_sel_idx]);
151095
+ }
151096
+ get.column_ids = move(column_ids);
151097
+
151098
+ if (get.function.filter_prune) {
151099
+ // Now set the projection cols by matching the "selection vector" that excludes filter columns
151100
+ // with the "selection vector" that includes filter columns
151101
+ idx_t col_idx = 0;
151102
+ for (auto proj_sel_idx : proj_sel) {
151103
+ for (; col_idx < col_sel.size(); col_idx++) {
151104
+ if (proj_sel_idx == col_sel[col_idx]) {
151105
+ get.projection_ids.push_back(col_idx);
151106
+ break;
151107
+ }
151108
+ }
151109
+ }
151110
+ }
150999
151111
 
151000
151112
  if (get.column_ids.empty()) {
151001
151113
  // this generally means we are only interested in whether or not anything exists in the table (e.g.
@@ -190301,7 +190413,6 @@ unique_ptr<LogicalOperator> LogicalFilter::Deserialize(LogicalDeserializationSta
190301
190413
 
190302
190414
 
190303
190415
 
190304
-
190305
190416
  namespace duckdb {
190306
190417
 
190307
190418
  LogicalGet::LogicalGet(idx_t table_index, TableFunction function, unique_ptr<FunctionData> bind_data,
@@ -190339,8 +190450,14 @@ vector<ColumnBinding> LogicalGet::GetColumnBindings() {
190339
190450
  return {ColumnBinding(table_index, 0)};
190340
190451
  }
190341
190452
  vector<ColumnBinding> result;
190342
- for (idx_t i = 0; i < column_ids.size(); i++) {
190343
- result.emplace_back(table_index, i);
190453
+ if (projection_ids.empty()) {
190454
+ for (idx_t col_idx = 0; col_idx < column_ids.size(); col_idx++) {
190455
+ result.emplace_back(table_index, col_idx);
190456
+ }
190457
+ } else {
190458
+ for (auto proj_id : projection_ids) {
190459
+ result.emplace_back(table_index, proj_id);
190460
+ }
190344
190461
  }
190345
190462
  return result;
190346
190463
  }
@@ -190349,11 +190466,23 @@ void LogicalGet::ResolveTypes() {
190349
190466
  if (column_ids.empty()) {
190350
190467
  column_ids.push_back(COLUMN_IDENTIFIER_ROW_ID);
190351
190468
  }
190352
- for (auto &index : column_ids) {
190353
- if (index == COLUMN_IDENTIFIER_ROW_ID) {
190354
- types.emplace_back(LogicalType::ROW_TYPE);
190355
- } else {
190356
- types.push_back(returned_types[index]);
190469
+
190470
+ if (projection_ids.empty()) {
190471
+ for (auto &index : column_ids) {
190472
+ if (index == COLUMN_IDENTIFIER_ROW_ID) {
190473
+ types.emplace_back(LogicalType::ROW_TYPE);
190474
+ } else {
190475
+ types.push_back(returned_types[index]);
190476
+ }
190477
+ }
190478
+ } else {
190479
+ for (auto &proj_index : projection_ids) {
190480
+ auto &index = column_ids[proj_index];
190481
+ if (index == COLUMN_IDENTIFIER_ROW_ID) {
190482
+ types.emplace_back(LogicalType::ROW_TYPE);
190483
+ } else {
190484
+ types.push_back(returned_types[index]);
190485
+ }
190357
190486
  }
190358
190487
  }
190359
190488
  }
@@ -190373,6 +190502,7 @@ void LogicalGet::Serialize(FieldWriter &writer) const {
190373
190502
  writer.WriteRegularSerializableList(returned_types);
190374
190503
  writer.WriteList<string>(names);
190375
190504
  writer.WriteList<column_t>(column_ids);
190505
+ writer.WriteList<column_t>(projection_ids);
190376
190506
  writer.WriteSerializable(table_filters);
190377
190507
 
190378
190508
  FunctionSerializer::SerializeBase<TableFunction>(writer, function, bind_data.get());
@@ -190395,6 +190525,7 @@ unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState
190395
190525
  auto returned_types = reader.ReadRequiredSerializableList<LogicalType, LogicalType>();
190396
190526
  auto returned_names = reader.ReadRequiredList<string>();
190397
190527
  auto column_ids = reader.ReadRequiredList<column_t>();
190528
+ auto projection_ids = reader.ReadRequiredList<column_t>();
190398
190529
  auto table_filters = reader.ReadRequiredSerializable<TableFilterSet>();
190399
190530
 
190400
190531
  unique_ptr<FunctionData> bind_data;
@@ -190439,6 +190570,7 @@ unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState
190439
190570
 
190440
190571
  auto result = make_unique<LogicalGet>(table_index, function, move(bind_data), returned_types, returned_names);
190441
190572
  result->column_ids = move(column_ids);
190573
+ result->projection_ids = move(projection_ids);
190442
190574
  result->table_filters = move(*table_filters);
190443
190575
  result->parameters = move(parameters);
190444
190576
  result->named_parameters = move(named_parameters);
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "f21e66bf8"
15
- #define DUCKDB_VERSION "v0.5.2-dev494"
14
+ #define DUCKDB_SOURCE_ID "9f84ca408"
15
+ #define DUCKDB_VERSION "v0.5.2-dev512"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -4855,7 +4855,7 @@ public:
4855
4855
  DUCKDB_API void Fuse(DataChunk &other);
4856
4856
 
4857
4857
  //! Makes this DataChunk reference the specified columns in the other DataChunk
4858
- DUCKDB_API void ReferenceColumns(DataChunk &other, vector<column_t> column_ids);
4858
+ DUCKDB_API void ReferenceColumns(DataChunk &other, const vector<column_t> &column_ids);
4859
4859
 
4860
4860
  //! Turn all the vectors from the chunk into flat vectors
4861
4861
  DUCKDB_API void Flatten();
@@ -12615,21 +12615,37 @@ struct TableFunctionBindInput {
12615
12615
 
12616
12616
  struct TableFunctionInitInput {
12617
12617
  TableFunctionInitInput(const FunctionData *bind_data_p, const vector<column_t> &column_ids_p,
12618
- TableFilterSet *filters_p)
12619
- : bind_data(bind_data_p), column_ids(column_ids_p), filters(filters_p) {
12618
+ const vector<idx_t> &projection_ids_p, TableFilterSet *filters_p)
12619
+ : bind_data(bind_data_p), column_ids(column_ids_p), projection_ids(projection_ids_p), filters(filters_p) {
12620
12620
  }
12621
12621
 
12622
12622
  const FunctionData *bind_data;
12623
12623
  const vector<column_t> &column_ids;
12624
+ const vector<idx_t> projection_ids;
12624
12625
  TableFilterSet *filters;
12626
+
12627
+ bool CanRemoveFilterColumns() const {
12628
+ if (projection_ids.empty()) {
12629
+ // Not set, can't remove filter columns
12630
+ return false;
12631
+ } else if (projection_ids.size() == column_ids.size()) {
12632
+ // Filter column is used in remainder of plan, can't remove
12633
+ return false;
12634
+ } else {
12635
+ // Less columns need to be projected out than that we scan
12636
+ return true;
12637
+ }
12638
+ }
12625
12639
  };
12626
12640
 
12627
12641
  struct TableFunctionInput {
12642
+ public:
12628
12643
  TableFunctionInput(const FunctionData *bind_data_p, LocalTableFunctionState *local_state_p,
12629
12644
  GlobalTableFunctionState *global_state_p)
12630
12645
  : bind_data(bind_data_p), local_state(local_state_p), global_state(global_state_p) {
12631
12646
  }
12632
12647
 
12648
+ public:
12633
12649
  const FunctionData *bind_data;
12634
12650
  LocalTableFunctionState *local_state;
12635
12651
  GlobalTableFunctionState *global_state;
@@ -12722,6 +12738,9 @@ public:
12722
12738
  //! Whether or not the table function supports filter pushdown. If not supported a filter will be added
12723
12739
  //! that applies the table filter directly.
12724
12740
  bool filter_pushdown;
12741
+ //! Whether or not the table function can immediately prune out filter columns that are unused in the remainder of
12742
+ //! the query plan, e.g., "SELECT i FROM tbl WHERE j = 42;" - j does not need to leave the table function at all
12743
+ bool filter_prune;
12725
12744
  //! Additional function info, passed to the bind
12726
12745
  shared_ptr<TableFunctionInfo> function_info;
12727
12746
  };
@@ -28963,6 +28982,8 @@ public:
28963
28982
  vector<string> names;
28964
28983
  //! Bound column IDs
28965
28984
  vector<column_t> column_ids;
28985
+ //! Columns that are used outside of the scan
28986
+ vector<idx_t> projection_ids;
28966
28987
  //! Filters pushed down for table scan
28967
28988
  TableFilterSet table_filters;
28968
28989
  //! The set of input parameters for the table function