duckdb 0.5.2-dev494.0 → 0.5.2-dev512.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +174 -42
- package/src/duckdb.hpp +26 -5
- package/src/parquet-amalgamation.cpp +33468 -33442
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -42766,7 +42766,7 @@ void DataChunk::Fuse(DataChunk &other) {
|
|
|
42766
42766
|
other.Destroy();
|
|
42767
42767
|
}
|
|
42768
42768
|
|
|
42769
|
-
void DataChunk::ReferenceColumns(DataChunk &other, vector<column_t> column_ids) {
|
|
42769
|
+
void DataChunk::ReferenceColumns(DataChunk &other, const vector<column_t> &column_ids) {
|
|
42770
42770
|
D_ASSERT(ColumnCount() == column_ids.size());
|
|
42771
42771
|
Reset();
|
|
42772
42772
|
for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) {
|
|
@@ -72428,16 +72428,25 @@ namespace duckdb {
|
|
|
72428
72428
|
//! Represents a scan of a base table
|
|
72429
72429
|
class PhysicalTableScan : public PhysicalOperator {
|
|
72430
72430
|
public:
|
|
72431
|
+
//! Regular Table Scan
|
|
72431
72432
|
PhysicalTableScan(vector<LogicalType> types, TableFunction function, unique_ptr<FunctionData> bind_data,
|
|
72432
72433
|
vector<column_t> column_ids, vector<string> names, unique_ptr<TableFilterSet> table_filters,
|
|
72433
72434
|
idx_t estimated_cardinality);
|
|
72435
|
+
//! Table scan that immediately projects out filter columns that are unused in the remainder of the query plan
|
|
72436
|
+
PhysicalTableScan(vector<LogicalType> types, TableFunction function, unique_ptr<FunctionData> bind_data,
|
|
72437
|
+
vector<LogicalType> returned_types, vector<column_t> column_ids, vector<idx_t> projection_ids,
|
|
72438
|
+
vector<string> names, unique_ptr<TableFilterSet> table_filters, idx_t estimated_cardinality);
|
|
72434
72439
|
|
|
72435
72440
|
//! The table function
|
|
72436
72441
|
TableFunction function;
|
|
72437
72442
|
//! Bind data of the function
|
|
72438
72443
|
unique_ptr<FunctionData> bind_data;
|
|
72439
|
-
//! The
|
|
72444
|
+
//! The types of ALL columns that can be returned by the table function
|
|
72445
|
+
vector<LogicalType> returned_types;
|
|
72446
|
+
//! The column ids used within the table function
|
|
72440
72447
|
vector<column_t> column_ids;
|
|
72448
|
+
//! The projected-out column ids
|
|
72449
|
+
vector<idx_t> projection_ids;
|
|
72441
72450
|
//! The names of the columns
|
|
72442
72451
|
vector<string> names;
|
|
72443
72452
|
//! The table filters
|
|
@@ -74665,11 +74674,7 @@ SinkResultType PhysicalOrder::Sink(ExecutionContext &context, GlobalSinkState &g
|
|
|
74665
74674
|
lstate.key_executor.Execute(input, keys);
|
|
74666
74675
|
|
|
74667
74676
|
auto &payload = lstate.payload;
|
|
74668
|
-
payload.
|
|
74669
|
-
for (idx_t col_idx = 0; col_idx < projections.size(); col_idx++) {
|
|
74670
|
-
payload.data[col_idx].Reference(input.data[projections[col_idx]]);
|
|
74671
|
-
}
|
|
74672
|
-
payload.SetCardinality(input.size());
|
|
74677
|
+
payload.ReferenceColumns(input, projections);
|
|
74673
74678
|
|
|
74674
74679
|
// Sink the data into the local sort state
|
|
74675
74680
|
keys.Verify();
|
|
@@ -78642,7 +78647,7 @@ unique_ptr<OperatorState> PhysicalTableInOutFunction::GetOperatorState(Execution
|
|
|
78642
78647
|
auto &gstate = (TableInOutGlobalState &)*op_state;
|
|
78643
78648
|
auto result = make_unique<TableInOutLocalState>();
|
|
78644
78649
|
if (function.init_local) {
|
|
78645
|
-
TableFunctionInitInput input(bind_data.get(), column_ids, nullptr);
|
|
78650
|
+
TableFunctionInitInput input(bind_data.get(), column_ids, vector<idx_t>(), nullptr);
|
|
78646
78651
|
result->local_state = function.init_local(context, input, gstate.global_state.get());
|
|
78647
78652
|
}
|
|
78648
78653
|
return move(result);
|
|
@@ -78651,7 +78656,7 @@ unique_ptr<OperatorState> PhysicalTableInOutFunction::GetOperatorState(Execution
|
|
|
78651
78656
|
unique_ptr<GlobalOperatorState> PhysicalTableInOutFunction::GetGlobalOperatorState(ClientContext &context) const {
|
|
78652
78657
|
auto result = make_unique<TableInOutGlobalState>();
|
|
78653
78658
|
if (function.init_global) {
|
|
78654
|
-
TableFunctionInitInput input(bind_data.get(), column_ids, nullptr);
|
|
78659
|
+
TableFunctionInitInput input(bind_data.get(), column_ids, vector<idx_t>(), nullptr);
|
|
78655
78660
|
result->global_state = function.init_global(context, input);
|
|
78656
78661
|
}
|
|
78657
78662
|
return move(result);
|
|
@@ -79299,11 +79304,22 @@ PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction fu
|
|
|
79299
79304
|
table_filters(move(table_filters_p)) {
|
|
79300
79305
|
}
|
|
79301
79306
|
|
|
79307
|
+
PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction function_p,
|
|
79308
|
+
unique_ptr<FunctionData> bind_data_p, vector<LogicalType> returned_types_p,
|
|
79309
|
+
vector<column_t> column_ids_p, vector<idx_t> projection_ids_p,
|
|
79310
|
+
vector<string> names_p, unique_ptr<TableFilterSet> table_filters_p,
|
|
79311
|
+
idx_t estimated_cardinality)
|
|
79312
|
+
: PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, move(types), estimated_cardinality),
|
|
79313
|
+
function(move(function_p)), bind_data(move(bind_data_p)), returned_types(move(returned_types_p)),
|
|
79314
|
+
column_ids(move(column_ids_p)), projection_ids(move(projection_ids_p)), names(move(names_p)),
|
|
79315
|
+
table_filters(move(table_filters_p)) {
|
|
79316
|
+
}
|
|
79317
|
+
|
|
79302
79318
|
class TableScanGlobalSourceState : public GlobalSourceState {
|
|
79303
79319
|
public:
|
|
79304
79320
|
TableScanGlobalSourceState(ClientContext &context, const PhysicalTableScan &op) {
|
|
79305
79321
|
if (op.function.init_global) {
|
|
79306
|
-
TableFunctionInitInput input(op.bind_data.get(), op.column_ids, op.table_filters.get());
|
|
79322
|
+
TableFunctionInitInput input(op.bind_data.get(), op.column_ids, op.projection_ids, op.table_filters.get());
|
|
79307
79323
|
global_state = op.function.init_global(context, input);
|
|
79308
79324
|
if (global_state) {
|
|
79309
79325
|
max_threads = global_state->MaxThreads();
|
|
@@ -79326,7 +79342,7 @@ public:
|
|
|
79326
79342
|
TableScanLocalSourceState(ExecutionContext &context, TableScanGlobalSourceState &gstate,
|
|
79327
79343
|
const PhysicalTableScan &op) {
|
|
79328
79344
|
if (op.function.init_local) {
|
|
79329
|
-
TableFunctionInitInput input(op.bind_data.get(), op.column_ids, op.table_filters.get());
|
|
79345
|
+
TableFunctionInitInput input(op.bind_data.get(), op.column_ids, op.projection_ids, op.table_filters.get());
|
|
79330
79346
|
local_state = op.function.init_local(context, input, gstate.global_state.get());
|
|
79331
79347
|
}
|
|
79332
79348
|
}
|
|
@@ -79383,12 +79399,13 @@ string PhysicalTableScan::ParamsToString() const {
|
|
|
79383
79399
|
result += "\n[INFOSEPARATOR]\n";
|
|
79384
79400
|
}
|
|
79385
79401
|
if (function.projection_pushdown) {
|
|
79386
|
-
for (idx_t i = 0; i <
|
|
79387
|
-
|
|
79402
|
+
for (idx_t i = 0; i < projection_ids.size(); i++) {
|
|
79403
|
+
const auto &column_id = column_ids[projection_ids[i]];
|
|
79404
|
+
if (column_id < names.size()) {
|
|
79388
79405
|
if (i > 0) {
|
|
79389
79406
|
result += "\n";
|
|
79390
79407
|
}
|
|
79391
|
-
result += names[
|
|
79408
|
+
result += names[column_id];
|
|
79392
79409
|
}
|
|
79393
79410
|
}
|
|
79394
79411
|
}
|
|
@@ -83671,7 +83688,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalFilter &op
|
|
|
83671
83688
|
|
|
83672
83689
|
|
|
83673
83690
|
|
|
83674
|
-
|
|
83675
83691
|
namespace duckdb {
|
|
83676
83692
|
|
|
83677
83693
|
unique_ptr<TableFilterSet> CreateTableFilterSet(TableFilterSet &table_filters, vector<column_t> &column_ids) {
|
|
@@ -83715,8 +83731,9 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalGet &op) {
|
|
|
83715
83731
|
// create the table scan node
|
|
83716
83732
|
if (!op.function.projection_pushdown) {
|
|
83717
83733
|
// function does not support projection pushdown
|
|
83718
|
-
auto node = make_unique<PhysicalTableScan>(op.returned_types, op.function, move(op.bind_data),
|
|
83719
|
-
op.
|
|
83734
|
+
auto node = make_unique<PhysicalTableScan>(op.returned_types, op.function, move(op.bind_data),
|
|
83735
|
+
op.returned_types, op.column_ids, vector<column_t>(), op.names,
|
|
83736
|
+
move(table_filters), op.estimated_cardinality);
|
|
83720
83737
|
// first check if an additional projection is necessary
|
|
83721
83738
|
if (op.column_ids.size() == op.returned_types.size()) {
|
|
83722
83739
|
bool projection_necessary = false;
|
|
@@ -83751,8 +83768,9 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalGet &op) {
|
|
|
83751
83768
|
projection->children.push_back(move(node));
|
|
83752
83769
|
return move(projection);
|
|
83753
83770
|
} else {
|
|
83754
|
-
return make_unique<PhysicalTableScan>(op.types, op.function, move(op.bind_data), op.
|
|
83755
|
-
move(table_filters),
|
|
83771
|
+
return make_unique<PhysicalTableScan>(op.types, op.function, move(op.bind_data), op.returned_types,
|
|
83772
|
+
op.column_ids, op.projection_ids, op.names, move(table_filters),
|
|
83773
|
+
op.estimated_cardinality);
|
|
83756
83774
|
}
|
|
83757
83775
|
}
|
|
83758
83776
|
|
|
@@ -116769,6 +116787,7 @@ string ScalarMacroFunction::ToSQL(const string &schema, const string &name) {
|
|
|
116769
116787
|
|
|
116770
116788
|
|
|
116771
116789
|
|
|
116790
|
+
|
|
116772
116791
|
//===----------------------------------------------------------------------===//
|
|
116773
116792
|
// DuckDB
|
|
116774
116793
|
//
|
|
@@ -116852,6 +116871,7 @@ struct ArrowScanFunctionData : public PyTableFunctionData {
|
|
|
116852
116871
|
}
|
|
116853
116872
|
//! This holds the original list type (col_idx, [ArrowListType,size])
|
|
116854
116873
|
unordered_map<idx_t, unique_ptr<ArrowConvertData>> arrow_convert_data;
|
|
116874
|
+
vector<LogicalType> all_types;
|
|
116855
116875
|
atomic<idx_t> lines_read;
|
|
116856
116876
|
ArrowSchemaWrapper schema_root;
|
|
116857
116877
|
idx_t rows_per_thread;
|
|
@@ -116872,6 +116892,8 @@ struct ArrowScanLocalState : public LocalTableFunctionState {
|
|
|
116872
116892
|
//! Store child vectors for Arrow Dictionary Vectors (col-idx,vector)
|
|
116873
116893
|
unordered_map<idx_t, unique_ptr<Vector>> arrow_dictionary_vectors;
|
|
116874
116894
|
TableFilterSet *filters = nullptr;
|
|
116895
|
+
//! The DataChunk containing all read columns (even filter columns that are immediately removed)
|
|
116896
|
+
DataChunk all_columns;
|
|
116875
116897
|
};
|
|
116876
116898
|
|
|
116877
116899
|
struct ArrowScanGlobalState : public GlobalTableFunctionState {
|
|
@@ -116880,9 +116902,16 @@ struct ArrowScanGlobalState : public GlobalTableFunctionState {
|
|
|
116880
116902
|
idx_t max_threads = 1;
|
|
116881
116903
|
bool done = false;
|
|
116882
116904
|
|
|
116905
|
+
vector<idx_t> projection_ids;
|
|
116906
|
+
vector<LogicalType> scanned_types;
|
|
116907
|
+
|
|
116883
116908
|
idx_t MaxThreads() const override {
|
|
116884
116909
|
return max_threads;
|
|
116885
116910
|
}
|
|
116911
|
+
|
|
116912
|
+
bool CanRemoveFilterColumns() const {
|
|
116913
|
+
return !projection_ids.empty();
|
|
116914
|
+
}
|
|
116886
116915
|
};
|
|
116887
116916
|
|
|
116888
116917
|
struct ArrowTableFunction {
|
|
@@ -116927,7 +116956,6 @@ private:
|
|
|
116927
116956
|
|
|
116928
116957
|
|
|
116929
116958
|
|
|
116930
|
-
|
|
116931
116959
|
namespace duckdb {
|
|
116932
116960
|
|
|
116933
116961
|
LogicalType GetArrowLogicalType(ArrowSchema &schema,
|
|
@@ -117139,6 +117167,7 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
|
|
|
117139
117167
|
names.push_back(name);
|
|
117140
117168
|
}
|
|
117141
117169
|
RenameArrowColumns(names);
|
|
117170
|
+
res->all_types = return_types;
|
|
117142
117171
|
return move(res);
|
|
117143
117172
|
}
|
|
117144
117173
|
|
|
@@ -117190,6 +117219,16 @@ unique_ptr<GlobalTableFunctionState> ArrowTableFunction::ArrowScanInitGlobal(Cli
|
|
|
117190
117219
|
auto result = make_unique<ArrowScanGlobalState>();
|
|
117191
117220
|
result->stream = ProduceArrowScan(bind_data, input.column_ids, input.filters);
|
|
117192
117221
|
result->max_threads = ArrowScanMaxThreads(context, input.bind_data);
|
|
117222
|
+
if (input.CanRemoveFilterColumns()) {
|
|
117223
|
+
result->projection_ids = input.projection_ids;
|
|
117224
|
+
for (const auto &col_idx : input.column_ids) {
|
|
117225
|
+
if (col_idx == COLUMN_IDENTIFIER_ROW_ID) {
|
|
117226
|
+
result->scanned_types.emplace_back(LogicalType::ROW_TYPE);
|
|
117227
|
+
} else {
|
|
117228
|
+
result->scanned_types.push_back(bind_data.all_types[col_idx]);
|
|
117229
|
+
}
|
|
117230
|
+
}
|
|
117231
|
+
}
|
|
117193
117232
|
return move(result);
|
|
117194
117233
|
}
|
|
117195
117234
|
|
|
@@ -117201,6 +117240,10 @@ unique_ptr<LocalTableFunctionState> ArrowTableFunction::ArrowScanInitLocal(Execu
|
|
|
117201
117240
|
auto result = make_unique<ArrowScanLocalState>(move(current_chunk));
|
|
117202
117241
|
result->column_ids = input.column_ids;
|
|
117203
117242
|
result->filters = input.filters;
|
|
117243
|
+
if (input.CanRemoveFilterColumns()) {
|
|
117244
|
+
auto &asgs = (ArrowScanGlobalState &)*global_state_p;
|
|
117245
|
+
result->all_columns.Initialize(context.client, asgs.scanned_types);
|
|
117246
|
+
}
|
|
117204
117247
|
if (!ArrowScanParallelStateNext(context.client, input.bind_data, *result, global_state)) {
|
|
117205
117248
|
return nullptr;
|
|
117206
117249
|
}
|
|
@@ -117223,8 +117266,16 @@ void ArrowTableFunction::ArrowScanFunction(ClientContext &context, TableFunction
|
|
|
117223
117266
|
}
|
|
117224
117267
|
int64_t output_size = MinValue<int64_t>(STANDARD_VECTOR_SIZE, state.chunk->arrow_array.length - state.chunk_offset);
|
|
117225
117268
|
data.lines_read += output_size;
|
|
117226
|
-
|
|
117227
|
-
|
|
117269
|
+
if (global_state.CanRemoveFilterColumns()) {
|
|
117270
|
+
state.all_columns.Reset();
|
|
117271
|
+
state.all_columns.SetCardinality(output_size);
|
|
117272
|
+
ArrowToDuckDB(state, data.arrow_convert_data, state.all_columns, data.lines_read - output_size);
|
|
117273
|
+
output.ReferenceColumns(state.all_columns, global_state.projection_ids);
|
|
117274
|
+
} else {
|
|
117275
|
+
output.SetCardinality(output_size);
|
|
117276
|
+
ArrowToDuckDB(state, data.arrow_convert_data, output, data.lines_read - output_size);
|
|
117277
|
+
}
|
|
117278
|
+
|
|
117228
117279
|
output.Verify();
|
|
117229
117280
|
state.chunk_offset += output.size();
|
|
117230
117281
|
}
|
|
@@ -117239,6 +117290,7 @@ void ArrowTableFunction::RegisterFunction(BuiltinFunctions &set) {
|
|
|
117239
117290
|
arrow.cardinality = ArrowScanCardinality;
|
|
117240
117291
|
arrow.projection_pushdown = true;
|
|
117241
117292
|
arrow.filter_pushdown = true;
|
|
117293
|
+
arrow.filter_prune = true;
|
|
117242
117294
|
set.AddFunction(arrow);
|
|
117243
117295
|
}
|
|
117244
117296
|
|
|
@@ -123436,10 +123488,6 @@ void BuiltinFunctions::RegisterSQLiteFunctions() {
|
|
|
123436
123488
|
|
|
123437
123489
|
|
|
123438
123490
|
|
|
123439
|
-
|
|
123440
|
-
|
|
123441
|
-
|
|
123442
|
-
|
|
123443
123491
|
namespace duckdb {
|
|
123444
123492
|
|
|
123445
123493
|
//===--------------------------------------------------------------------===//
|
|
@@ -123451,6 +123499,8 @@ bool TableScanParallelStateNext(ClientContext &context, const FunctionData *bind
|
|
|
123451
123499
|
struct TableScanLocalState : public LocalTableFunctionState {
|
|
123452
123500
|
//! The current position in the scan
|
|
123453
123501
|
TableScanState scan_state;
|
|
123502
|
+
//! The DataChunk containing all read columns (even filter columns that are immediately removed)
|
|
123503
|
+
DataChunk all_columns;
|
|
123454
123504
|
};
|
|
123455
123505
|
|
|
123456
123506
|
static storage_t GetStorageIndex(TableCatalogEntry &table, column_t column_id) {
|
|
@@ -123472,9 +123522,16 @@ struct TableScanGlobalState : public GlobalTableFunctionState {
|
|
|
123472
123522
|
mutex lock;
|
|
123473
123523
|
idx_t max_threads;
|
|
123474
123524
|
|
|
123525
|
+
vector<idx_t> projection_ids;
|
|
123526
|
+
vector<LogicalType> scanned_types;
|
|
123527
|
+
|
|
123475
123528
|
idx_t MaxThreads() const override {
|
|
123476
123529
|
return max_threads;
|
|
123477
123530
|
}
|
|
123531
|
+
|
|
123532
|
+
bool CanRemoveFilterColumns() const {
|
|
123533
|
+
return !projection_ids.empty();
|
|
123534
|
+
}
|
|
123478
123535
|
};
|
|
123479
123536
|
|
|
123480
123537
|
static unique_ptr<LocalTableFunctionState> TableScanInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
|
|
@@ -123488,6 +123545,10 @@ static unique_ptr<LocalTableFunctionState> TableScanInitLocal(ExecutionContext &
|
|
|
123488
123545
|
}
|
|
123489
123546
|
result->scan_state.Initialize(move(column_ids), input.filters);
|
|
123490
123547
|
TableScanParallelStateNext(context.client, input.bind_data, result.get(), gstate);
|
|
123548
|
+
if (input.CanRemoveFilterColumns()) {
|
|
123549
|
+
auto &tsgs = (TableScanGlobalState &)*gstate;
|
|
123550
|
+
result->all_columns.Initialize(context.client, tsgs.scanned_types);
|
|
123551
|
+
}
|
|
123491
123552
|
return move(result);
|
|
123492
123553
|
}
|
|
123493
123554
|
|
|
@@ -123496,8 +123557,18 @@ unique_ptr<GlobalTableFunctionState> TableScanInitGlobal(ClientContext &context,
|
|
|
123496
123557
|
D_ASSERT(input.bind_data);
|
|
123497
123558
|
auto &bind_data = (const TableScanBindData &)*input.bind_data;
|
|
123498
123559
|
auto result = make_unique<TableScanGlobalState>(context, input.bind_data);
|
|
123499
|
-
|
|
123500
123560
|
bind_data.table->storage->InitializeParallelScan(context, result->state);
|
|
123561
|
+
if (input.CanRemoveFilterColumns()) {
|
|
123562
|
+
result->projection_ids = input.projection_ids;
|
|
123563
|
+
const auto &columns = bind_data.table->columns;
|
|
123564
|
+
for (const auto &col_idx : input.column_ids) {
|
|
123565
|
+
if (col_idx == COLUMN_IDENTIFIER_ROW_ID) {
|
|
123566
|
+
result->scanned_types.emplace_back(LogicalType::ROW_TYPE);
|
|
123567
|
+
} else {
|
|
123568
|
+
result->scanned_types.push_back(columns[col_idx].Type());
|
|
123569
|
+
}
|
|
123570
|
+
}
|
|
123571
|
+
}
|
|
123501
123572
|
return move(result);
|
|
123502
123573
|
}
|
|
123503
123574
|
|
|
@@ -123514,12 +123585,17 @@ static unique_ptr<BaseStatistics> TableScanStatistics(ClientContext &context, co
|
|
|
123514
123585
|
|
|
123515
123586
|
static void TableScanFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
|
|
123516
123587
|
auto &bind_data = (TableScanBindData &)*data_p.bind_data;
|
|
123588
|
+
auto &gstate = (TableScanGlobalState &)*data_p.global_state;
|
|
123517
123589
|
auto &state = (TableScanLocalState &)*data_p.local_state;
|
|
123518
123590
|
auto &transaction = Transaction::GetTransaction(context);
|
|
123519
123591
|
do {
|
|
123520
123592
|
if (bind_data.is_create_index) {
|
|
123521
123593
|
bind_data.table->storage->CreateIndexScan(
|
|
123522
123594
|
state.scan_state, output, TableScanType::TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED);
|
|
123595
|
+
} else if (gstate.CanRemoveFilterColumns()) {
|
|
123596
|
+
state.all_columns.Reset();
|
|
123597
|
+
bind_data.table->storage->Scan(transaction, state.all_columns, state.scan_state);
|
|
123598
|
+
output.ReferenceColumns(state.all_columns, gstate.projection_ids);
|
|
123523
123599
|
} else {
|
|
123524
123600
|
bind_data.table->storage->Scan(transaction, output, state.scan_state);
|
|
123525
123601
|
}
|
|
@@ -123851,6 +123927,7 @@ TableFunction TableScanFunction::GetFunction() {
|
|
|
123851
123927
|
scan_function.get_batch_index = TableScanGetBatchIndex;
|
|
123852
123928
|
scan_function.projection_pushdown = true;
|
|
123853
123929
|
scan_function.filter_pushdown = true;
|
|
123930
|
+
scan_function.filter_prune = true;
|
|
123854
123931
|
scan_function.serialize = TableScanSerialize;
|
|
123855
123932
|
scan_function.deserialize = TableScanDeserialize;
|
|
123856
123933
|
return scan_function;
|
|
@@ -124067,7 +124144,7 @@ TableFunction::TableFunction(string name, vector<LogicalType> arguments, table_f
|
|
|
124067
124144
|
init_local(init_local), function(function), in_out_function(nullptr), statistics(nullptr), dependency(nullptr),
|
|
124068
124145
|
cardinality(nullptr), pushdown_complex_filter(nullptr), to_string(nullptr), table_scan_progress(nullptr),
|
|
124069
124146
|
get_batch_index(nullptr), serialize(nullptr), deserialize(nullptr), projection_pushdown(false),
|
|
124070
|
-
filter_pushdown(false) {
|
|
124147
|
+
filter_pushdown(false), filter_prune(false) {
|
|
124071
124148
|
}
|
|
124072
124149
|
|
|
124073
124150
|
TableFunction::TableFunction(const vector<LogicalType> &arguments, table_function_t function,
|
|
@@ -124079,7 +124156,8 @@ TableFunction::TableFunction()
|
|
|
124079
124156
|
: SimpleNamedParameterFunction("", {}), bind(nullptr), init_global(nullptr), init_local(nullptr), function(nullptr),
|
|
124080
124157
|
in_out_function(nullptr), statistics(nullptr), dependency(nullptr), cardinality(nullptr),
|
|
124081
124158
|
pushdown_complex_filter(nullptr), to_string(nullptr), table_scan_progress(nullptr), get_batch_index(nullptr),
|
|
124082
|
-
serialize(nullptr), deserialize(nullptr), projection_pushdown(false), filter_pushdown(false)
|
|
124159
|
+
serialize(nullptr), deserialize(nullptr), projection_pushdown(false), filter_pushdown(false),
|
|
124160
|
+
filter_prune(false) {
|
|
124083
124161
|
}
|
|
124084
124162
|
|
|
124085
124163
|
} // namespace duckdb
|
|
@@ -149270,7 +149348,7 @@ private:
|
|
|
149270
149348
|
|
|
149271
149349
|
private:
|
|
149272
149350
|
template <class T>
|
|
149273
|
-
void ClearUnusedExpressions(vector<T> &list, idx_t table_idx);
|
|
149351
|
+
void ClearUnusedExpressions(vector<T> &list, idx_t table_idx, bool replace = true);
|
|
149274
149352
|
|
|
149275
149353
|
//! Perform a replacement of the ColumnBinding, iterating over all the currently found column references and
|
|
149276
149354
|
//! replacing the bindings
|
|
@@ -150793,7 +150871,7 @@ void RemoveUnusedColumns::ReplaceBinding(ColumnBinding current_binding, ColumnBi
|
|
|
150793
150871
|
}
|
|
150794
150872
|
|
|
150795
150873
|
template <class T>
|
|
150796
|
-
void RemoveUnusedColumns::ClearUnusedExpressions(vector<T> &list, idx_t table_idx) {
|
|
150874
|
+
void RemoveUnusedColumns::ClearUnusedExpressions(vector<T> &list, idx_t table_idx, bool replace) {
|
|
150797
150875
|
idx_t offset = 0;
|
|
150798
150876
|
for (idx_t col_idx = 0; col_idx < list.size(); col_idx++) {
|
|
150799
150877
|
auto current_binding = ColumnBinding(table_idx, col_idx + offset);
|
|
@@ -150803,7 +150881,7 @@ void RemoveUnusedColumns::ClearUnusedExpressions(vector<T> &list, idx_t table_id
|
|
|
150803
150881
|
list.erase(list.begin() + col_idx);
|
|
150804
150882
|
offset++;
|
|
150805
150883
|
col_idx--;
|
|
150806
|
-
} else if (offset > 0) {
|
|
150884
|
+
} else if (offset > 0 && replace) {
|
|
150807
150885
|
// column is used but the ColumnBinding has changed because of removed columns
|
|
150808
150886
|
ReplaceBinding(current_binding, ColumnBinding(table_idx, col_idx));
|
|
150809
150887
|
}
|
|
@@ -150976,6 +151054,17 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
|
|
|
150976
151054
|
LogicalOperatorVisitor::VisitOperatorExpressions(op);
|
|
150977
151055
|
if (!everything_referenced) {
|
|
150978
151056
|
auto &get = (LogicalGet &)op;
|
|
151057
|
+
|
|
151058
|
+
// Create "selection vector" of all column ids
|
|
151059
|
+
vector<idx_t> proj_sel;
|
|
151060
|
+
for (idx_t col_idx = 0; col_idx < get.column_ids.size(); col_idx++) {
|
|
151061
|
+
proj_sel.push_back(col_idx);
|
|
151062
|
+
}
|
|
151063
|
+
// Create a copy that we can use to match ids later
|
|
151064
|
+
auto col_sel = proj_sel;
|
|
151065
|
+
// Clear unused ids, exclude filter columns that are projected out immediately
|
|
151066
|
+
ClearUnusedExpressions(proj_sel, get.table_index, false);
|
|
151067
|
+
|
|
150979
151068
|
// for every table filter, push a column binding into the column references map to prevent the column from
|
|
150980
151069
|
// being projected out
|
|
150981
151070
|
for (auto &filter : get.table_filters.filters) {
|
|
@@ -150994,8 +151083,31 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
|
|
|
150994
151083
|
column_references.insert(make_pair(filter_binding, vector<BoundColumnRefExpression *>()));
|
|
150995
151084
|
}
|
|
150996
151085
|
}
|
|
150997
|
-
|
|
150998
|
-
|
|
151086
|
+
|
|
151087
|
+
// Clear unused ids, include filter columns that are projected out immediately
|
|
151088
|
+
ClearUnusedExpressions(col_sel, get.table_index);
|
|
151089
|
+
|
|
151090
|
+
// Now set the column ids in the LogicalGet using the "selection vector"
|
|
151091
|
+
vector<column_t> column_ids;
|
|
151092
|
+
column_ids.reserve(col_sel.size());
|
|
151093
|
+
for (auto col_sel_idx : col_sel) {
|
|
151094
|
+
column_ids.push_back(get.column_ids[col_sel_idx]);
|
|
151095
|
+
}
|
|
151096
|
+
get.column_ids = move(column_ids);
|
|
151097
|
+
|
|
151098
|
+
if (get.function.filter_prune) {
|
|
151099
|
+
// Now set the projection cols by matching the "selection vector" that excludes filter columns
|
|
151100
|
+
// with the "selection vector" that includes filter columns
|
|
151101
|
+
idx_t col_idx = 0;
|
|
151102
|
+
for (auto proj_sel_idx : proj_sel) {
|
|
151103
|
+
for (; col_idx < col_sel.size(); col_idx++) {
|
|
151104
|
+
if (proj_sel_idx == col_sel[col_idx]) {
|
|
151105
|
+
get.projection_ids.push_back(col_idx);
|
|
151106
|
+
break;
|
|
151107
|
+
}
|
|
151108
|
+
}
|
|
151109
|
+
}
|
|
151110
|
+
}
|
|
150999
151111
|
|
|
151000
151112
|
if (get.column_ids.empty()) {
|
|
151001
151113
|
// this generally means we are only interested in whether or not anything exists in the table (e.g.
|
|
@@ -190301,7 +190413,6 @@ unique_ptr<LogicalOperator> LogicalFilter::Deserialize(LogicalDeserializationSta
|
|
|
190301
190413
|
|
|
190302
190414
|
|
|
190303
190415
|
|
|
190304
|
-
|
|
190305
190416
|
namespace duckdb {
|
|
190306
190417
|
|
|
190307
190418
|
LogicalGet::LogicalGet(idx_t table_index, TableFunction function, unique_ptr<FunctionData> bind_data,
|
|
@@ -190339,8 +190450,14 @@ vector<ColumnBinding> LogicalGet::GetColumnBindings() {
|
|
|
190339
190450
|
return {ColumnBinding(table_index, 0)};
|
|
190340
190451
|
}
|
|
190341
190452
|
vector<ColumnBinding> result;
|
|
190342
|
-
|
|
190343
|
-
|
|
190453
|
+
if (projection_ids.empty()) {
|
|
190454
|
+
for (idx_t col_idx = 0; col_idx < column_ids.size(); col_idx++) {
|
|
190455
|
+
result.emplace_back(table_index, col_idx);
|
|
190456
|
+
}
|
|
190457
|
+
} else {
|
|
190458
|
+
for (auto proj_id : projection_ids) {
|
|
190459
|
+
result.emplace_back(table_index, proj_id);
|
|
190460
|
+
}
|
|
190344
190461
|
}
|
|
190345
190462
|
return result;
|
|
190346
190463
|
}
|
|
@@ -190349,11 +190466,23 @@ void LogicalGet::ResolveTypes() {
|
|
|
190349
190466
|
if (column_ids.empty()) {
|
|
190350
190467
|
column_ids.push_back(COLUMN_IDENTIFIER_ROW_ID);
|
|
190351
190468
|
}
|
|
190352
|
-
|
|
190353
|
-
|
|
190354
|
-
|
|
190355
|
-
|
|
190356
|
-
|
|
190469
|
+
|
|
190470
|
+
if (projection_ids.empty()) {
|
|
190471
|
+
for (auto &index : column_ids) {
|
|
190472
|
+
if (index == COLUMN_IDENTIFIER_ROW_ID) {
|
|
190473
|
+
types.emplace_back(LogicalType::ROW_TYPE);
|
|
190474
|
+
} else {
|
|
190475
|
+
types.push_back(returned_types[index]);
|
|
190476
|
+
}
|
|
190477
|
+
}
|
|
190478
|
+
} else {
|
|
190479
|
+
for (auto &proj_index : projection_ids) {
|
|
190480
|
+
auto &index = column_ids[proj_index];
|
|
190481
|
+
if (index == COLUMN_IDENTIFIER_ROW_ID) {
|
|
190482
|
+
types.emplace_back(LogicalType::ROW_TYPE);
|
|
190483
|
+
} else {
|
|
190484
|
+
types.push_back(returned_types[index]);
|
|
190485
|
+
}
|
|
190357
190486
|
}
|
|
190358
190487
|
}
|
|
190359
190488
|
}
|
|
@@ -190373,6 +190502,7 @@ void LogicalGet::Serialize(FieldWriter &writer) const {
|
|
|
190373
190502
|
writer.WriteRegularSerializableList(returned_types);
|
|
190374
190503
|
writer.WriteList<string>(names);
|
|
190375
190504
|
writer.WriteList<column_t>(column_ids);
|
|
190505
|
+
writer.WriteList<column_t>(projection_ids);
|
|
190376
190506
|
writer.WriteSerializable(table_filters);
|
|
190377
190507
|
|
|
190378
190508
|
FunctionSerializer::SerializeBase<TableFunction>(writer, function, bind_data.get());
|
|
@@ -190395,6 +190525,7 @@ unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState
|
|
|
190395
190525
|
auto returned_types = reader.ReadRequiredSerializableList<LogicalType, LogicalType>();
|
|
190396
190526
|
auto returned_names = reader.ReadRequiredList<string>();
|
|
190397
190527
|
auto column_ids = reader.ReadRequiredList<column_t>();
|
|
190528
|
+
auto projection_ids = reader.ReadRequiredList<column_t>();
|
|
190398
190529
|
auto table_filters = reader.ReadRequiredSerializable<TableFilterSet>();
|
|
190399
190530
|
|
|
190400
190531
|
unique_ptr<FunctionData> bind_data;
|
|
@@ -190439,6 +190570,7 @@ unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState
|
|
|
190439
190570
|
|
|
190440
190571
|
auto result = make_unique<LogicalGet>(table_index, function, move(bind_data), returned_types, returned_names);
|
|
190441
190572
|
result->column_ids = move(column_ids);
|
|
190573
|
+
result->projection_ids = move(projection_ids);
|
|
190442
190574
|
result->table_filters = move(*table_filters);
|
|
190443
190575
|
result->parameters = move(parameters);
|
|
190444
190576
|
result->named_parameters = move(named_parameters);
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.2-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "9f84ca408"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.2-dev512"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -4855,7 +4855,7 @@ public:
|
|
|
4855
4855
|
DUCKDB_API void Fuse(DataChunk &other);
|
|
4856
4856
|
|
|
4857
4857
|
//! Makes this DataChunk reference the specified columns in the other DataChunk
|
|
4858
|
-
DUCKDB_API void ReferenceColumns(DataChunk &other, vector<column_t> column_ids);
|
|
4858
|
+
DUCKDB_API void ReferenceColumns(DataChunk &other, const vector<column_t> &column_ids);
|
|
4859
4859
|
|
|
4860
4860
|
//! Turn all the vectors from the chunk into flat vectors
|
|
4861
4861
|
DUCKDB_API void Flatten();
|
|
@@ -12615,21 +12615,37 @@ struct TableFunctionBindInput {
|
|
|
12615
12615
|
|
|
12616
12616
|
struct TableFunctionInitInput {
|
|
12617
12617
|
TableFunctionInitInput(const FunctionData *bind_data_p, const vector<column_t> &column_ids_p,
|
|
12618
|
-
TableFilterSet *filters_p)
|
|
12619
|
-
: bind_data(bind_data_p), column_ids(column_ids_p), filters(filters_p) {
|
|
12618
|
+
const vector<idx_t> &projection_ids_p, TableFilterSet *filters_p)
|
|
12619
|
+
: bind_data(bind_data_p), column_ids(column_ids_p), projection_ids(projection_ids_p), filters(filters_p) {
|
|
12620
12620
|
}
|
|
12621
12621
|
|
|
12622
12622
|
const FunctionData *bind_data;
|
|
12623
12623
|
const vector<column_t> &column_ids;
|
|
12624
|
+
const vector<idx_t> projection_ids;
|
|
12624
12625
|
TableFilterSet *filters;
|
|
12626
|
+
|
|
12627
|
+
bool CanRemoveFilterColumns() const {
|
|
12628
|
+
if (projection_ids.empty()) {
|
|
12629
|
+
// Not set, can't remove filter columns
|
|
12630
|
+
return false;
|
|
12631
|
+
} else if (projection_ids.size() == column_ids.size()) {
|
|
12632
|
+
// Filter column is used in remainder of plan, can't remove
|
|
12633
|
+
return false;
|
|
12634
|
+
} else {
|
|
12635
|
+
// Less columns need to be projected out than that we scan
|
|
12636
|
+
return true;
|
|
12637
|
+
}
|
|
12638
|
+
}
|
|
12625
12639
|
};
|
|
12626
12640
|
|
|
12627
12641
|
struct TableFunctionInput {
|
|
12642
|
+
public:
|
|
12628
12643
|
TableFunctionInput(const FunctionData *bind_data_p, LocalTableFunctionState *local_state_p,
|
|
12629
12644
|
GlobalTableFunctionState *global_state_p)
|
|
12630
12645
|
: bind_data(bind_data_p), local_state(local_state_p), global_state(global_state_p) {
|
|
12631
12646
|
}
|
|
12632
12647
|
|
|
12648
|
+
public:
|
|
12633
12649
|
const FunctionData *bind_data;
|
|
12634
12650
|
LocalTableFunctionState *local_state;
|
|
12635
12651
|
GlobalTableFunctionState *global_state;
|
|
@@ -12722,6 +12738,9 @@ public:
|
|
|
12722
12738
|
//! Whether or not the table function supports filter pushdown. If not supported a filter will be added
|
|
12723
12739
|
//! that applies the table filter directly.
|
|
12724
12740
|
bool filter_pushdown;
|
|
12741
|
+
//! Whether or not the table function can immediately prune out filter columns that are unused in the remainder of
|
|
12742
|
+
//! the query plan, e.g., "SELECT i FROM tbl WHERE j = 42;" - j does not need to leave the table function at all
|
|
12743
|
+
bool filter_prune;
|
|
12725
12744
|
//! Additional function info, passed to the bind
|
|
12726
12745
|
shared_ptr<TableFunctionInfo> function_info;
|
|
12727
12746
|
};
|
|
@@ -28963,6 +28982,8 @@ public:
|
|
|
28963
28982
|
vector<string> names;
|
|
28964
28983
|
//! Bound column IDs
|
|
28965
28984
|
vector<column_t> column_ids;
|
|
28985
|
+
//! Columns that are used outside of the scan
|
|
28986
|
+
vector<idx_t> projection_ids;
|
|
28966
28987
|
//! Filters pushed down for table scan
|
|
28967
28988
|
TableFilterSet table_filters;
|
|
28968
28989
|
//! The set of input parameters for the table function
|