duckdb 0.3.5-dev617.0 → 0.3.5-dev651.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -7044,6 +7044,8 @@ string PhysicalOperatorToString(PhysicalOperatorType type) {
7044
7044
  return "LIMIT";
7045
7045
  case PhysicalOperatorType::LIMIT_PERCENT:
7046
7046
  return "LIMIT_PERCENT";
7047
+ case PhysicalOperatorType::STREAMING_LIMIT:
7048
+ return "STREAMING_LIMIT";
7047
7049
  case PhysicalOperatorType::RESERVOIR_SAMPLE:
7048
7050
  return "RESERVOIR_SAMPLE";
7049
7051
  case PhysicalOperatorType::STREAMING_SAMPLE:
@@ -7142,6 +7144,8 @@ string PhysicalOperatorToString(PhysicalOperatorType type) {
7142
7144
  return "INOUT_FUNCTION";
7143
7145
  case PhysicalOperatorType::CREATE_TYPE:
7144
7146
  return "CREATE_TYPE";
7147
+ case PhysicalOperatorType::RESULT_COLLECTOR:
7148
+ return "RESULT_COLLECTOR";
7145
7149
  case PhysicalOperatorType::INVALID:
7146
7150
  break;
7147
7151
  }
@@ -36623,6 +36627,9 @@ public:
36623
36627
  unique_ptr<PhysicalHashAggregate> distinct;
36624
36628
  vector<PhysicalOperator *> delim_scans;
36625
36629
 
36630
+ public:
36631
+ vector<PhysicalOperator *> GetChildren() const override;
36632
+
36626
36633
  public:
36627
36634
  unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
36628
36635
  unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
@@ -36640,6 +36647,9 @@ public:
36640
36647
  }
36641
36648
 
36642
36649
  string ParamsToString() const override;
36650
+
36651
+ public:
36652
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
36643
36653
  };
36644
36654
 
36645
36655
  } // namespace duckdb
@@ -37666,10 +37676,127 @@ unique_ptr<RenderTree> TreeRenderer::CreateTree(const Pipeline &op) {
37666
37676
  return CreateRenderTree<PipelineRenderNode>(*node);
37667
37677
  }
37668
37678
 
37679
+ } // namespace duckdb//===----------------------------------------------------------------------===//
37680
+ // DuckDB
37681
+ //
37682
+ // duckdb/common/types/batched_chunk_collection.hpp
37683
+ //
37684
+ //
37685
+ //===----------------------------------------------------------------------===//
37686
+
37687
+
37688
+
37689
+
37690
+
37691
+
37692
+ namespace duckdb {
37693
+
37694
+ struct BatchedChunkScanState {
37695
+ map<idx_t, unique_ptr<ChunkCollection>>::iterator iterator;
37696
+ idx_t chunk_index;
37697
+ };
37698
+
37699
+ //! A BatchedChunkCollection holds a number of data entries that are partitioned by batch index
37700
+ //! Scans over a BatchedChunkCollection are ordered by batch index
37701
+ class BatchedChunkCollection {
37702
+ public:
37703
+ DUCKDB_API BatchedChunkCollection();
37704
+
37705
+ //! Appends a datachunk with the given batch index to the batched collection
37706
+ DUCKDB_API void Append(DataChunk &input, idx_t batch_index);
37707
+
37708
+ //! Merge the other batched chunk collection into this batched collection
37709
+ DUCKDB_API void Merge(BatchedChunkCollection &other);
37710
+
37711
+ //! Initialize a scan over the batched chunk collection
37712
+ DUCKDB_API void InitializeScan(BatchedChunkScanState &state);
37713
+
37714
+ //! Scan a chunk from the batched chunk collection, in-order of batch index
37715
+ DUCKDB_API void Scan(BatchedChunkScanState &state, DataChunk &output);
37716
+
37717
+ DUCKDB_API string ToString() const;
37718
+ DUCKDB_API void Print() const;
37719
+
37720
+ private:
37721
+ //! The data of the batched chunk collection - a set of batch_index -> ChunkCollection pointers
37722
+ map<idx_t, unique_ptr<ChunkCollection>> data;
37723
+ };
37669
37724
  } // namespace duckdb
37670
37725
 
37671
37726
 
37672
37727
 
37728
+ namespace duckdb {
37729
+
37730
+ BatchedChunkCollection::BatchedChunkCollection() {
37731
+ }
37732
+
37733
+ void BatchedChunkCollection::Append(DataChunk &input, idx_t batch_index) {
37734
+ D_ASSERT(batch_index != DConstants::INVALID_INDEX);
37735
+ auto entry = data.find(batch_index);
37736
+ ChunkCollection *collection;
37737
+ if (entry == data.end()) {
37738
+ auto new_collection = make_unique<ChunkCollection>();
37739
+ collection = new_collection.get();
37740
+ data.insert(make_pair(batch_index, move(new_collection)));
37741
+ } else {
37742
+ collection = entry->second.get();
37743
+ }
37744
+ collection->Append(input);
37745
+ }
37746
+
37747
+ void BatchedChunkCollection::Merge(BatchedChunkCollection &other) {
37748
+ for (auto &entry : other.data) {
37749
+ if (data.find(entry.first) != data.end()) {
37750
+ throw InternalException(
37751
+ "BatchChunkCollection::Merge error - batch index %d is present in both collections. This occurs when "
37752
+ "batch indexes are not uniquely distributed over threads",
37753
+ entry.first);
37754
+ }
37755
+ data[entry.first] = move(entry.second);
37756
+ }
37757
+ other.data.clear();
37758
+ }
37759
+
37760
+ void BatchedChunkCollection::InitializeScan(BatchedChunkScanState &state) {
37761
+ state.iterator = data.begin();
37762
+ state.chunk_index = 0;
37763
+ }
37764
+
37765
+ void BatchedChunkCollection::Scan(BatchedChunkScanState &state, DataChunk &output) {
37766
+ while (state.iterator != data.end()) {
37767
+ // check if there is a chunk remaining in this collection
37768
+ auto collection = state.iterator->second.get();
37769
+ if (state.chunk_index < collection->ChunkCount()) {
37770
+ // there is! increment the chunk count
37771
+ output.Reference(collection->GetChunk(state.chunk_index));
37772
+ state.chunk_index++;
37773
+ return;
37774
+ }
37775
+ // there isn't! move to the next collection
37776
+ state.iterator++;
37777
+ state.chunk_index = 0;
37778
+ }
37779
+ }
37780
+
37781
+ string BatchedChunkCollection::ToString() const {
37782
+ string result;
37783
+ result += "Batched Chunk Collection\n";
37784
+ for (auto &entry : data) {
37785
+ result += "Batch Index - " + to_string(entry.first) + "\n";
37786
+ result += entry.second->ToString() + "\n\n";
37787
+ }
37788
+ return result;
37789
+ }
37790
+
37791
+ void BatchedChunkCollection::Print() const {
37792
+ Printer::Print(ToString());
37793
+ }
37794
+
37795
+ } // namespace duckdb
37796
+
37797
+
37798
+
37799
+
37673
37800
 
37674
37801
 
37675
37802
 
@@ -38507,7 +38634,12 @@ void ChunkCollection::CopyCell(idx_t column, idx_t index, Vector &target, idx_t
38507
38634
  VectorOperations::Copy(source, target, source_offset + 1, source_offset, target_offset);
38508
38635
  }
38509
38636
 
38510
- void ChunkCollection::Print() {
38637
+ string ChunkCollection::ToString() const {
38638
+ return chunks.empty() ? "ChunkCollection [ 0 ]"
38639
+ : "ChunkCollection [ " + std::to_string(count) + " ]: \n" + chunks[0]->ToString();
38640
+ }
38641
+
38642
+ void ChunkCollection::Print() const {
38511
38643
  Printer::Print(ToString());
38512
38644
  }
38513
38645
 
@@ -48636,17 +48768,14 @@ inline idx_t ComparisonSelector::Select<duckdb::LessThanEquals>(Vector &left, Ve
48636
48768
  return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel);
48637
48769
  }
48638
48770
 
48639
- static idx_t ComparesNotNull(ValidityMask &vleft, ValidityMask &vright, ValidityMask &vresult, idx_t count,
48640
- SelectionVector &not_null) {
48641
- idx_t valid = 0;
48771
+ static void ComparesNotNull(VectorData &ldata, VectorData &rdata, ValidityMask &vresult, idx_t count) {
48642
48772
  for (idx_t i = 0; i < count; ++i) {
48643
- if (vleft.RowIsValid(i) && vright.RowIsValid(i)) {
48644
- not_null.set_index(valid++, i);
48645
- } else {
48773
+ auto lidx = ldata.sel->get_index(i);
48774
+ auto ridx = rdata.sel->get_index(i);
48775
+ if (!ldata.validity.RowIsValid(lidx) || !rdata.validity.RowIsValid(ridx)) {
48646
48776
  vresult.SetInvalid(i);
48647
48777
  }
48648
48778
  }
48649
- return valid;
48650
48779
  }
48651
48780
 
48652
48781
  template <typename OP>
@@ -48673,23 +48802,17 @@ static void NestedComparisonExecutor(Vector &left, Vector &right, Vector &result
48673
48802
 
48674
48803
  result.SetVectorType(VectorType::FLAT_VECTOR);
48675
48804
  auto result_data = FlatVector::GetData<bool>(result);
48676
- auto &validity = FlatVector::Validity(result);
48805
+ auto &result_validity = FlatVector::Validity(result);
48677
48806
 
48678
48807
  VectorData leftv, rightv;
48679
48808
  left.Orrify(count, leftv);
48680
48809
  right.Orrify(count, rightv);
48681
-
48810
+ if (!leftv.validity.AllValid() || !rightv.validity.AllValid()) {
48811
+ ComparesNotNull(leftv, rightv, result_validity, count);
48812
+ }
48682
48813
  SelectionVector true_sel(count);
48683
48814
  SelectionVector false_sel(count);
48684
-
48685
- idx_t match_count = 0;
48686
- if (leftv.validity.AllValid() && rightv.validity.AllValid()) {
48687
- match_count = ComparisonSelector::Select<OP>(left, right, nullptr, count, &true_sel, &false_sel);
48688
- } else {
48689
- SelectionVector not_null(count);
48690
- count = ComparesNotNull(leftv.validity, rightv.validity, validity, count, not_null);
48691
- match_count = ComparisonSelector::Select<OP>(left, right, &not_null, count, &true_sel, &false_sel);
48692
- }
48815
+ idx_t match_count = ComparisonSelector::Select<OP>(left, right, nullptr, count, &true_sel, &false_sel);
48693
48816
 
48694
48817
  for (idx_t i = 0; i < match_count; ++i) {
48695
48818
  const auto idx = true_sel.get_index(i);
@@ -60510,6 +60633,171 @@ string PhysicalFilter::ParamsToString() const {
60510
60633
  return expression->GetName();
60511
60634
  }
60512
60635
 
60636
+ } // namespace duckdb
60637
+ //===----------------------------------------------------------------------===//
60638
+ // DuckDB
60639
+ //
60640
+ // duckdb/execution/operator/helper/physical_batch_collector.hpp
60641
+ //
60642
+ //
60643
+ //===----------------------------------------------------------------------===//
60644
+
60645
+
60646
+
60647
+ //===----------------------------------------------------------------------===//
60648
+ // DuckDB
60649
+ //
60650
+ // duckdb/execution/operator/helper/physical_result_collector.hpp
60651
+ //
60652
+ //
60653
+ //===----------------------------------------------------------------------===//
60654
+
60655
+
60656
+
60657
+
60658
+
60659
+
60660
+ namespace duckdb {
60661
+ class PreparedStatementData;
60662
+
60663
+ //! PhysicalResultCollector is an abstract class that is used to generate the final result of a query
60664
+ class PhysicalResultCollector : public PhysicalOperator {
60665
+ public:
60666
+ PhysicalResultCollector(PreparedStatementData &data);
60667
+
60668
+ StatementType statement_type;
60669
+ StatementProperties properties;
60670
+ PhysicalOperator *plan;
60671
+ vector<string> names;
60672
+
60673
+ public:
60674
+ static unique_ptr<PhysicalResultCollector> GetResultCollector(ClientContext &context, PreparedStatementData &data);
60675
+
60676
+ public:
60677
+ //! The final method used to fetch the query result from this operator
60678
+ virtual unique_ptr<QueryResult> GetResult(GlobalSinkState &state) = 0;
60679
+
60680
+ bool IsSink() const override {
60681
+ return true;
60682
+ }
60683
+
60684
+ public:
60685
+ vector<PhysicalOperator *> GetChildren() const override;
60686
+
60687
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
60688
+ };
60689
+
60690
+ } // namespace duckdb
60691
+
60692
+
60693
+ namespace duckdb {
60694
+
60695
+ class PhysicalBatchCollector : public PhysicalResultCollector {
60696
+ public:
60697
+ PhysicalBatchCollector(PreparedStatementData &data);
60698
+
60699
+ public:
60700
+ unique_ptr<QueryResult> GetResult(GlobalSinkState &state) override;
60701
+
60702
+ public:
60703
+ // Sink interface
60704
+ SinkResultType Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
60705
+ DataChunk &input) const override;
60706
+ void Combine(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate) const override;
60707
+ SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
60708
+ GlobalSinkState &gstate) const override;
60709
+
60710
+ unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
60711
+ unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
60712
+
60713
+ bool RequiresBatchIndex() const override {
60714
+ return true;
60715
+ }
60716
+
60717
+ bool ParallelSink() const override {
60718
+ return true;
60719
+ }
60720
+ };
60721
+
60722
+ } // namespace duckdb
60723
+
60724
+
60725
+
60726
+
60727
+
60728
+ namespace duckdb {
60729
+
60730
+ PhysicalBatchCollector::PhysicalBatchCollector(PreparedStatementData &data) : PhysicalResultCollector(data) {
60731
+ }
60732
+
60733
+ //===--------------------------------------------------------------------===//
60734
+ // Sink
60735
+ //===--------------------------------------------------------------------===//
60736
+ class BatchCollectorGlobalState : public GlobalSinkState {
60737
+ public:
60738
+ mutex glock;
60739
+ BatchedChunkCollection data;
60740
+ unique_ptr<MaterializedQueryResult> result;
60741
+ };
60742
+
60743
+ class BatchCollectorLocalState : public LocalSinkState {
60744
+ public:
60745
+ BatchedChunkCollection data;
60746
+ };
60747
+
60748
+ SinkResultType PhysicalBatchCollector::Sink(ExecutionContext &context, GlobalSinkState &gstate,
60749
+ LocalSinkState &lstate_p, DataChunk &input) const {
60750
+ auto &state = (BatchCollectorLocalState &)lstate_p;
60751
+ state.data.Append(input, state.batch_index);
60752
+ return SinkResultType::NEED_MORE_INPUT;
60753
+ }
60754
+
60755
+ void PhysicalBatchCollector::Combine(ExecutionContext &context, GlobalSinkState &gstate_p,
60756
+ LocalSinkState &lstate_p) const {
60757
+ auto &gstate = (BatchCollectorGlobalState &)gstate_p;
60758
+ auto &state = (BatchCollectorLocalState &)lstate_p;
60759
+
60760
+ lock_guard<mutex> lock(gstate.glock);
60761
+ gstate.data.Merge(state.data);
60762
+ }
60763
+
60764
+ SinkFinalizeType PhysicalBatchCollector::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
60765
+ GlobalSinkState &gstate_p) const {
60766
+ auto &gstate = (BatchCollectorGlobalState &)gstate_p;
60767
+ auto result =
60768
+ make_unique<MaterializedQueryResult>(statement_type, properties, types, names, context.shared_from_this());
60769
+ DataChunk output;
60770
+ output.Initialize(types);
60771
+
60772
+ BatchedChunkScanState state;
60773
+ gstate.data.InitializeScan(state);
60774
+ while (true) {
60775
+ output.Reset();
60776
+ gstate.data.Scan(state, output);
60777
+ if (output.size() == 0) {
60778
+ break;
60779
+ }
60780
+ result->collection.Append(output);
60781
+ }
60782
+
60783
+ gstate.result = move(result);
60784
+ return SinkFinalizeType::READY;
60785
+ }
60786
+
60787
+ unique_ptr<LocalSinkState> PhysicalBatchCollector::GetLocalSinkState(ExecutionContext &context) const {
60788
+ return make_unique<BatchCollectorLocalState>();
60789
+ }
60790
+
60791
+ unique_ptr<GlobalSinkState> PhysicalBatchCollector::GetGlobalSinkState(ClientContext &context) const {
60792
+ return make_unique<BatchCollectorGlobalState>();
60793
+ }
60794
+
60795
+ unique_ptr<QueryResult> PhysicalBatchCollector::GetResult(GlobalSinkState &state) {
60796
+ auto &gstate = (BatchCollectorGlobalState &)state;
60797
+ D_ASSERT(gstate.result);
60798
+ return move(gstate.result);
60799
+ }
60800
+
60513
60801
  } // namespace duckdb
60514
60802
  //===----------------------------------------------------------------------===//
60515
60803
  // DuckDB
@@ -60586,6 +60874,12 @@ public:
60586
60874
  PhysicalOperator *plan;
60587
60875
  unique_ptr<PhysicalOperator> owned_plan;
60588
60876
  shared_ptr<PreparedStatementData> prepared;
60877
+
60878
+ public:
60879
+ vector<PhysicalOperator *> GetChildren() const override;
60880
+
60881
+ public:
60882
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
60589
60883
  };
60590
60884
 
60591
60885
  } // namespace duckdb
@@ -60597,6 +60891,15 @@ PhysicalExecute::PhysicalExecute(PhysicalOperator *plan)
60597
60891
  : PhysicalOperator(PhysicalOperatorType::EXECUTE, plan->types, -1), plan(plan) {
60598
60892
  }
60599
60893
 
60894
+ vector<PhysicalOperator *> PhysicalExecute::GetChildren() const {
60895
+ return {plan};
60896
+ }
60897
+
60898
+ void PhysicalExecute::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
60899
+ // EXECUTE statement: build pipeline on child
60900
+ plan->BuildPipelines(executor, current, state);
60901
+ }
60902
+
60600
60903
  } // namespace duckdb
60601
60904
  //===----------------------------------------------------------------------===//
60602
60905
  // DuckDB
@@ -60724,16 +61027,18 @@ namespace duckdb {
60724
61027
  class PhysicalLimit : public PhysicalOperator {
60725
61028
  public:
60726
61029
  PhysicalLimit(vector<LogicalType> types, idx_t limit, idx_t offset, unique_ptr<Expression> limit_expression,
60727
- unique_ptr<Expression> offset_expression, idx_t estimated_cardinality)
60728
- : PhysicalOperator(PhysicalOperatorType::LIMIT, move(types), estimated_cardinality), limit_value(limit),
60729
- offset_value(offset), limit_expression(move(limit_expression)), offset_expression(move(offset_expression)) {
60730
- }
61030
+ unique_ptr<Expression> offset_expression, idx_t estimated_cardinality);
60731
61031
 
60732
61032
  idx_t limit_value;
60733
61033
  idx_t offset_value;
60734
61034
  unique_ptr<Expression> limit_expression;
60735
61035
  unique_ptr<Expression> offset_expression;
60736
61036
 
61037
+ public:
61038
+ bool IsOrderDependent() const override {
61039
+ return true;
61040
+ }
61041
+
60737
61042
  public:
60738
61043
  // Source interface
60739
61044
  unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
@@ -60742,18 +61047,27 @@ public:
60742
61047
 
60743
61048
  public:
60744
61049
  // Sink Interface
60745
- unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
60746
61050
  SinkResultType Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
60747
61051
  DataChunk &input) const override;
61052
+ void Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const override;
61053
+ unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
61054
+ unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
60748
61055
 
60749
61056
  bool IsSink() const override {
60750
61057
  return true;
60751
61058
  }
60752
61059
 
60753
- bool SinkOrderMatters() const override {
61060
+ bool ParallelSink() const override {
61061
+ return true;
61062
+ }
61063
+
61064
+ bool RequiresBatchIndex() const override {
60754
61065
  return true;
60755
61066
  }
60756
61067
 
61068
+ public:
61069
+ static bool ComputeOffset(DataChunk &input, idx_t &limit, idx_t &offset, idx_t current_offset, idx_t &max_element,
61070
+ Expression *limit_expression, Expression *offset_expression);
60757
61071
  static bool HandleOffset(DataChunk &input, idx_t &current_offset, idx_t offset, idx_t limit);
60758
61072
  static Value GetDelimiter(DataChunk &input, Expression *expr);
60759
61073
  };
@@ -60766,14 +61080,75 @@ public:
60766
61080
 
60767
61081
 
60768
61082
 
61083
+ //===----------------------------------------------------------------------===//
61084
+ // DuckDB
61085
+ //
61086
+ // duckdb/execution/operator/helper/physical_streaming_limit.hpp
61087
+ //
61088
+ //
61089
+ //===----------------------------------------------------------------------===//
61090
+
61091
+
61092
+
61093
+
61094
+
61095
+
61096
+ namespace duckdb {
61097
+
61098
+ class PhysicalStreamingLimit : public PhysicalOperator {
61099
+ public:
61100
+ PhysicalStreamingLimit(vector<LogicalType> types, idx_t limit, idx_t offset,
61101
+ unique_ptr<Expression> limit_expression, unique_ptr<Expression> offset_expression,
61102
+ idx_t estimated_cardinality, bool parallel);
61103
+
61104
+ idx_t limit_value;
61105
+ idx_t offset_value;
61106
+ unique_ptr<Expression> limit_expression;
61107
+ unique_ptr<Expression> offset_expression;
61108
+ bool parallel;
61109
+
61110
+ public:
61111
+ // Operator interface
61112
+ unique_ptr<OperatorState> GetOperatorState(ClientContext &context) const override;
61113
+ unique_ptr<GlobalOperatorState> GetGlobalOperatorState(ClientContext &context) const override;
61114
+ OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
61115
+ GlobalOperatorState &gstate, OperatorState &state) const override;
61116
+
61117
+ bool IsOrderDependent() const override;
61118
+ bool ParallelOperator() const override;
61119
+ };
61120
+
61121
+ } // namespace duckdb
61122
+
61123
+
60769
61124
  namespace duckdb {
60770
61125
 
61126
+ PhysicalLimit::PhysicalLimit(vector<LogicalType> types, idx_t limit, idx_t offset,
61127
+ unique_ptr<Expression> limit_expression, unique_ptr<Expression> offset_expression,
61128
+ idx_t estimated_cardinality)
61129
+ : PhysicalOperator(PhysicalOperatorType::LIMIT, move(types), estimated_cardinality), limit_value(limit),
61130
+ offset_value(offset), limit_expression(move(limit_expression)), offset_expression(move(offset_expression)) {
61131
+ }
61132
+
60771
61133
  //===--------------------------------------------------------------------===//
60772
61134
  // Sink
60773
61135
  //===--------------------------------------------------------------------===//
60774
61136
  class LimitGlobalState : public GlobalSinkState {
60775
61137
  public:
60776
- explicit LimitGlobalState(const PhysicalLimit &op) : current_offset(0) {
61138
+ explicit LimitGlobalState(const PhysicalLimit &op) {
61139
+ limit = 0;
61140
+ offset = 0;
61141
+ }
61142
+
61143
+ mutex glock;
61144
+ idx_t limit;
61145
+ idx_t offset;
61146
+ BatchedChunkCollection data;
61147
+ };
61148
+
61149
+ class LimitLocalState : public LocalSinkState {
61150
+ public:
61151
+ explicit LimitLocalState(const PhysicalLimit &op) : current_offset(0) {
60777
61152
  this->limit = op.limit_expression ? DConstants::INVALID_INDEX : op.limit_value;
60778
61153
  this->offset = op.offset_expression ? DConstants::INVALID_INDEX : op.offset_value;
60779
61154
  }
@@ -60781,31 +61156,30 @@ public:
60781
61156
  idx_t current_offset;
60782
61157
  idx_t limit;
60783
61158
  idx_t offset;
60784
- ChunkCollection data;
61159
+ BatchedChunkCollection data;
60785
61160
  };
60786
61161
 
60787
61162
  unique_ptr<GlobalSinkState> PhysicalLimit::GetGlobalSinkState(ClientContext &context) const {
60788
61163
  return make_unique<LimitGlobalState>(*this);
60789
61164
  }
60790
61165
 
60791
- SinkResultType PhysicalLimit::Sink(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate,
60792
- DataChunk &input) const {
60793
- D_ASSERT(input.size() > 0);
60794
- auto &state = (LimitGlobalState &)gstate;
60795
- auto &limit = state.limit;
60796
- auto &offset = state.offset;
61166
+ unique_ptr<LocalSinkState> PhysicalLimit::GetLocalSinkState(ExecutionContext &context) const {
61167
+ return make_unique<LimitLocalState>(*this);
61168
+ }
60797
61169
 
61170
+ bool PhysicalLimit::ComputeOffset(DataChunk &input, idx_t &limit, idx_t &offset, idx_t current_offset,
61171
+ idx_t &max_element, Expression *limit_expression, Expression *offset_expression) {
60798
61172
  if (limit != DConstants::INVALID_INDEX && offset != DConstants::INVALID_INDEX) {
60799
- idx_t max_element = limit + offset;
60800
- if ((limit == 0 || state.current_offset >= max_element) && !(limit_expression || offset_expression)) {
60801
- return SinkResultType::FINISHED;
61173
+ max_element = limit + offset;
61174
+ if ((limit == 0 || current_offset >= max_element) && !(limit_expression || offset_expression)) {
61175
+ return false;
60802
61176
  }
60803
61177
  }
60804
61178
 
60805
61179
  // get the next chunk from the child
60806
61180
  if (limit == DConstants::INVALID_INDEX) {
60807
61181
  limit = 1ULL << 62ULL;
60808
- Value val = GetDelimiter(input, limit_expression.get());
61182
+ Value val = GetDelimiter(input, limit_expression);
60809
61183
  if (!val.IsNull()) {
60810
61184
  limit = val.GetValue<idx_t>();
60811
61185
  }
@@ -60815,7 +61189,7 @@ SinkResultType PhysicalLimit::Sink(ExecutionContext &context, GlobalSinkState &g
60815
61189
  }
60816
61190
  if (offset == DConstants::INVALID_INDEX) {
60817
61191
  offset = 0;
60818
- Value val = GetDelimiter(input, offset_expression.get());
61192
+ Value val = GetDelimiter(input, offset_expression);
60819
61193
  if (!val.IsNull()) {
60820
61194
  offset = val.GetValue<idx_t>();
60821
61195
  }
@@ -60823,42 +61197,77 @@ SinkResultType PhysicalLimit::Sink(ExecutionContext &context, GlobalSinkState &g
60823
61197
  throw BinderException("Max value %lld for LIMIT/OFFSET is %lld", offset, 1ULL << 62ULL);
60824
61198
  }
60825
61199
  }
60826
- idx_t max_element = limit + offset;
60827
- if (limit == 0 || state.current_offset >= max_element) {
60828
- return SinkResultType::FINISHED;
60829
- }
60830
- if (!HandleOffset(input, state.current_offset, offset, limit)) {
60831
- return SinkResultType::NEED_MORE_INPUT;
61200
+ max_element = limit + offset;
61201
+ if (limit == 0 || current_offset >= max_element) {
61202
+ return false;
60832
61203
  }
61204
+ return true;
61205
+ }
60833
61206
 
60834
- state.data.Append(input);
61207
+ SinkResultType PhysicalLimit::Sink(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate,
61208
+ DataChunk &input) const {
61209
+
61210
+ D_ASSERT(input.size() > 0);
61211
+ auto &state = (LimitLocalState &)lstate;
61212
+ auto &limit = state.limit;
61213
+ auto &offset = state.offset;
61214
+
61215
+ idx_t max_element;
61216
+ if (!ComputeOffset(input, limit, offset, state.current_offset, max_element, limit_expression.get(),
61217
+ offset_expression.get())) {
61218
+ return SinkResultType::FINISHED;
61219
+ }
61220
+ state.data.Append(input, lstate.batch_index);
61221
+ state.current_offset += input.size();
60835
61222
  return SinkResultType::NEED_MORE_INPUT;
60836
61223
  }
60837
61224
 
61225
+ void PhysicalLimit::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
61226
+ auto &gstate = (LimitGlobalState &)gstate_p;
61227
+ auto &state = (LimitLocalState &)lstate_p;
61228
+
61229
+ lock_guard<mutex> lock(gstate.glock);
61230
+ gstate.limit = state.limit;
61231
+ gstate.offset = state.offset;
61232
+ gstate.data.Merge(state.data);
61233
+ }
61234
+
60838
61235
  //===--------------------------------------------------------------------===//
60839
61236
  // Source
60840
61237
  //===--------------------------------------------------------------------===//
60841
- class LimitOperatorState : public GlobalSourceState {
61238
+ class LimitSourceState : public GlobalSourceState {
60842
61239
  public:
60843
- LimitOperatorState() : chunk_idx(0) {
61240
+ LimitSourceState() {
61241
+ initialized = false;
61242
+ current_offset = 0;
60844
61243
  }
60845
61244
 
60846
- idx_t chunk_idx;
61245
+ bool initialized;
61246
+ idx_t current_offset;
61247
+ BatchedChunkScanState scan_state;
60847
61248
  };
60848
61249
 
60849
61250
  unique_ptr<GlobalSourceState> PhysicalLimit::GetGlobalSourceState(ClientContext &context) const {
60850
- return make_unique<LimitOperatorState>();
61251
+ return make_unique<LimitSourceState>();
60851
61252
  }
60852
61253
 
60853
61254
  void PhysicalLimit::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
60854
61255
  LocalSourceState &lstate) const {
60855
61256
  auto &gstate = (LimitGlobalState &)*sink_state;
60856
- auto &state = (LimitOperatorState &)gstate_p;
60857
- if (state.chunk_idx >= gstate.data.ChunkCount()) {
60858
- return;
61257
+ auto &state = (LimitSourceState &)gstate_p;
61258
+ while (state.current_offset < gstate.limit + gstate.offset) {
61259
+ if (!state.initialized) {
61260
+ gstate.data.InitializeScan(state.scan_state);
61261
+ state.initialized = true;
61262
+ }
61263
+ gstate.data.Scan(state.scan_state, chunk);
61264
+ if (chunk.size() == 0) {
61265
+ break;
61266
+ }
61267
+ if (HandleOffset(chunk, state.current_offset, gstate.offset, gstate.limit)) {
61268
+ break;
61269
+ }
60859
61270
  }
60860
- chunk.Reference(gstate.data.GetChunk(state.chunk_idx));
60861
- state.chunk_idx++;
60862
61271
  }
60863
61272
 
60864
61273
  bool PhysicalLimit::HandleOffset(DataChunk &input, idx_t &current_offset, idx_t offset, idx_t limit) {
@@ -60948,6 +61357,11 @@ public:
60948
61357
  unique_ptr<Expression> limit_expression;
60949
61358
  unique_ptr<Expression> offset_expression;
60950
61359
 
61360
+ public:
61361
+ bool IsOrderDependent() const override {
61362
+ return true;
61363
+ }
61364
+
60951
61365
  public:
60952
61366
  // Source interface
60953
61367
  unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
@@ -60963,10 +61377,6 @@ public:
60963
61377
  bool IsSink() const override {
60964
61378
  return true;
60965
61379
  }
60966
-
60967
- bool SinkOrderMatters() const override {
60968
- return true;
60969
- }
60970
61380
  };
60971
61381
 
60972
61382
  } // namespace duckdb
@@ -61182,6 +61592,86 @@ void PhysicalLoad::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSo
61182
61592
  }
61183
61593
  }
61184
61594
 
61595
+ } // namespace duckdb
61596
+ //===----------------------------------------------------------------------===//
61597
+ // DuckDB
61598
+ //
61599
+ // duckdb/execution/operator/helper/physical_materialized_collector.hpp
61600
+ //
61601
+ //
61602
+ //===----------------------------------------------------------------------===//
61603
+
61604
+
61605
+
61606
+
61607
+
61608
+ namespace duckdb {
61609
+
61610
+ class PhysicalMaterializedCollector : public PhysicalResultCollector {
61611
+ public:
61612
+ PhysicalMaterializedCollector(PreparedStatementData &data, bool parallel);
61613
+
61614
+ bool parallel;
61615
+
61616
+ public:
61617
+ unique_ptr<QueryResult> GetResult(GlobalSinkState &state) override;
61618
+
61619
+ public:
61620
+ // Sink interface
61621
+ SinkResultType Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
61622
+ DataChunk &input) const override;
61623
+
61624
+ unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
61625
+
61626
+ bool ParallelSink() const override;
61627
+ };
61628
+
61629
+ } // namespace duckdb
61630
+
61631
+
61632
+
61633
+
61634
+
61635
+ namespace duckdb {
61636
+
61637
+ PhysicalMaterializedCollector::PhysicalMaterializedCollector(PreparedStatementData &data, bool parallel)
61638
+ : PhysicalResultCollector(data), parallel(parallel) {
61639
+ }
61640
+
61641
+ //===--------------------------------------------------------------------===//
61642
+ // Sink
61643
+ //===--------------------------------------------------------------------===//
61644
+ class MaterializedCollectorGlobalState : public GlobalSinkState {
61645
+ public:
61646
+ mutex glock;
61647
+ unique_ptr<MaterializedQueryResult> result;
61648
+ };
61649
+
61650
+ SinkResultType PhysicalMaterializedCollector::Sink(ExecutionContext &context, GlobalSinkState &gstate_p,
61651
+ LocalSinkState &lstate, DataChunk &input) const {
61652
+ auto &gstate = (MaterializedCollectorGlobalState &)gstate_p;
61653
+ lock_guard<mutex> lock(gstate.glock);
61654
+ gstate.result->collection.Append(input);
61655
+ return SinkResultType::NEED_MORE_INPUT;
61656
+ }
61657
+
61658
+ unique_ptr<GlobalSinkState> PhysicalMaterializedCollector::GetGlobalSinkState(ClientContext &context) const {
61659
+ auto state = make_unique<MaterializedCollectorGlobalState>();
61660
+ state->result =
61661
+ make_unique<MaterializedQueryResult>(statement_type, properties, types, names, context.shared_from_this());
61662
+ return move(state);
61663
+ }
61664
+
61665
+ unique_ptr<QueryResult> PhysicalMaterializedCollector::GetResult(GlobalSinkState &state) {
61666
+ auto &gstate = (MaterializedCollectorGlobalState &)state;
61667
+ D_ASSERT(gstate.result);
61668
+ return move(gstate.result);
61669
+ }
61670
+
61671
+ bool PhysicalMaterializedCollector::ParallelSink() const {
61672
+ return parallel;
61673
+ }
61674
+
61185
61675
  } // namespace duckdb
61186
61676
  //===----------------------------------------------------------------------===//
61187
61677
  // DuckDB
@@ -61520,6 +62010,55 @@ string PhysicalReservoirSample::ParamsToString() const {
61520
62010
  }
61521
62011
 
61522
62012
  } // namespace duckdb
62013
+
62014
+
62015
+
62016
+
62017
+
62018
+
62019
+
62020
+ namespace duckdb {
62021
+
62022
+ PhysicalResultCollector::PhysicalResultCollector(PreparedStatementData &data)
62023
+ : PhysicalOperator(PhysicalOperatorType::RESULT_COLLECTOR, {LogicalType::BOOLEAN}, 0), // base gets a BOOLEAN type list and an estimated cardinality of 0
62024
+ statement_type(data.statement_type), properties(data.properties), plan(data.plan.get()), names(data.names) { // 'plan' is a raw pointer obtained via get(); data.plan keeps ownership
62025
+ this->types = data.types; // the operator's types are then set to those of the prepared statement
62026
+ }
62027
+
62028
+ unique_ptr<PhysicalResultCollector> PhysicalResultCollector::GetResultCollector(ClientContext &context,
62029
+ PreparedStatementData &data) { // factory: picks the collector from the insertion-order setting and the plan's batch-index support
62030
+ auto &config = DBConfig::GetConfig(context);
62031
+ bool use_materialized_collector = !config.preserve_insertion_order || !data.plan->AllSourcesSupportBatchIndex(); // the batch collector is used only if order matters AND all sources support batch indexes
62032
+ if (use_materialized_collector) {
62033
+ // parallel materialized collector only if we don't care about maintaining insertion order
62034
+ return make_unique_base<PhysicalResultCollector, PhysicalMaterializedCollector>(
62035
+ data, !config.preserve_insertion_order);
62036
+ } else {
62037
+ // we care about maintaining insertion order and the sources all support batch indexes
62038
+ // use a batch collector
62039
+ return make_unique_base<PhysicalResultCollector, PhysicalBatchCollector>(data);
62040
+ }
62041
+ }
62042
+
62043
+ vector<PhysicalOperator *> PhysicalResultCollector::GetChildren() const { // the wrapped plan is reported as the only child
62044
+ return {plan};
62045
+ }
62046
+
62047
+ void PhysicalResultCollector::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) { // makes this collector the source of 'current' and builds a new pipeline over 'plan' that sinks into it
62048
+ // operator is a sink, build a pipeline
62049
+ sink_state.reset();
62050
+
62051
+ // single operator:
62052
+ // the operator becomes the data source of the current pipeline
62053
+ state.SetPipelineSource(current, this);
62054
+ // we create a new pipeline starting from the child
62055
+ D_ASSERT(children.size() == 0); // the child plan lives in 'plan', not in 'children'
62056
+ D_ASSERT(plan);
62057
+
62058
+ BuildChildPipeline(executor, current, state, plan);
62059
+ }
62060
+
62061
+ } // namespace duckdb
61523
62062
  //===----------------------------------------------------------------------===//
61524
62063
  // DuckDB
61525
62064
  //
@@ -61633,6 +62172,77 @@ void PhysicalSet::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSou
61633
62172
  }
61634
62173
 
61635
62174
  } // namespace duckdb
62175
+
62176
+
62177
+
62178
+ namespace duckdb {
62179
+
62180
+ PhysicalStreamingLimit::PhysicalStreamingLimit(vector<LogicalType> types, idx_t limit, idx_t offset,
62181
+ unique_ptr<Expression> limit_expression,
62182
+ unique_ptr<Expression> offset_expression, idx_t estimated_cardinality,
62183
+ bool parallel)
62184
+ : PhysicalOperator(PhysicalOperatorType::STREAMING_LIMIT, move(types), estimated_cardinality), limit_value(limit),
62185
+ offset_value(offset), limit_expression(move(limit_expression)), offset_expression(move(offset_expression)),
62186
+ parallel(parallel) { // stores constant limit/offset next to their optional expressions; 'parallel' drives IsOrderDependent()/ParallelOperator()
62187
+ }
62188
+
62189
+ //===--------------------------------------------------------------------===//
62190
+ // Operator
62191
+ //===--------------------------------------------------------------------===//
62192
+ class StreamingLimitOperatorState : public OperatorState { // operator state holding the limit/offset that PhysicalStreamingLimit::Execute works with
62193
+ public:
62194
+ explicit StreamingLimitOperatorState(const PhysicalStreamingLimit &op) {
62195
+ this->limit = op.limit_expression ? DConstants::INVALID_INDEX : op.limit_value; // INVALID_INDEX when the limit comes from an expression instead of a constant
62196
+ this->offset = op.offset_expression ? DConstants::INVALID_INDEX : op.offset_value; // same convention for the offset
62197
+ }
62198
+
62199
+ idx_t limit;
62200
+ idx_t offset;
62201
+ };
62202
+
62203
+ class StreamingLimitGlobalState : public GlobalOperatorState { // global operator state of PhysicalStreamingLimit: a single atomic row counter
62204
+ public:
62205
+ StreamingLimitGlobalState() : current_offset(0) {
62206
+ }
62207
+
62208
+ std::atomic<idx_t> current_offset; // advanced by input.size() in every Execute() call
62209
+ };
62210
+
62211
+ unique_ptr<OperatorState> PhysicalStreamingLimit::GetOperatorState(ClientContext &context) const { // new operator state seeded from this operator's limit/offset settings
62212
+ return make_unique<StreamingLimitOperatorState>(*this);
62213
+ }
62214
+
62215
+ unique_ptr<GlobalOperatorState> PhysicalStreamingLimit::GetGlobalOperatorState(ClientContext &context) const { // new global state whose row counter starts at 0
62216
+ return make_unique<StreamingLimitGlobalState>();
62217
+ }
62218
+
62219
+ OperatorResultType PhysicalStreamingLimit::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
62220
+ GlobalOperatorState &gstate_p, OperatorState &state_p) const { // applies limit/offset to one input chunk via the PhysicalLimit helpers; FINISHED once ComputeOffset() returns false
62221
+ auto &gstate = (StreamingLimitGlobalState &)gstate_p;
62222
+ auto &state = (StreamingLimitOperatorState &)state_p;
62223
+ auto &limit = state.limit;
62224
+ auto &offset = state.offset;
62225
+ idx_t current_offset = gstate.current_offset.fetch_add(input.size()); // atomically adds this chunk's row count and yields the count seen before it
62226
+ idx_t max_element; // passed to ComputeOffset(); not read afterwards in this function
62227
+ if (!PhysicalLimit::ComputeOffset(input, limit, offset, current_offset, max_element, limit_expression.get(),
62228
+ offset_expression.get())) {
62229
+ return OperatorResultType::FINISHED;
62230
+ }
62231
+ if (PhysicalLimit::HandleOffset(input, current_offset, offset, limit)) {
62232
+ chunk.Reference(input); // output references the input chunk only when HandleOffset() returns true
62233
+ }
62234
+ return OperatorResultType::NEED_MORE_INPUT;
62235
+ }
62236
+
62237
+ bool PhysicalStreamingLimit::IsOrderDependent() const { // order dependent exactly when the operator was not created as parallel
62238
+ return !parallel;
62239
+ }
62240
+
62241
+ bool PhysicalStreamingLimit::ParallelOperator() const { // reports the 'parallel' flag given at construction
62242
+ return parallel;
62243
+ }
62244
+
62245
+ } // namespace duckdb
61636
62246
  //===----------------------------------------------------------------------===//
61637
62247
  // DuckDB
61638
62248
  //
@@ -62002,6 +62612,12 @@ public:
62002
62612
  static void ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &left, DataChunk &result, bool found_match[],
62003
62613
  bool has_null);
62004
62614
  static void ConstructLeftJoinResult(DataChunk &left, DataChunk &result, bool found_match[]);
62615
+
62616
+ public:
62617
+ static void BuildJoinPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state,
62618
+ PhysicalOperator &op);
62619
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
62620
+ vector<const PhysicalOperator *> GetSources() const override;
62005
62621
  };
62006
62622
 
62007
62623
  } // namespace duckdb
@@ -62819,6 +63435,10 @@ public:
62819
63435
  bool ParallelSink() const override {
62820
63436
  return true;
62821
63437
  }
63438
+
63439
+ public:
63440
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
63441
+ vector<const PhysicalOperator *> GetSources() const override;
62822
63442
  };
62823
63443
 
62824
63444
  } // namespace duckdb
@@ -62826,6 +63446,7 @@ public:
62826
63446
 
62827
63447
 
62828
63448
 
63449
+
62829
63450
  namespace duckdb {
62830
63451
 
62831
63452
  PhysicalCrossProduct::PhysicalCrossProduct(vector<LogicalType> types, unique_ptr<PhysicalOperator> left,
@@ -62912,6 +63533,17 @@ OperatorResultType PhysicalCrossProduct::Execute(ExecutionContext &context, Data
62912
63533
  return OperatorResultType::HAVE_MORE_OUTPUT;
62913
63534
  }
62914
63535
 
63536
+ //===--------------------------------------------------------------------===//
63537
+ // Pipeline Construction
63538
+ //===--------------------------------------------------------------------===//
63539
+ void PhysicalCrossProduct::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) { // cross products reuse the shared join pipeline construction
63540
+ PhysicalJoin::BuildJoinPipelines(executor, current, state, *this);
63541
+ }
63542
+
63543
+ vector<const PhysicalOperator *> PhysicalCrossProduct::GetSources() const { // only the sources of the first child are reported
63544
+ return children[0]->GetSources();
63545
+ }
63546
+
62915
63547
  } // namespace duckdb
62916
63548
 
62917
63549
 
@@ -62947,6 +63579,66 @@ public:
62947
63579
  unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
62948
63580
  void GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
62949
63581
  LocalSourceState &lstate) const override;
63582
+
63583
+ public:
63584
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
63585
+ };
63586
+
63587
+ } // namespace duckdb
63588
+
63589
+
63590
+ //===----------------------------------------------------------------------===//
63591
+ // DuckDB
63592
+ //
63593
+ // duckdb/execution/operator/set/physical_recursive_cte.hpp
63594
+ //
63595
+ //
63596
+ //===----------------------------------------------------------------------===//
63597
+
63598
+
63599
+
63600
+
63601
+
63602
+ namespace duckdb {
63603
+ class Pipeline;
63604
+ class RecursiveCTEState;
63605
+
63606
+ class PhysicalRecursiveCTE : public PhysicalOperator { // physical operator for recursive CTEs; declares both a sink interface and a source-side GetData
63607
+ public:
63608
+ PhysicalRecursiveCTE(vector<LogicalType> types, bool union_all, unique_ptr<PhysicalOperator> top,
63609
+ unique_ptr<PhysicalOperator> bottom, idx_t estimated_cardinality);
63610
+ ~PhysicalRecursiveCTE() override;
63611
+
63612
+ bool union_all;
63613
+ std::shared_ptr<ChunkCollection> working_table;
63614
+ vector<shared_ptr<Pipeline>> pipelines; // filled by BuildPipelines() and by PhysicalOperator::AddPipeline() while state.recursive_cte points at this node
63615
+
63616
+ public:
63617
+ // Source interface
63618
+ void GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
63619
+ LocalSourceState &lstate) const override;
63620
+
63621
+ public:
63622
+ // Sink interface
63623
+ SinkResultType Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
63624
+ DataChunk &input) const override;
63625
+
63626
+ unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
63627
+
63628
+ bool IsSink() const override {
63629
+ return true;
63630
+ }
63631
+
63632
+ public:
63633
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override; // pipeline construction specific to the recursive CTE
63634
+
63635
+ vector<const PhysicalOperator *> GetSources() const override; // implemented as {this}
63636
+
63637
+ private:
63638
+ //! Probe Hash Table and eliminate duplicate rows
63639
+ idx_t ProbeHT(DataChunk &chunk, RecursiveCTEState &state) const;
63640
+
63641
+ void ExecuteRecursivePipelines(ExecutionContext &context) const;
62950
63642
  };
62951
63643
 
62952
63644
  } // namespace duckdb
@@ -62972,6 +63664,16 @@ PhysicalDelimJoin::PhysicalDelimJoin(vector<LogicalType> types, unique_ptr<Physi
62972
63664
  join->children[0] = move(cached_chunk_scan);
62973
63665
  }
62974
63666
 
63667
+ vector<PhysicalOperator *> PhysicalDelimJoin::GetChildren() const { // regular children first, then the wrapped join and the distinct aggregate
63668
+ vector<PhysicalOperator *> result;
63669
+ for (auto &child : children) {
63670
+ result.push_back(child.get());
63671
+ }
63672
+ result.push_back(join.get());
63673
+ result.push_back(distinct.get());
63674
+ return result;
63675
+ }
63676
+
62975
63677
  //===--------------------------------------------------------------------===//
62976
63678
  // Sink
62977
63679
  //===--------------------------------------------------------------------===//
@@ -63045,6 +63747,40 @@ string PhysicalDelimJoin::ParamsToString() const {
63045
63747
  return join->ParamsToString();
63046
63748
  }
63047
63749
 
63750
+ //===--------------------------------------------------------------------===//
63751
+ // Pipeline Construction
63752
+ //===--------------------------------------------------------------------===//
63753
+ void PhysicalDelimJoin::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) { // builds the pipeline that sinks into this delim join, then (for DELIM_JOIN) the wrapped join on 'current'
63754
+ op_state.reset();
63755
+ sink_state.reset();
63756
+
63757
+ // duplicate eliminated join
63758
+ auto pipeline = make_shared<Pipeline>(executor);
63759
+ state.SetPipelineSink(*pipeline, this);
63760
+ current.AddDependency(pipeline); // 'current' depends on the new pipeline
63761
+
63762
+ // recurse into the pipeline child
63763
+ children[0]->BuildPipelines(executor, *pipeline, state);
63764
+ if (type == PhysicalOperatorType::DELIM_JOIN) {
63765
+ // recurse into the actual join
63766
+ // any pipelines in there depend on the main pipeline
63767
+ // any scan of the duplicate eliminated data on the RHS depends on this pipeline
63768
+ // we add an entry to the mapping of (PhysicalOperator*) -> (Pipeline*)
63769
+ for (auto &delim_scan : delim_scans) {
63770
+ state.delim_join_dependencies[delim_scan] = pipeline.get(); // looked up again by PhysicalChunkScan::BuildPipelines for DELIM_SCAN nodes
63771
+ }
63772
+ join->BuildPipelines(executor, current, state);
63773
+ }
63774
+ if (!state.recursive_cte) { // NOTE(review): this if/else repeats the body of PhysicalOperator::AddPipeline()
63775
+ // regular pipeline: schedule it
63776
+ state.AddPipeline(executor, move(pipeline));
63777
+ } else {
63778
+ // CTE pipeline! add it to the CTE pipelines
63779
+ auto &cte = (PhysicalRecursiveCTE &)*state.recursive_cte;
63780
+ cte.pipelines.push_back(move(pipeline));
63781
+ }
63782
+ }
63783
+
63048
63784
  } // namespace duckdb
63049
63785
 
63050
63786
 
@@ -63616,6 +64352,9 @@ public:
63616
64352
  return true;
63617
64353
  }
63618
64354
 
64355
+ public:
64356
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
64357
+
63619
64358
  private:
63620
64359
  // resolve joins that can potentially output N*M elements (INNER, LEFT, FULL)
63621
64360
  void ResolveComplexJoin(ExecutionContext &context, DataChunk &result, LocalSourceState &state) const;
@@ -64725,6 +65464,38 @@ void PhysicalIEJoin::GetData(ExecutionContext &context, DataChunk &result, Globa
64725
65464
  }
64726
65465
  }
64727
65466
 
65467
+ //===--------------------------------------------------------------------===//
65468
+ // Pipeline Construction
65469
+ //===--------------------------------------------------------------------===//
65470
+ void PhysicalIEJoin::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) { // builds one pipeline per child, both sinking into this join; 'current' then uses the join as its source
65471
+ D_ASSERT(children.size() == 2);
65472
+ if (state.recursive_cte) {
65473
+ throw NotImplementedException("IEJoins are not supported in recursive CTEs yet");
65474
+ }
65475
+
65476
+ // Build the LHS
65477
+ auto lhs_pipeline = make_shared<Pipeline>(executor);
65478
+ state.SetPipelineSink(*lhs_pipeline, this);
65479
+ D_ASSERT(children[0].get());
65480
+ children[0]->BuildPipelines(executor, *lhs_pipeline, state);
65481
+
65482
+ // Build the RHS
65483
+ auto rhs_pipeline = make_shared<Pipeline>(executor);
65484
+ state.SetPipelineSink(*rhs_pipeline, this);
65485
+ D_ASSERT(children[1].get());
65486
+ children[1]->BuildPipelines(executor, *rhs_pipeline, state);
65487
+
65488
+ // dependency chain: current depends on the RHS pipeline, which in turn depends on the LHS pipeline
65489
+ current.AddDependency(rhs_pipeline);
65490
+ rhs_pipeline->AddDependency(lhs_pipeline);
65491
+
65492
+ state.AddPipeline(executor, move(lhs_pipeline));
65493
+ state.AddPipeline(executor, move(rhs_pipeline));
65494
+
65495
+ // Now build both and scan
65496
+ state.SetPipelineSource(current, this);
65497
+ }
65498
+
64728
65499
  } // namespace duckdb
64729
65500
  //===----------------------------------------------------------------------===//
64730
65501
  // DuckDB
@@ -64790,6 +65561,10 @@ public:
64790
65561
  return true;
64791
65562
  }
64792
65563
 
65564
+ public:
65565
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
65566
+ vector<const PhysicalOperator *> GetSources() const override;
65567
+
64793
65568
  private:
64794
65569
  void GetRHSMatches(ExecutionContext &context, DataChunk &input, OperatorState &state_p) const;
64795
65570
  //! Fills result chunk
@@ -64849,10 +65624,16 @@ public:
64849
65624
  unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
64850
65625
  void GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
64851
65626
  LocalSourceState &lstate) const override;
65627
+ idx_t GetBatchIndex(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
65628
+ LocalSourceState &lstate) const override;
64852
65629
 
64853
65630
  bool ParallelSource() const override {
64854
65631
  return true;
64855
65632
  }
65633
+
65634
+ bool SupportsBatchIndex() const override {
65635
+ return function.supports_batch_index;
65636
+ }
64856
65637
  };
64857
65638
 
64858
65639
  } // namespace duckdb
@@ -65097,9 +65878,25 @@ OperatorResultType PhysicalIndexJoin::Execute(ExecutionContext &context, DataChu
65097
65878
  return OperatorResultType::HAVE_MORE_OUTPUT;
65098
65879
  }
65099
65880
 
65881
+ //===--------------------------------------------------------------------===//
65882
+ // Pipeline Construction
65883
+ //===--------------------------------------------------------------------===//
65884
+ void PhysicalIndexJoin::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) { // adds the join to 'current' as an operator and keeps building on the first child only
65885
+ // index join: we only continue into the LHS
65886
+ // the right side is probed by the index join
65887
+ // so we don't need to do anything in the pipeline with this child
65888
+ state.AddPipelineOperator(current, this);
65889
+ children[0]->BuildPipelines(executor, current, state);
65890
+ }
65891
+
65892
+ vector<const PhysicalOperator *> PhysicalIndexJoin::GetSources() const { // only the sources of the first child are reported
65893
+ return children[0]->GetSources();
65894
+ }
65895
+
65100
65896
  } // namespace duckdb
65101
65897
 
65102
65898
 
65899
+
65103
65900
  namespace duckdb {
65104
65901
 
65105
65902
  PhysicalJoin::PhysicalJoin(LogicalOperator &op, PhysicalOperatorType type, JoinType join_type,
@@ -65119,6 +65916,44 @@ bool PhysicalJoin::EmptyResultIfRHSIsEmpty() const {
65119
65916
  }
65120
65917
  }
65121
65918
 
65919
+ //===--------------------------------------------------------------------===//
65920
+ // Pipeline Construction
65921
+ //===--------------------------------------------------------------------===//
65922
+ void PhysicalJoin::BuildJoinPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state,
65923
+ PhysicalOperator &op) { // shared pipeline construction, called by PhysicalJoin::BuildPipelines and PhysicalCrossProduct::BuildPipelines
65924
+ op.op_state.reset();
65925
+ op.sink_state.reset();
65926
+
65927
+ // on the LHS (probe child), the operator becomes a regular operator
65928
+ state.AddPipelineOperator(current, &op);
65929
+ if (op.IsSource()) {
65930
+ // FULL or RIGHT outer join
65931
+ // schedule a scan of the node as a child pipeline
65932
+ // this scan has to be performed AFTER all the probing has happened
65933
+ if (state.recursive_cte) {
65934
+ throw NotImplementedException("FULL and RIGHT outer joins are not supported in recursive CTEs yet");
65935
+ }
65936
+ state.AddChildPipeline(executor, current);
65937
+ }
65938
+ // continue building the pipeline on this child
65939
+ op.children[0]->BuildPipelines(executor, current, state);
65940
+
65941
+ // on the RHS (build side), we construct a new pipeline that has this operator as its sink; 'current' depends on it
65942
+ op.BuildChildPipeline(executor, current, state, op.children[1].get());
65943
+ }
65944
+
65945
+ void PhysicalJoin::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) { // virtual entry point; forwards to the static helper with *this
65946
+ PhysicalJoin::BuildJoinPipelines(executor, current, state, *this);
65947
+ }
65948
+
65949
+ vector<const PhysicalOperator *> PhysicalJoin::GetSources() const { // sources of the first child, plus this join itself when IsSource() is true
65950
+ auto result = children[0]->GetSources();
65951
+ if (IsSource()) {
65952
+ result.push_back(this);
65953
+ }
65954
+ return result;
65955
+ }
65956
+
65122
65957
  } // namespace duckdb
65123
65958
  //===----------------------------------------------------------------------===//
65124
65959
  // DuckDB
@@ -70046,6 +70881,10 @@ public:
70046
70881
  bool IsSink() const override {
70047
70882
  return true;
70048
70883
  }
70884
+
70885
+ public:
70886
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
70887
+ vector<const PhysicalOperator *> GetSources() const override;
70049
70888
  };
70050
70889
 
70051
70890
  } // namespace duckdb
@@ -70057,6 +70896,7 @@ public:
70057
70896
 
70058
70897
 
70059
70898
 
70899
+
70060
70900
  #include <algorithm>
70061
70901
  #include <sstream>
70062
70902
 
@@ -70221,6 +71061,23 @@ SinkResultType PhysicalExport::Sink(ExecutionContext &context, GlobalSinkState &
70221
71061
  return SinkResultType::NEED_MORE_INPUT;
70222
71062
  }
70223
71063
 
71064
+ //===--------------------------------------------------------------------===//
71065
+ // Pipeline Construction
71066
+ //===--------------------------------------------------------------------===//
71067
+ void PhysicalExport::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) { // EXPORT always becomes the source of 'current'; with a child present it defers to the generic builder
71068
+ // EXPORT has an optional child
71069
+ // we only need to schedule child pipelines if there is a child
71070
+ state.SetPipelineSource(current, this);
71071
+ if (children.empty()) {
71072
+ return;
71073
+ }
71074
+ PhysicalOperator::BuildPipelines(executor, current, state);
71075
+ }
71076
+
71077
+ vector<const PhysicalOperator *> PhysicalExport::GetSources() const { // the export operator reports itself as the only source
71078
+ return {this};
71079
+ }
71080
+
70224
71081
  } // namespace duckdb
70225
71082
  //===----------------------------------------------------------------------===//
70226
71083
  // DuckDB
@@ -71159,6 +72016,8 @@ OperatorResultType PhysicalUnnest::Execute(ExecutionContext &context, DataChunk
71159
72016
  } // namespace duckdb
71160
72017
 
71161
72018
 
72019
+
72020
+
71162
72021
  namespace duckdb {
71163
72022
 
71164
72023
  class PhysicalChunkScanState : public GlobalSourceState {
@@ -71190,6 +72049,38 @@ void PhysicalChunkScan::GetData(ExecutionContext &context, DataChunk &chunk, Glo
71190
72049
  state.chunk_index++;
71191
72050
  }
71192
72051
 
72052
+ //===--------------------------------------------------------------------===//
72053
+ // Pipeline Construction
72054
+ //===--------------------------------------------------------------------===//
72055
+ void PhysicalChunkScan::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) { // sets the source of 'current'; DELIM_SCAN and RECURSIVE_CTE_SCAN get extra handling first
72056
+ // check if there is any additional action we need to do depending on the type
72057
+ switch (type) {
72058
+ case PhysicalOperatorType::DELIM_SCAN: {
72059
+ auto entry = state.delim_join_dependencies.find(this); // entry registered by PhysicalDelimJoin::BuildPipelines
72060
+ D_ASSERT(entry != state.delim_join_dependencies.end());
72061
+ // this chunk scan introduces a dependency to the current pipeline
72062
+ // namely a dependency on the duplicate elimination pipeline to finish
72063
+ auto delim_dependency = entry->second->shared_from_this();
72064
+ auto delim_sink = state.GetPipelineSink(*delim_dependency);
72065
+ D_ASSERT(delim_sink);
72066
+ D_ASSERT(delim_sink->type == PhysicalOperatorType::DELIM_JOIN);
72067
+ auto &delim_join = (PhysicalDelimJoin &)*delim_sink;
72068
+ current.AddDependency(delim_dependency);
72069
+ state.SetPipelineSource(current, (PhysicalOperator *)delim_join.distinct.get()); // the delim join's 'distinct' operator, not this scan, becomes the pipeline source
72070
+ return;
72071
+ }
72072
+ case PhysicalOperatorType::RECURSIVE_CTE_SCAN:
72073
+ if (!state.recursive_cte) {
72074
+ throw InternalException("Recursive CTE scan found without recursive CTE node");
72075
+ }
72076
+ break;
72077
+ default:
72078
+ break;
72079
+ }
72080
+ D_ASSERT(children.empty());
72081
+ state.SetPipelineSource(current, this);
72082
+ }
72083
+
71193
72084
  } // namespace duckdb
71194
72085
  //===----------------------------------------------------------------------===//
71195
72086
  // DuckDB
@@ -71499,6 +72390,16 @@ void PhysicalTableScan::GetData(ExecutionContext &context, DataChunk &chunk, Glo
71499
72390
  }
71500
72391
  }
71501
72392
 
72393
+ idx_t PhysicalTableScan::GetBatchIndex(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
72394
+ LocalSourceState &lstate) const { // forwards to the table function's get_batch_index callback; 'chunk' is not used here
72395
+ D_ASSERT(SupportsBatchIndex());
72396
+ D_ASSERT(function.get_batch_index);
72397
+ auto &gstate = (TableScanGlobalState &)gstate_p;
72398
+ auto &state = (TableScanLocalState &)lstate;
72399
+ return function.get_batch_index(context.client, bind_data.get(), state.operator_data.get(),
72400
+ gstate.parallel_state.get());
72401
+ }
72402
+
71502
72403
  string PhysicalTableScan::GetName() const {
71503
72404
  return StringUtil::Upper(function.name);
71504
72405
  }
@@ -72325,56 +73226,7 @@ void PhysicalDrop::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSo
72325
73226
  }
72326
73227
 
72327
73228
  } // namespace duckdb
72328
- //===----------------------------------------------------------------------===//
72329
- // DuckDB
72330
- //
72331
- // duckdb/execution/operator/set/physical_recursive_cte.hpp
72332
- //
72333
- //
72334
- //===----------------------------------------------------------------------===//
72335
-
72336
-
72337
-
72338
-
72339
-
72340
- namespace duckdb {
72341
- class Pipeline;
72342
- class RecursiveCTEState;
72343
-
72344
- class PhysicalRecursiveCTE : public PhysicalOperator {
72345
- public:
72346
- PhysicalRecursiveCTE(vector<LogicalType> types, bool union_all, unique_ptr<PhysicalOperator> top,
72347
- unique_ptr<PhysicalOperator> bottom, idx_t estimated_cardinality);
72348
- ~PhysicalRecursiveCTE() override;
72349
73229
 
72350
- bool union_all;
72351
- std::shared_ptr<ChunkCollection> working_table;
72352
- vector<shared_ptr<Pipeline>> pipelines;
72353
-
72354
- public:
72355
- // Source interface
72356
- void GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
72357
- LocalSourceState &lstate) const override;
72358
-
72359
- public:
72360
- // Sink interface
72361
- SinkResultType Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
72362
- DataChunk &input) const override;
72363
-
72364
- unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
72365
-
72366
- bool IsSink() const override {
72367
- return true;
72368
- }
72369
-
72370
- private:
72371
- //! Probe Hash Table and eliminate duplicate rows
72372
- idx_t ProbeHT(DataChunk &chunk, RecursiveCTEState &state) const;
72373
-
72374
- void ExecuteRecursivePipelines(ExecutionContext &context) const;
72375
- };
72376
-
72377
- } // namespace duckdb
72378
73230
 
72379
73231
 
72380
73232
 
@@ -72523,6 +73375,40 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context)
72523
73375
  }
72524
73376
  }
72525
73377
 
73378
+ //===--------------------------------------------------------------------===//
73379
+ // Pipeline Construction
73380
+ //===--------------------------------------------------------------------===//
73381
+ void PhysicalRecursiveCTE::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) { // CTE node becomes the source of 'current'; children[1] is built into 'pipelines', children[0] as a regular child pipeline
73382
+ op_state.reset();
73383
+ sink_state.reset();
73384
+
73385
+ // recursive CTE
73386
+ state.SetPipelineSource(current, this);
73387
+ // the LHS of the recursive CTE is our initial state
73388
+ // we build this pipeline as normal
73389
+ auto pipeline_child = children[0].get();
73390
+ // for the RHS, we gather all pipelines that depend on the recursive cte
73391
+ // these pipelines need to be rerun
73392
+ if (state.recursive_cte) {
73393
+ throw InternalException("Recursive CTE detected WITHIN a recursive CTE node");
73394
+ }
73395
+ state.recursive_cte = this; // while set, PhysicalOperator::AddPipeline() stores new pipelines in this->pipelines
73396
+
73397
+ auto recursive_pipeline = make_shared<Pipeline>(executor);
73398
+ state.SetPipelineSink(*recursive_pipeline, this);
73399
+ children[1]->BuildPipelines(executor, *recursive_pipeline, state);
73400
+
73401
+ pipelines.push_back(move(recursive_pipeline));
73402
+
73403
+ state.recursive_cte = nullptr; // cleared before the children[0] pipeline is built below
73404
+
73405
+ BuildChildPipeline(executor, current, state, pipeline_child);
73406
+ }
73407
+
73408
+ vector<const PhysicalOperator *> PhysicalRecursiveCTE::GetSources() const { // the CTE node reports itself as the only source
73409
+ return {this};
73410
+ }
73411
+
72526
73412
  } // namespace duckdb
72527
73413
  //===----------------------------------------------------------------------===//
72528
73414
  // DuckDB
@@ -72541,12 +73427,17 @@ class PhysicalUnion : public PhysicalOperator {
72541
73427
  public:
72542
73428
  PhysicalUnion(vector<LogicalType> types, unique_ptr<PhysicalOperator> top, unique_ptr<PhysicalOperator> bottom,
72543
73429
  idx_t estimated_cardinality);
73430
+
73431
+ public:
73432
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
73433
+ vector<const PhysicalOperator *> GetSources() const override;
72544
73434
  };
72545
73435
 
72546
73436
  } // namespace duckdb
72547
73437
 
72548
73438
 
72549
73439
 
73440
+
72550
73441
  namespace duckdb {
72551
73442
 
72552
73443
  PhysicalUnion::PhysicalUnion(vector<LogicalType> types, unique_ptr<PhysicalOperator> top,
@@ -72556,6 +73447,49 @@ PhysicalUnion::PhysicalUnion(vector<LogicalType> types, unique_ptr<PhysicalOpera
72556
73447
  children.push_back(move(bottom));
72557
73448
  }
72558
73449
 
73450
+ //===--------------------------------------------------------------------===//
73451
+ // Pipeline Construction
73452
+ //===--------------------------------------------------------------------===//
73453
+ void PhysicalUnion::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) { // children[0] keeps building on 'current'; children[1] is built on a separate union pipeline given current's operators and sink
73454
+ if (state.recursive_cte) {
73455
+ throw NotImplementedException("UNIONS are not supported in recursive CTEs yet");
73456
+ }
73457
+ op_state.reset();
73458
+ sink_state.reset();
73459
+
73460
+ auto union_pipeline = make_shared<Pipeline>(executor);
73461
+ auto pipeline_ptr = union_pipeline.get(); // raw pointer kept because union_pipeline is moved away before the RHS is built
73462
+ auto &child_pipelines = state.GetChildPipelines(executor);
73463
+ auto &child_dependencies = state.GetChildDependencies(executor);
73464
+ auto &union_pipelines = state.GetUnionPipelines(executor);
73465
+ // set up dependencies for any child pipelines to this union pipeline
73466
+ auto child_entry = child_pipelines.find(&current);
73467
+ if (child_entry != child_pipelines.end()) {
73468
+ for (auto &current_child : child_entry->second) {
73469
+ D_ASSERT(child_dependencies.find(current_child.get()) != child_dependencies.end());
73470
+ child_dependencies[current_child.get()].push_back(pipeline_ptr);
73471
+ }
73472
+ }
73473
+ // for the current pipeline, continue building on the LHS
73474
+ state.SetPipelineOperators(*union_pipeline, state.GetPipelineOperators(current)); // the operators 'current' has at this point are handed to the union pipeline before the LHS is built
73475
+ children[0]->BuildPipelines(executor, current, state);
73476
+ // insert the union pipeline as a union pipeline of the current node
73477
+ union_pipelines[&current].push_back(move(union_pipeline));
73478
+
73479
+ // for the union pipeline, build on the RHS
73480
+ state.SetPipelineSink(*pipeline_ptr, state.GetPipelineSink(current));
73481
+ children[1]->BuildPipelines(executor, *pipeline_ptr, state);
73482
+ }
73483
+
73484
+ vector<const PhysicalOperator *> PhysicalUnion::GetSources() const { // concatenates the sources of every child, in child order
73485
+ vector<const PhysicalOperator *> result;
73486
+ for (auto &child : children) {
73487
+ auto child_sources = child->GetSources();
73488
+ result.insert(result.end(), child_sources.begin(), child_sources.end());
73489
+ }
73490
+ return result;
73491
+ }
73492
+
72559
73493
  } // namespace duckdb
72560
73494
 
72561
73495
 
@@ -73027,6 +73961,7 @@ void PerfectAggregateHashTable::Destroy() {
73027
73961
 
73028
73962
 
73029
73963
 
73964
+
73030
73965
  namespace duckdb {
73031
73966
 
73032
73967
  string PhysicalOperator::GetName() const {
@@ -73044,6 +73979,14 @@ void PhysicalOperator::Print() const {
73044
73979
  }
73045
73980
  // LCOV_EXCL_STOP
73046
73981
 
73982
+ vector<PhysicalOperator *> PhysicalOperator::GetChildren() const { // default: raw pointers to the entries of 'children', in order
73983
+ vector<PhysicalOperator *> result;
73984
+ for (auto &child : children) {
73985
+ result.push_back(child.get());
73986
+ }
73987
+ return result;
73988
+ }
73989
+
73047
73990
  //===--------------------------------------------------------------------===//
73048
73991
  // Operator
73049
73992
  //===--------------------------------------------------------------------===//
@@ -73079,6 +74022,11 @@ void PhysicalOperator::GetData(ExecutionContext &context, DataChunk &chunk, Glob
73079
74022
  LocalSourceState &lstate) const {
73080
74023
  throw InternalException("Calling GetData on a node that is not a source!");
73081
74024
  }
74025
+
74026
+ idx_t PhysicalOperator::GetBatchIndex(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
74027
+ LocalSourceState &lstate) const { // default implementation: always throws
74028
+ throw InternalException("Calling GetBatchIndex on a node that does not support it");
74029
+ }
73082
74030
  // LCOV_EXCL_STOP
73083
74031
 
73084
74032
  //===--------------------------------------------------------------------===//
@@ -73107,6 +74055,99 @@ unique_ptr<GlobalSinkState> PhysicalOperator::GetGlobalSinkState(ClientContext &
73107
74055
  return make_unique<GlobalSinkState>();
73108
74056
  }
73109
74057
 
74058
+ //===--------------------------------------------------------------------===//
74059
+ // Pipeline Construction
74060
+ //===--------------------------------------------------------------------===//
74061
+ void PhysicalOperator::AddPipeline(Executor &executor, shared_ptr<Pipeline> pipeline, PipelineBuildState &state) {
74062
+ if (!state.recursive_cte) {
74063
+ // regular pipeline: schedule it
74064
+ state.AddPipeline(executor, move(pipeline));
74065
+ } else {
74066
+ // CTE pipeline! add it to the CTE pipelines
74067
+ auto &cte = (PhysicalRecursiveCTE &)*state.recursive_cte;
74068
+ cte.pipelines.push_back(move(pipeline));
74069
+ }
74070
+ }
74071
+
74072
+ void PhysicalOperator::BuildChildPipeline(Executor &executor, Pipeline &current, PipelineBuildState &state,
74073
+ PhysicalOperator *pipeline_child) {
74074
+ auto pipeline = make_shared<Pipeline>(executor);
74075
+ state.SetPipelineSink(*pipeline, this);
74076
+ // the current is dependent on this pipeline to complete
74077
+ current.AddDependency(pipeline);
74078
+ // recurse into the pipeline child
74079
+ pipeline_child->BuildPipelines(executor, *pipeline, state);
74080
+ AddPipeline(executor, move(pipeline), state);
74081
+ }
74082
+
74083
+ void PhysicalOperator::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
74084
+ op_state.reset();
74085
+ if (IsSink()) {
74086
+ // operator is a sink, build a pipeline
74087
+ sink_state.reset();
74088
+
74089
+ // single operator:
74090
+ // the operator becomes the data source of the current pipeline
74091
+ state.SetPipelineSource(current, this);
74092
+ // we create a new pipeline starting from the child
74093
+ D_ASSERT(children.size() == 1);
74094
+
74095
+ BuildChildPipeline(executor, current, state, children[0].get());
74096
+ } else {
74097
+ // operator is not a sink! recurse in children
74098
+ if (children.empty()) {
74099
+ // source
74100
+ state.SetPipelineSource(current, this);
74101
+ } else {
74102
+ if (children.size() != 1) {
74103
+ throw InternalException("Operator not supported in BuildPipelines");
74104
+ }
74105
+ state.AddPipelineOperator(current, this);
74106
+ children[0]->BuildPipelines(executor, current, state);
74107
+ }
74108
+ }
74109
+ }
74110
+
74111
+ vector<const PhysicalOperator *> PhysicalOperator::GetSources() const {
74112
+ vector<const PhysicalOperator *> result;
74113
+ if (IsSink()) {
74114
+ D_ASSERT(children.size() == 1);
74115
+ result.push_back(this);
74116
+ return result;
74117
+ } else {
74118
+ if (children.empty()) {
74119
+ // source
74120
+ result.push_back(this);
74121
+ return result;
74122
+ } else {
74123
+ if (children.size() != 1) {
74124
+ throw InternalException("Operator not supported in GetSource");
74125
+ }
74126
+ return children[0]->GetSources();
74127
+ }
74128
+ }
74129
+ }
74130
+
74131
+ bool PhysicalOperator::AllSourcesSupportBatchIndex() const {
74132
+ auto sources = GetSources();
74133
+ for (auto &source : sources) {
74134
+ if (!source->SupportsBatchIndex()) {
74135
+ return false;
74136
+ }
74137
+ }
74138
+ return true;
74139
+ }
74140
+
74141
+ void PhysicalOperator::Verify() {
74142
+ #ifdef DEBUG
74143
+ auto sources = GetSources();
74144
+ D_ASSERT(!sources.empty());
74145
+ for (auto &child : children) {
74146
+ child->Verify();
74147
+ }
74148
+ #endif
74149
+ }
74150
+
73110
74151
  } // namespace duckdb
73111
74152
 
73112
74153
  //===----------------------------------------------------------------------===//
@@ -75721,6 +76762,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalInsert &op
75721
76762
  } // namespace duckdb
75722
76763
 
75723
76764
 
76765
+
75724
76766
  //===----------------------------------------------------------------------===//
75725
76767
  // DuckDB
75726
76768
  //
@@ -75759,17 +76801,36 @@ protected:
75759
76801
  } // namespace duckdb
75760
76802
 
75761
76803
 
76804
+
75762
76805
  namespace duckdb {
75763
76806
 
75764
76807
  unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalLimit &op) {
75765
76808
  D_ASSERT(op.children.size() == 1);
75766
76809
 
75767
76810
  auto plan = CreatePlan(*op.children[0]);
76811
+ auto &config = DBConfig::GetConfig(context);
76812
+ unique_ptr<PhysicalOperator> limit;
76813
+ if (!config.preserve_insertion_order) {
76814
+ // use parallel streaming limit if insertion order is not important
76815
+ limit = make_unique<PhysicalStreamingLimit>(op.types, (idx_t)op.limit_val, op.offset_val, move(op.limit),
76816
+ move(op.offset), op.estimated_cardinality, true);
76817
+ } else {
76818
+ // maintaining insertion order is important
76819
+ bool all_sources_support_batch_index = plan->AllSourcesSupportBatchIndex();
76820
+
76821
+ if (all_sources_support_batch_index) {
76822
+ // source supports batch index: use parallel batch limit
76823
+ limit = make_unique<PhysicalLimit>(op.types, (idx_t)op.limit_val, op.offset_val, move(op.limit),
76824
+ move(op.offset), op.estimated_cardinality);
76825
+ } else {
76826
+ // source does not support batch index: use a non-parallel streaming limit
76827
+ limit = make_unique<PhysicalStreamingLimit>(op.types, (idx_t)op.limit_val, op.offset_val, move(op.limit),
76828
+ move(op.offset), op.estimated_cardinality, false);
76829
+ }
76830
+ }
75768
76831
 
75769
- auto limit = make_unique<PhysicalLimit>(op.types, (idx_t)op.limit_val, op.offset_val, move(op.limit),
75770
- move(op.offset), op.estimated_cardinality);
75771
76832
  limit->children.push_back(move(plan));
75772
- return move(limit);
76833
+ return limit;
75773
76834
  }
75774
76835
 
75775
76836
  } // namespace duckdb
@@ -76877,6 +77938,8 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(unique_ptr<Logica
76877
77938
  profiler.StartPhase("create_plan");
76878
77939
  auto plan = CreatePlan(*op);
76879
77940
  profiler.EndPhase();
77941
+
77942
+ plan->Verify();
76880
77943
  return plan;
76881
77944
  }
76882
77945
 
@@ -109047,6 +110110,19 @@ double TableScanProgress(ClientContext &context, const FunctionData *bind_data_p
109047
110110
  return percentage;
109048
110111
  }
109049
110112
 
110113
+ idx_t TableScanGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
110114
+ FunctionOperatorData *operator_state, ParallelState *parallel_state_p) {
110115
+ auto &bind_data = (const TableScanBindData &)*bind_data_p;
110116
+ auto &state = (TableScanOperatorData &)*operator_state;
110117
+ if (state.scan_state.row_group_scan_state.row_group) {
110118
+ return state.scan_state.row_group_scan_state.row_group->start;
110119
+ }
110120
+ if (state.scan_state.local_state.max_index > 0) {
110121
+ return bind_data.table->storage->GetTotalRows() + state.scan_state.local_state.chunk_index;
110122
+ }
110123
+ return 0;
110124
+ }
110125
+
109050
110126
  void TableScanDependency(unordered_set<CatalogEntry *> &entries, const FunctionData *bind_data_p) {
109051
110127
  auto &bind_data = (const TableScanBindData &)*bind_data_p;
109052
110128
  entries.insert(bind_data.table);
@@ -109251,7 +110327,9 @@ void TableScanPushdownComplexFilter(ClientContext &context, LogicalGet &get, Fun
109251
110327
  get.function.init_parallel_state = nullptr;
109252
110328
  get.function.parallel_state_next = nullptr;
109253
110329
  get.function.table_scan_progress = nullptr;
110330
+ get.function.get_batch_index = nullptr;
109254
110331
  get.function.filter_pushdown = false;
110332
+ get.function.supports_batch_index = false;
109255
110333
  } else {
109256
110334
  bind_data.result_ids.clear();
109257
110335
  }
@@ -109280,8 +110358,10 @@ TableFunction TableScanFunction::GetFunction() {
109280
110358
  scan_function.parallel_init = TableScanParallelInit;
109281
110359
  scan_function.parallel_state_next = TableScanParallelStateNext;
109282
110360
  scan_function.table_scan_progress = TableScanProgress;
110361
+ scan_function.get_batch_index = TableScanGetBatchIndex;
109283
110362
  scan_function.projection_pushdown = true;
109284
110363
  scan_function.filter_pushdown = true;
110364
+ scan_function.supports_batch_index = true;
109285
110365
  return scan_function;
109286
110366
  }
109287
110367
 
@@ -109474,7 +110554,7 @@ TableFunction::TableFunction(string name, vector<LogicalType> arguments, table_f
109474
110554
  cardinality(cardinality), pushdown_complex_filter(pushdown_complex_filter), to_string(to_string),
109475
110555
  max_threads(max_threads), init_parallel_state(init_parallel_state), parallel_function(parallel_function),
109476
110556
  parallel_init(parallel_init), parallel_state_next(parallel_state_next), table_scan_progress(query_progress),
109477
- projection_pushdown(projection_pushdown), filter_pushdown(filter_pushdown) {
110557
+ projection_pushdown(projection_pushdown), filter_pushdown(filter_pushdown), supports_batch_index(false) {
109478
110558
  }
109479
110559
 
109480
110560
  TableFunction::TableFunction(const vector<LogicalType> &arguments, table_function_t function,
@@ -113024,6 +114104,7 @@ private:
113024
114104
 
113025
114105
 
113026
114106
 
114107
+
113027
114108
  namespace duckdb {
113028
114109
 
113029
114110
  struct ActiveQueryContext {
@@ -113189,14 +114270,15 @@ const string &ClientContext::GetCurrentQuery() {
113189
114270
  return active_query->query;
113190
114271
  }
113191
114272
 
113192
- unique_ptr<QueryResult> ClientContext::FetchResultInternal(ClientContextLock &lock, PendingQueryResult &pending,
113193
- bool allow_stream_result) {
114273
+ unique_ptr<QueryResult> ClientContext::FetchResultInternal(ClientContextLock &lock, PendingQueryResult &pending) {
113194
114274
  D_ASSERT(active_query);
113195
114275
  D_ASSERT(active_query->open_result == &pending);
113196
114276
  D_ASSERT(active_query->prepared);
114277
+ auto &executor = GetExecutor();
113197
114278
  auto &prepared = *active_query->prepared;
113198
- bool create_stream_result = prepared.properties.allow_stream_result && allow_stream_result;
114279
+ bool create_stream_result = prepared.properties.allow_stream_result && pending.allow_stream_result;
113199
114280
  if (create_stream_result) {
114281
+ D_ASSERT(!executor.HasResultCollector());
113200
114282
  active_query->progress_bar.reset();
113201
114283
  query_progress = -1;
113202
114284
 
@@ -113207,25 +114289,32 @@ unique_ptr<QueryResult> ClientContext::FetchResultInternal(ClientContextLock &lo
113207
114289
  active_query->open_result = stream_result.get();
113208
114290
  return move(stream_result);
113209
114291
  }
113210
- // create a materialized result by continuously fetching
113211
- auto result = make_unique<MaterializedQueryResult>(pending.statement_type, pending.properties, pending.types,
113212
- pending.names, shared_from_this());
113213
- result->properties = pending.properties;
113214
- while (true) {
113215
- auto chunk = FetchInternal(lock, GetExecutor(), *result);
113216
- if (!chunk || chunk->size() == 0) {
113217
- break;
113218
- }
114292
+ unique_ptr<QueryResult> result;
114293
+ if (executor.HasResultCollector()) {
114294
+ // we have a result collector - fetch the result directly from the result collector
114295
+ result = executor.GetResult();
114296
+ CleanupInternal(lock, result.get(), false);
114297
+ } else {
114298
+ // no result collector - create a materialized result by continuously fetching
114299
+ auto materialized_result = make_unique<MaterializedQueryResult>(
114300
+ pending.statement_type, pending.properties, pending.types, pending.names, shared_from_this());
114301
+ while (true) {
114302
+ auto chunk = FetchInternal(lock, GetExecutor(), *materialized_result);
114303
+ if (!chunk || chunk->size() == 0) {
114304
+ break;
114305
+ }
113219
114306
  #ifdef DEBUG
113220
- for (idx_t i = 0; i < chunk->ColumnCount(); i++) {
113221
- if (pending.types[i].id() == LogicalTypeId::VARCHAR) {
113222
- chunk->data[i].UTFVerify(chunk->size());
114307
+ for (idx_t i = 0; i < chunk->ColumnCount(); i++) {
114308
+ if (pending.types[i].id() == LogicalTypeId::VARCHAR) {
114309
+ chunk->data[i].UTFVerify(chunk->size());
114310
+ }
113223
114311
  }
113224
- }
113225
114312
  #endif
113226
- result->collection.Append(*chunk);
114313
+ materialized_result->collection.Append(*chunk);
114314
+ }
114315
+ result = move(materialized_result);
113227
114316
  }
113228
- return move(result);
114317
+ return result;
113229
114318
  }
113230
114319
 
113231
114320
  shared_ptr<PreparedStatementData> ClientContext::CreatePreparedStatement(ClientContextLock &lock, const string &query,
@@ -113288,7 +114377,7 @@ double ClientContext::GetProgress() {
113288
114377
 
113289
114378
  unique_ptr<PendingQueryResult> ClientContext::PendingPreparedStatement(ClientContextLock &lock,
113290
114379
  shared_ptr<PreparedStatementData> statement_p,
113291
- vector<Value> bound_values) {
114380
+ PendingQueryParameters parameters) {
113292
114381
  D_ASSERT(active_query);
113293
114382
  auto &statement = *statement_p;
113294
114383
  if (ActiveTransaction().IsInvalidated() && statement.properties.requires_valid_transaction) {
@@ -113301,7 +114390,7 @@ unique_ptr<PendingQueryResult> ClientContext::PendingPreparedStatement(ClientCon
113301
114390
  }
113302
114391
 
113303
114392
  // bind the bound values before execution
113304
- statement.Bind(move(bound_values));
114393
+ statement.Bind(parameters.parameters ? *parameters.parameters : vector<Value>());
113305
114394
 
113306
114395
  active_query->executor = make_unique<Executor>(*this);
113307
114396
  auto &executor = *active_query->executor;
@@ -113310,12 +114399,23 @@ unique_ptr<PendingQueryResult> ClientContext::PendingPreparedStatement(ClientCon
113310
114399
  active_query->progress_bar->Start();
113311
114400
  query_progress = 0;
113312
114401
  }
113313
- executor.Initialize(statement.plan.get());
114402
+ auto stream_result = parameters.allow_stream_result && statement.properties.allow_stream_result;
114403
+ if (!stream_result && statement.properties.return_type == StatementReturnType::QUERY_RESULT) {
114404
+ unique_ptr<PhysicalResultCollector> collector;
114405
+ auto &config = ClientConfig::GetConfig(*this);
114406
+ auto get_method =
114407
+ config.result_collector ? config.result_collector : PhysicalResultCollector::GetResultCollector;
114408
+ collector = get_method(*this, statement);
114409
+ D_ASSERT(collector->type == PhysicalOperatorType::RESULT_COLLECTOR);
114410
+ executor.Initialize(move(collector));
114411
+ } else {
114412
+ executor.Initialize(statement.plan.get());
114413
+ }
113314
114414
  auto types = executor.GetTypes();
113315
114415
  D_ASSERT(types == statement.types);
113316
114416
  D_ASSERT(!active_query->open_result);
113317
114417
 
113318
- auto pending_result = make_unique<PendingQueryResult>(shared_from_this(), *statement_p, move(types));
114418
+ auto pending_result = make_unique<PendingQueryResult>(shared_from_this(), *statement_p, move(types), stream_result);
113319
114419
  active_query->prepared = move(statement_p);
113320
114420
  active_query->open_result = pending_result.get();
113321
114421
  return pending_result;
@@ -113443,49 +114543,59 @@ unique_ptr<PreparedStatement> ClientContext::Prepare(const string &query) {
113443
114543
 
113444
114544
  unique_ptr<PendingQueryResult> ClientContext::PendingQueryPreparedInternal(ClientContextLock &lock, const string &query,
113445
114545
  shared_ptr<PreparedStatementData> &prepared,
113446
- vector<Value> &values) {
114546
+ PendingQueryParameters parameters) {
113447
114547
  try {
113448
114548
  InitialCleanup(lock);
113449
114549
  } catch (std::exception &ex) {
113450
114550
  return make_unique<PendingQueryResult>(ex.what());
113451
114551
  }
113452
- return PendingStatementOrPreparedStatementInternal(lock, query, nullptr, prepared, &values);
114552
+ return PendingStatementOrPreparedStatementInternal(lock, query, nullptr, prepared, parameters);
113453
114553
  }
113454
114554
 
113455
- unique_ptr<PendingQueryResult>
113456
- ClientContext::PendingQuery(const string &query, shared_ptr<PreparedStatementData> &prepared, vector<Value> &values) {
114555
+ unique_ptr<PendingQueryResult> ClientContext::PendingQuery(const string &query,
114556
+ shared_ptr<PreparedStatementData> &prepared,
114557
+ PendingQueryParameters parameters) {
113457
114558
  auto lock = LockContext();
113458
- return PendingQueryPreparedInternal(*lock, query, prepared, values);
114559
+ return PendingQueryPreparedInternal(*lock, query, prepared, parameters);
113459
114560
  }
113460
114561
 
113461
114562
  unique_ptr<QueryResult> ClientContext::Execute(const string &query, shared_ptr<PreparedStatementData> &prepared,
113462
- vector<Value> &values, bool allow_stream_result) {
114563
+ PendingQueryParameters parameters) {
113463
114564
  auto lock = LockContext();
113464
- auto pending = PendingQueryPreparedInternal(*lock, query, prepared, values);
114565
+ auto pending = PendingQueryPreparedInternal(*lock, query, prepared, parameters);
113465
114566
  if (!pending->success) {
113466
114567
  return make_unique<MaterializedQueryResult>(pending->error);
113467
114568
  }
113468
- return pending->ExecuteInternal(*lock, allow_stream_result);
114569
+ return pending->ExecuteInternal(*lock);
114570
+ }
114571
+
114572
+ unique_ptr<QueryResult> ClientContext::Execute(const string &query, shared_ptr<PreparedStatementData> &prepared,
114573
+ vector<Value> &values, bool allow_stream_result) {
114574
+ PendingQueryParameters parameters;
114575
+ parameters.parameters = &values;
114576
+ parameters.allow_stream_result = allow_stream_result;
114577
+ return Execute(query, prepared, parameters);
113469
114578
  }
113470
114579
 
113471
114580
  unique_ptr<PendingQueryResult> ClientContext::PendingStatementInternal(ClientContextLock &lock, const string &query,
113472
- unique_ptr<SQLStatement> statement) {
114581
+ unique_ptr<SQLStatement> statement,
114582
+ PendingQueryParameters parameters) {
113473
114583
  // prepare the query for execution
113474
114584
  auto prepared = CreatePreparedStatement(lock, query, move(statement));
113475
- // by default, no values are bound
113476
- vector<Value> bound_values;
113477
114585
  // execute the prepared statement
113478
- return PendingPreparedStatement(lock, move(prepared), move(bound_values));
114586
+ return PendingPreparedStatement(lock, move(prepared), parameters);
113479
114587
  }
113480
114588
 
113481
114589
  unique_ptr<QueryResult> ClientContext::RunStatementInternal(ClientContextLock &lock, const string &query,
113482
114590
  unique_ptr<SQLStatement> statement,
113483
114591
  bool allow_stream_result, bool verify) {
113484
- auto pending = PendingQueryInternal(lock, move(statement), verify);
114592
+ PendingQueryParameters parameters;
114593
+ parameters.allow_stream_result = allow_stream_result;
114594
+ auto pending = PendingQueryInternal(lock, move(statement), parameters, verify);
113485
114595
  if (!pending->success) {
113486
114596
  return make_unique<MaterializedQueryResult>(move(pending->error));
113487
114597
  }
113488
- return ExecutePendingQueryInternal(lock, *pending, allow_stream_result);
114598
+ return ExecutePendingQueryInternal(lock, *pending);
113489
114599
  }
113490
114600
 
113491
114601
  bool ClientContext::IsActiveResult(ClientContextLock &lock, BaseQueryResult *result) {
@@ -113508,7 +114618,7 @@ static bool IsExplainAnalyze(SQLStatement *statement) {
113508
114618
 
113509
114619
  unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatementInternal(
113510
114620
  ClientContextLock &lock, const string &query, unique_ptr<SQLStatement> statement,
113511
- shared_ptr<PreparedStatementData> &prepared, vector<Value> *values) {
114621
+ shared_ptr<PreparedStatementData> &prepared, PendingQueryParameters parameters) {
113512
114622
  // check if we are on AutoCommit. In this case we should start a transaction.
113513
114623
  if (statement && config.query_verification_enabled) {
113514
114624
  // query verification is enabled
@@ -113545,13 +114655,12 @@ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatemen
113545
114655
  break;
113546
114656
  }
113547
114657
  }
113548
- return PendingStatementOrPreparedStatement(lock, query, move(statement), prepared, values);
114658
+ return PendingStatementOrPreparedStatement(lock, query, move(statement), prepared, parameters);
113549
114659
  }
113550
114660
 
113551
- unique_ptr<PendingQueryResult>
113552
- ClientContext::PendingStatementOrPreparedStatement(ClientContextLock &lock, const string &query,
113553
- unique_ptr<SQLStatement> statement,
113554
- shared_ptr<PreparedStatementData> &prepared, vector<Value> *values) {
114661
+ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatement(
114662
+ ClientContextLock &lock, const string &query, unique_ptr<SQLStatement> statement,
114663
+ shared_ptr<PreparedStatementData> &prepared, PendingQueryParameters parameters) {
113555
114664
  unique_ptr<PendingQueryResult> result;
113556
114665
 
113557
114666
  BeginQueryInternal(lock, query);
@@ -113561,13 +114670,14 @@ ClientContext::PendingStatementOrPreparedStatement(ClientContextLock &lock, cons
113561
114670
  bool invalidate_query = true;
113562
114671
  try {
113563
114672
  if (statement) {
113564
- result = PendingStatementInternal(lock, query, move(statement));
114673
+ result = PendingStatementInternal(lock, query, move(statement), parameters);
113565
114674
  } else {
113566
114675
  auto &catalog = Catalog::GetCatalog(*this);
113567
114676
  if (prepared->unbound_statement && (catalog.GetCatalogVersion() != prepared->catalog_version ||
113568
114677
  !prepared->properties.bound_all_parameters)) {
113569
114678
  // catalog was modified: rebind the statement before execution
113570
- auto new_prepared = CreatePreparedStatement(lock, query, prepared->unbound_statement->Copy(), values);
114679
+ auto new_prepared =
114680
+ CreatePreparedStatement(lock, query, prepared->unbound_statement->Copy(), parameters.parameters);
113571
114681
  if (prepared->types != new_prepared->types && prepared->properties.bound_all_parameters) {
113572
114682
  throw BinderException("Rebinding statement after catalog change resulted in change of types");
113573
114683
  }
@@ -113576,7 +114686,7 @@ ClientContext::PendingStatementOrPreparedStatement(ClientContextLock &lock, cons
113576
114686
  prepared = move(new_prepared);
113577
114687
  prepared->properties.bound_all_parameters = false;
113578
114688
  }
113579
- result = PendingPreparedStatement(lock, prepared, *values);
114689
+ result = PendingPreparedStatement(lock, prepared, parameters);
113580
114690
  }
113581
114691
  } catch (StandardException &ex) {
113582
114692
  // standard exceptions do not invalidate the current transaction
@@ -113618,8 +114728,8 @@ void ClientContext::LogQueryInternal(ClientContextLock &, const string &query) {
113618
114728
  }
113619
114729
 
113620
114730
  unique_ptr<QueryResult> ClientContext::Query(unique_ptr<SQLStatement> statement, bool allow_stream_result) {
113621
- auto pending_query = PendingQuery(move(statement));
113622
- return pending_query->Execute(allow_stream_result);
114731
+ auto pending_query = PendingQuery(move(statement), allow_stream_result);
114732
+ return pending_query->Execute();
113623
114733
  }
113624
114734
 
113625
114735
  unique_ptr<QueryResult> ClientContext::Query(const string &query, bool allow_stream_result) {
@@ -113644,13 +114754,14 @@ unique_ptr<QueryResult> ClientContext::Query(const string &query, bool allow_str
113644
114754
  for (idx_t i = 0; i < statements.size(); i++) {
113645
114755
  auto &statement = statements[i];
113646
114756
  bool is_last_statement = i + 1 == statements.size();
113647
- bool stream_result = allow_stream_result && is_last_statement;
113648
- auto pending_query = PendingQueryInternal(*lock, move(statement));
114757
+ PendingQueryParameters parameters;
114758
+ parameters.allow_stream_result = allow_stream_result && is_last_statement;
114759
+ auto pending_query = PendingQueryInternal(*lock, move(statement), parameters);
113649
114760
  unique_ptr<QueryResult> current_result;
113650
114761
  if (!pending_query->success) {
113651
114762
  current_result = make_unique<MaterializedQueryResult>(pending_query->error);
113652
114763
  } else {
113653
- current_result = ExecutePendingQueryInternal(*lock, *pending_query, stream_result);
114764
+ current_result = ExecutePendingQueryInternal(*lock, *pending_query);
113654
114765
  }
113655
114766
  // now append the result to the list of results
113656
114767
  if (!last_result) {
@@ -113679,7 +114790,7 @@ bool ClientContext::ParseStatements(ClientContextLock &lock, const string &query
113679
114790
  }
113680
114791
  }
113681
114792
 
113682
- unique_ptr<PendingQueryResult> ClientContext::PendingQuery(const string &query) {
114793
+ unique_ptr<PendingQueryResult> ClientContext::PendingQuery(const string &query, bool allow_stream_result) {
113683
114794
  auto lock = LockContext();
113684
114795
 
113685
114796
  string error;
@@ -113690,28 +114801,33 @@ unique_ptr<PendingQueryResult> ClientContext::PendingQuery(const string &query)
113690
114801
  if (statements.size() != 1) {
113691
114802
  return make_unique<PendingQueryResult>("PendingQuery can only take a single statement");
113692
114803
  }
113693
- return PendingQueryInternal(*lock, move(statements[0]));
114804
+ PendingQueryParameters parameters;
114805
+ parameters.allow_stream_result = allow_stream_result;
114806
+ return PendingQueryInternal(*lock, move(statements[0]), parameters);
113694
114807
  }
113695
114808
 
113696
- unique_ptr<PendingQueryResult> ClientContext::PendingQuery(unique_ptr<SQLStatement> statement) {
114809
+ unique_ptr<PendingQueryResult> ClientContext::PendingQuery(unique_ptr<SQLStatement> statement,
114810
+ bool allow_stream_result) {
113697
114811
  auto lock = LockContext();
113698
- return PendingQueryInternal(*lock, move(statement));
114812
+ PendingQueryParameters parameters;
114813
+ parameters.allow_stream_result = allow_stream_result;
114814
+ return PendingQueryInternal(*lock, move(statement), parameters);
113699
114815
  }
113700
114816
 
113701
114817
  unique_ptr<PendingQueryResult> ClientContext::PendingQueryInternal(ClientContextLock &lock,
113702
- unique_ptr<SQLStatement> statement, bool verify) {
114818
+ unique_ptr<SQLStatement> statement,
114819
+ PendingQueryParameters parameters, bool verify) {
113703
114820
  auto query = statement->query;
113704
114821
  shared_ptr<PreparedStatementData> prepared;
113705
114822
  if (verify) {
113706
- return PendingStatementOrPreparedStatementInternal(lock, query, move(statement), prepared, nullptr);
114823
+ return PendingStatementOrPreparedStatementInternal(lock, query, move(statement), prepared, parameters);
113707
114824
  } else {
113708
- return PendingStatementOrPreparedStatement(lock, query, move(statement), prepared, nullptr);
114825
+ return PendingStatementOrPreparedStatement(lock, query, move(statement), prepared, parameters);
113709
114826
  }
113710
114827
  }
113711
114828
 
113712
- unique_ptr<QueryResult> ClientContext::ExecutePendingQueryInternal(ClientContextLock &lock, PendingQueryResult &query,
113713
- bool allow_stream_result) {
113714
- return query.ExecuteInternal(lock, allow_stream_result);
114829
+ unique_ptr<QueryResult> ClientContext::ExecutePendingQueryInternal(ClientContextLock &lock, PendingQueryResult &query) {
114830
+ return query.ExecuteInternal(lock);
113715
114831
  }
113716
114832
 
113717
114833
  void ClientContext::Interrupt() {
@@ -114394,6 +115510,16 @@ struct PreserveIdentifierCase {
114394
115510
  static Value GetSetting(ClientContext &context);
114395
115511
  };
114396
115512
 
115513
+ struct PreserveInsertionOrder {
115514
+ static constexpr const char *Name = "preserve_insertion_order";
115515
+ static constexpr const char *Description =
115516
+ "Whether or not to preserve insertion order. If set to false the system is allowed to re-order any results "
115517
+ "that do not contain ORDER BY clauses.";
115518
+ static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN;
115519
+ static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
115520
+ static Value GetSetting(ClientContext &context);
115521
+ };
115522
+
114397
115523
  struct ProfilerHistorySize {
114398
115524
  static constexpr const char *Name = "profiler_history_size";
114399
115525
  static constexpr const char *Description = "Sets the profiler history size";
@@ -114509,6 +115635,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
114509
115635
  DUCKDB_GLOBAL_ALIAS("null_order", DefaultNullOrderSetting),
114510
115636
  DUCKDB_LOCAL(PerfectHashThresholdSetting),
114511
115637
  DUCKDB_LOCAL(PreserveIdentifierCase),
115638
+ DUCKDB_GLOBAL(PreserveInsertionOrder),
114512
115639
  DUCKDB_LOCAL(ProfilerHistorySize),
114513
115640
  DUCKDB_LOCAL(ProfileOutputSetting),
114514
115641
  DUCKDB_LOCAL(ProfilingModeSetting),
@@ -114978,12 +116105,12 @@ unique_ptr<MaterializedQueryResult> Connection::Query(unique_ptr<SQLStatement> s
114978
116105
  return unique_ptr_cast<QueryResult, MaterializedQueryResult>(move(result));
114979
116106
  }
114980
116107
 
114981
- unique_ptr<PendingQueryResult> Connection::PendingQuery(const string &query) {
114982
- return context->PendingQuery(query);
116108
+ unique_ptr<PendingQueryResult> Connection::PendingQuery(const string &query, bool allow_stream_result) {
116109
+ return context->PendingQuery(query, allow_stream_result);
114983
116110
  }
114984
116111
 
114985
- unique_ptr<PendingQueryResult> Connection::PendingQuery(unique_ptr<SQLStatement> statement) {
114986
- return context->PendingQuery(move(statement));
116112
+ unique_ptr<PendingQueryResult> Connection::PendingQuery(unique_ptr<SQLStatement> statement, bool allow_stream_result) {
116113
+ return context->PendingQuery(move(statement), allow_stream_result);
114987
116114
  }
114988
116115
 
114989
116116
  unique_ptr<PreparedStatement> Connection::Prepare(const string &query) {
@@ -124082,10 +125209,10 @@ unique_ptr<DataChunk> MaterializedQueryResult::FetchRaw() {
124082
125209
  namespace duckdb {
124083
125210
 
124084
125211
  PendingQueryResult::PendingQueryResult(shared_ptr<ClientContext> context_p, PreparedStatementData &statement,
124085
- vector<LogicalType> types_p)
125212
+ vector<LogicalType> types_p, bool allow_stream_result)
124086
125213
  : BaseQueryResult(QueryResultType::PENDING_RESULT, statement.statement_type, statement.properties, move(types_p),
124087
125214
  statement.names),
124088
- context(move(context_p)) {
125215
+ context(move(context_p)), allow_stream_result(allow_stream_result) {
124089
125216
  }
124090
125217
 
124091
125218
  PendingQueryResult::PendingQueryResult(string error) : BaseQueryResult(QueryResultType::PENDING_RESULT, move(error)) {
@@ -124123,21 +125250,21 @@ PendingExecutionResult PendingQueryResult::ExecuteTaskInternal(ClientContextLock
124123
125250
  return context->ExecuteTaskInternal(lock, *this);
124124
125251
  }
124125
125252
 
124126
- unique_ptr<QueryResult> PendingQueryResult::ExecuteInternal(ClientContextLock &lock, bool allow_streaming_result) {
125253
+ unique_ptr<QueryResult> PendingQueryResult::ExecuteInternal(ClientContextLock &lock) {
124127
125254
  CheckExecutableInternal(lock);
124128
125255
  while (ExecuteTaskInternal(lock) == PendingExecutionResult::RESULT_NOT_READY) {
124129
125256
  }
124130
125257
  if (!success) {
124131
125258
  return make_unique<MaterializedQueryResult>(error);
124132
125259
  }
124133
- auto result = context->FetchResultInternal(lock, *this, allow_streaming_result);
125260
+ auto result = context->FetchResultInternal(lock, *this);
124134
125261
  Close();
124135
125262
  return result;
124136
125263
  }
124137
125264
 
124138
- unique_ptr<QueryResult> PendingQueryResult::Execute(bool allow_streaming_result) {
125265
+ unique_ptr<QueryResult> PendingQueryResult::Execute() {
124139
125266
  auto lock = LockContext();
124140
- return ExecuteInternal(*lock, allow_streaming_result);
125267
+ return ExecuteInternal(*lock);
124141
125268
  }
124142
125269
 
124143
125270
  void PendingQueryResult::Close() {
@@ -124190,19 +125317,22 @@ const vector<string> &PreparedStatement::GetNames() {
124190
125317
  }
124191
125318
 
124192
125319
  unique_ptr<QueryResult> PreparedStatement::Execute(vector<Value> &values, bool allow_stream_result) {
124193
- auto pending = PendingQuery(values);
125320
+ auto pending = PendingQuery(values, allow_stream_result);
124194
125321
  if (!pending->success) {
124195
125322
  return make_unique<MaterializedQueryResult>(pending->error);
124196
125323
  }
124197
- return pending->Execute(allow_stream_result && data->properties.allow_stream_result);
125324
+ return pending->Execute();
124198
125325
  }
124199
125326
 
124200
- unique_ptr<PendingQueryResult> PreparedStatement::PendingQuery(vector<Value> &values) {
125327
+ unique_ptr<PendingQueryResult> PreparedStatement::PendingQuery(vector<Value> &values, bool allow_stream_result) {
124201
125328
  if (!success) {
124202
125329
  throw InvalidInputException("Attempting to execute an unsuccessfully prepared statement!");
124203
125330
  }
124204
125331
  D_ASSERT(data);
124205
- auto result = context->PendingQuery(query, data, values);
125332
+ PendingQueryParameters parameters;
125333
+ parameters.parameters = &values;
125334
+ parameters.allow_stream_result = allow_stream_result && data->properties.allow_stream_result;
125335
+ auto result = context->PendingQuery(query, data, parameters);
124206
125336
  return result;
124207
125337
  }
124208
125338
 
@@ -124344,6 +125474,7 @@ bool QueryProfiler::OperatorRequiresProfiling(PhysicalOperatorType op_type) {
124344
125474
  case PhysicalOperatorType::STREAMING_SAMPLE:
124345
125475
  case PhysicalOperatorType::LIMIT:
124346
125476
  case PhysicalOperatorType::LIMIT_PERCENT:
125477
+ case PhysicalOperatorType::STREAMING_LIMIT:
124347
125478
  case PhysicalOperatorType::TOP_N:
124348
125479
  case PhysicalOperatorType::WINDOW:
124349
125480
  case PhysicalOperatorType::UNNEST:
@@ -124847,27 +125978,10 @@ unique_ptr<QueryProfiler::TreeNode> QueryProfiler::CreateTree(PhysicalOperator *
124847
125978
  node->extra_info = root->ParamsToString();
124848
125979
  node->depth = depth;
124849
125980
  tree_map[root] = node.get();
124850
- for (auto &child : root->children) {
124851
- auto child_node = CreateTree(child.get(), depth + 1);
124852
- node->children.push_back(move(child_node));
124853
- }
124854
- switch (root->type) {
124855
- case PhysicalOperatorType::DELIM_JOIN: {
124856
- auto &delim_join = (PhysicalDelimJoin &)*root;
124857
- auto child_node = CreateTree((PhysicalOperator *)delim_join.join.get(), depth + 1);
124858
- node->children.push_back(move(child_node));
124859
- child_node = CreateTree((PhysicalOperator *)delim_join.distinct.get(), depth + 1);
124860
- node->children.push_back(move(child_node));
124861
- break;
124862
- }
124863
- case PhysicalOperatorType::EXECUTE: {
124864
- auto &execute = (PhysicalExecute &)*root;
124865
- auto child_node = CreateTree((PhysicalOperator *)execute.plan, depth + 1);
125981
+ auto children = root->GetChildren();
125982
+ for (auto &child : children) {
125983
+ auto child_node = CreateTree(child, depth + 1);
124866
125984
  node->children.push_back(move(child_node));
124867
- break;
124868
- }
124869
- default:
124870
- break;
124871
125985
  }
124872
125986
  return node;
124873
125987
  }
@@ -128174,6 +129288,18 @@ Value PreserveIdentifierCase::GetSetting(ClientContext &context) {
128174
129288
  return Value::BOOLEAN(ClientConfig::GetConfig(context).preserve_identifier_case);
128175
129289
  }
128176
129290
 
129291
+ //===--------------------------------------------------------------------===//
129292
+ // PreserveInsertionOrder
129293
+ //===--------------------------------------------------------------------===//
129294
+ void PreserveInsertionOrder::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
129295
+ config.preserve_insertion_order = input.GetValue<bool>();
129296
+ }
129297
+
129298
+ Value PreserveInsertionOrder::GetSetting(ClientContext &context) {
129299
+ auto &config = DBConfig::GetConfig(context);
129300
+ return Value::BOOLEAN(config.preserve_insertion_order);
129301
+ }
129302
+
128177
129303
  //===--------------------------------------------------------------------===//
128178
129304
  // Profiler History Size
128179
129305
  //===--------------------------------------------------------------------===//
@@ -137610,11 +138736,6 @@ void Event::SetTasks(vector<unique_ptr<Task>> tasks) {
137610
138736
 
137611
138737
 
137612
138738
 
137613
-
137614
-
137615
-
137616
-
137617
-
137618
138739
  //===----------------------------------------------------------------------===//
137619
138740
  // DuckDB
137620
138741
  //
@@ -137709,7 +138830,9 @@ private:
137709
138830
  //! Whether or not the pipeline has been finalized (used for verification only)
137710
138831
  bool finalized = false;
137711
138832
  //! Whether or not the pipeline has finished processing
137712
- bool finished_processing = false;
138833
+ int32_t finished_processing_idx = -1;
138834
+ //! Whether or not this pipeline requires keeping track of the batch index of the source
138835
+ bool requires_batch_index = false;
137713
138836
 
137714
138837
  //! Cached chunks for any operators that require caching
137715
138838
  vector<unique_ptr<DataChunk>> cached_chunks;
@@ -137722,6 +138845,9 @@ private:
137722
138845
  void GoToSource(idx_t &current_idx, idx_t initial_idx);
137723
138846
  void FetchFromSource(DataChunk &result);
137724
138847
 
138848
+ void FinishProcessing(int32_t operator_idx = -1);
138849
+ bool IsFinished();
138850
+
137725
138851
  OperatorResultType ExecutePushInternal(DataChunk &input, idx_t initial_idx = 0);
137726
138852
  //! Pushes a chunk through the pipeline and returns a single result chunk
137727
138853
  //! Returns whether or not a new input chunk is needed, or whether or not we are finished
@@ -137822,6 +138948,8 @@ public:
137822
138948
  } // namespace duckdb
137823
138949
 
137824
138950
 
138951
+
138952
+
137825
138953
  #include <algorithm>
137826
138954
 
137827
138955
  namespace duckdb {
@@ -138062,8 +139190,18 @@ void Executor::VerifyPipelines() {
138062
139190
  #endif
138063
139191
  }
138064
139192
 
139193
+ void Executor::Initialize(unique_ptr<PhysicalOperator> physical_plan) {
139194
+ Reset();
139195
+ owned_plan = move(physical_plan);
139196
+ InitializeInternal(owned_plan.get());
139197
+ }
139198
+
138065
139199
  void Executor::Initialize(PhysicalOperator *plan) {
138066
139200
  Reset();
139201
+ InitializeInternal(plan);
139202
+ }
139203
+
139204
+ void Executor::InitializeInternal(PhysicalOperator *plan) {
138067
139205
 
138068
139206
  auto &scheduler = TaskScheduler::GetScheduler(context);
138069
139207
  {
@@ -138076,7 +139214,9 @@ void Executor::Initialize(PhysicalOperator *plan) {
138076
139214
 
138077
139215
  auto root_pipeline = make_shared<Pipeline>(*this);
138078
139216
  root_pipeline->sink = nullptr;
138079
- BuildPipelines(physical_plan, root_pipeline.get());
139217
+
139218
+ PipelineBuildState state;
139219
+ physical_plan->BuildPipelines(*this, *root_pipeline, state);
138080
139220
 
138081
139221
  this->total_pipelines = pipelines.size();
138082
139222
 
@@ -138187,9 +139327,8 @@ PendingExecutionResult Executor::ExecuteTask() {
138187
139327
 
138188
139328
  void Executor::Reset() {
138189
139329
  lock_guard<mutex> elock(executor_lock);
138190
- delim_join_dependencies.clear();
138191
- recursive_cte = nullptr;
138192
139330
  physical_plan = nullptr;
139331
+ owned_plan.reset();
138193
139332
  root_executor.reset();
138194
139333
  root_pipelines.clear();
138195
139334
  root_pipeline_idx = 0;
@@ -138230,235 +139369,6 @@ void Executor::AddChildPipeline(Pipeline *current) {
138230
139369
  child_pipelines[current].push_back(move(child_pipeline));
138231
139370
  }
138232
139371
 
138233
- void Executor::BuildPipelines(PhysicalOperator *op, Pipeline *current) {
138234
- D_ASSERT(current);
138235
- op->op_state.reset();
138236
- if (op->IsSink()) {
138237
- // operator is a sink, build a pipeline
138238
- op->sink_state.reset();
138239
-
138240
- PhysicalOperator *pipeline_child = nullptr;
138241
- switch (op->type) {
138242
- case PhysicalOperatorType::CREATE_TABLE_AS:
138243
- case PhysicalOperatorType::INSERT:
138244
- case PhysicalOperatorType::DELETE_OPERATOR:
138245
- case PhysicalOperatorType::UPDATE:
138246
- case PhysicalOperatorType::HASH_GROUP_BY:
138247
- case PhysicalOperatorType::SIMPLE_AGGREGATE:
138248
- case PhysicalOperatorType::PERFECT_HASH_GROUP_BY:
138249
- case PhysicalOperatorType::WINDOW:
138250
- case PhysicalOperatorType::ORDER_BY:
138251
- case PhysicalOperatorType::RESERVOIR_SAMPLE:
138252
- case PhysicalOperatorType::TOP_N:
138253
- case PhysicalOperatorType::COPY_TO_FILE:
138254
- case PhysicalOperatorType::LIMIT:
138255
- case PhysicalOperatorType::LIMIT_PERCENT:
138256
- case PhysicalOperatorType::EXPLAIN_ANALYZE:
138257
- D_ASSERT(op->children.size() == 1);
138258
- // single operator:
138259
- // the operator becomes the data source of the current pipeline
138260
- current->source = op;
138261
- // we create a new pipeline starting from the child
138262
- pipeline_child = op->children[0].get();
138263
- break;
138264
- case PhysicalOperatorType::EXPORT:
138265
- // EXPORT has an optional child
138266
- // we only need to schedule child pipelines if there is a child
138267
- current->source = op;
138268
- if (op->children.empty()) {
138269
- return;
138270
- }
138271
- D_ASSERT(op->children.size() == 1);
138272
- pipeline_child = op->children[0].get();
138273
- break;
138274
- case PhysicalOperatorType::NESTED_LOOP_JOIN:
138275
- case PhysicalOperatorType::BLOCKWISE_NL_JOIN:
138276
- case PhysicalOperatorType::HASH_JOIN:
138277
- case PhysicalOperatorType::PIECEWISE_MERGE_JOIN:
138278
- case PhysicalOperatorType::CROSS_PRODUCT:
138279
- // regular join, create a pipeline with RHS source that sinks into this pipeline
138280
- pipeline_child = op->children[1].get();
138281
- // on the LHS (probe child), the operator becomes a regular operator
138282
- current->operators.push_back(op);
138283
- if (op->IsSource()) {
138284
- // FULL or RIGHT outer join
138285
- // schedule a scan of the node as a child pipeline
138286
- // this scan has to be performed AFTER all the probing has happened
138287
- if (recursive_cte) {
138288
- throw NotImplementedException("FULL and RIGHT outer joins are not supported in recursive CTEs yet");
138289
- }
138290
- AddChildPipeline(current);
138291
- }
138292
- BuildPipelines(op->children[0].get(), current);
138293
- break;
138294
- case PhysicalOperatorType::IE_JOIN: {
138295
- D_ASSERT(op->children.size() == 2);
138296
- if (recursive_cte) {
138297
- throw NotImplementedException("IEJoins are not supported in recursive CTEs yet");
138298
- }
138299
-
138300
- // Build the LHS
138301
- auto lhs_pipeline = make_shared<Pipeline>(*this);
138302
- lhs_pipeline->sink = op;
138303
- D_ASSERT(op->children[0].get());
138304
- BuildPipelines(op->children[0].get(), lhs_pipeline.get());
138305
-
138306
- // Build the RHS
138307
- auto rhs_pipeline = make_shared<Pipeline>(*this);
138308
- rhs_pipeline->sink = op;
138309
- D_ASSERT(op->children[1].get());
138310
- BuildPipelines(op->children[1].get(), rhs_pipeline.get());
138311
-
138312
- // RHS => LHS => current
138313
- current->AddDependency(rhs_pipeline);
138314
- rhs_pipeline->AddDependency(lhs_pipeline);
138315
-
138316
- pipelines.emplace_back(move(lhs_pipeline));
138317
- pipelines.emplace_back(move(rhs_pipeline));
138318
-
138319
- // Now build both and scan
138320
- current->source = op;
138321
- return;
138322
- }
138323
- case PhysicalOperatorType::DELIM_JOIN: {
138324
- // duplicate eliminated join
138325
- // for delim joins, recurse into the actual join
138326
- pipeline_child = op->children[0].get();
138327
- break;
138328
- }
138329
- case PhysicalOperatorType::RECURSIVE_CTE: {
138330
- auto &cte_node = (PhysicalRecursiveCTE &)*op;
138331
-
138332
- // recursive CTE
138333
- current->source = op;
138334
- // the LHS of the recursive CTE is our initial state
138335
- // we build this pipeline as normal
138336
- pipeline_child = op->children[0].get();
138337
- // for the RHS, we gather all pipelines that depend on the recursive cte
138338
- // these pipelines need to be rerun
138339
- if (recursive_cte) {
138340
- throw InternalException("Recursive CTE detected WITHIN a recursive CTE node");
138341
- }
138342
- recursive_cte = op;
138343
-
138344
- auto recursive_pipeline = make_shared<Pipeline>(*this);
138345
- recursive_pipeline->sink = op;
138346
- op->sink_state.reset();
138347
- BuildPipelines(op->children[1].get(), recursive_pipeline.get());
138348
-
138349
- cte_node.pipelines.push_back(move(recursive_pipeline));
138350
-
138351
- recursive_cte = nullptr;
138352
- break;
138353
- }
138354
- default:
138355
- throw InternalException("Unimplemented sink type!");
138356
- }
138357
- // the current is dependent on this pipeline to complete
138358
- auto pipeline = make_shared<Pipeline>(*this);
138359
- pipeline->sink = op;
138360
- current->AddDependency(pipeline);
138361
- D_ASSERT(pipeline_child);
138362
- // recurse into the pipeline child
138363
- BuildPipelines(pipeline_child, pipeline.get());
138364
- if (op->type == PhysicalOperatorType::DELIM_JOIN) {
138365
- // for delim joins, recurse into the actual join
138366
- // any pipelines in there depend on the main pipeline
138367
- auto &delim_join = (PhysicalDelimJoin &)*op;
138368
- // any scan of the duplicate eliminated data on the RHS depends on this pipeline
138369
- // we add an entry to the mapping of (PhysicalOperator*) -> (Pipeline*)
138370
- for (auto &delim_scan : delim_join.delim_scans) {
138371
- delim_join_dependencies[delim_scan] = pipeline.get();
138372
- }
138373
- BuildPipelines(delim_join.join.get(), current);
138374
- }
138375
- if (!recursive_cte) {
138376
- // regular pipeline: schedule it
138377
- pipelines.push_back(move(pipeline));
138378
- } else {
138379
- // CTE pipeline! add it to the CTE pipelines
138380
- D_ASSERT(recursive_cte);
138381
- auto &cte = (PhysicalRecursiveCTE &)*recursive_cte;
138382
- cte.pipelines.push_back(move(pipeline));
138383
- }
138384
- } else {
138385
- // operator is not a sink! recurse in children
138386
- // first check if there is any additional action we need to do depending on the type
138387
- switch (op->type) {
138388
- case PhysicalOperatorType::DELIM_SCAN: {
138389
- D_ASSERT(op->children.empty());
138390
- auto entry = delim_join_dependencies.find(op);
138391
- D_ASSERT(entry != delim_join_dependencies.end());
138392
- // this chunk scan introduces a dependency to the current pipeline
138393
- // namely a dependency on the duplicate elimination pipeline to finish
138394
- auto delim_dependency = entry->second->shared_from_this();
138395
- D_ASSERT(delim_dependency->sink->type == PhysicalOperatorType::DELIM_JOIN);
138396
- auto &delim_join = (PhysicalDelimJoin &)*delim_dependency->sink;
138397
- current->AddDependency(delim_dependency);
138398
- current->source = (PhysicalOperator *)delim_join.distinct.get();
138399
- return;
138400
- }
138401
- case PhysicalOperatorType::EXECUTE: {
138402
- // EXECUTE statement: build pipeline on child
138403
- auto &execute = (PhysicalExecute &)*op;
138404
- BuildPipelines(execute.plan, current);
138405
- return;
138406
- }
138407
- case PhysicalOperatorType::RECURSIVE_CTE_SCAN: {
138408
- if (!recursive_cte) {
138409
- throw InternalException("Recursive CTE scan found without recursive CTE node");
138410
- }
138411
- break;
138412
- }
138413
- case PhysicalOperatorType::INDEX_JOIN: {
138414
- // index join: we only continue into the LHS
138415
- // the right side is probed by the index join
138416
- // so we don't need to do anything in the pipeline with this child
138417
- current->operators.push_back(op);
138418
- BuildPipelines(op->children[0].get(), current);
138419
- return;
138420
- }
138421
- case PhysicalOperatorType::UNION: {
138422
- if (recursive_cte) {
138423
- throw NotImplementedException("UNIONS are not supported in recursive CTEs yet");
138424
- }
138425
- auto union_pipeline = make_shared<Pipeline>(*this);
138426
- auto pipeline_ptr = union_pipeline.get();
138427
- // set up dependencies for any child pipelines to this union pipeline
138428
- auto child_entry = child_pipelines.find(current);
138429
- if (child_entry != child_pipelines.end()) {
138430
- for (auto &current_child : child_entry->second) {
138431
- D_ASSERT(child_dependencies.find(current_child.get()) != child_dependencies.end());
138432
- child_dependencies[current_child.get()].push_back(pipeline_ptr);
138433
- }
138434
- }
138435
- // for the current pipeline, continue building on the LHS
138436
- union_pipeline->operators = current->operators;
138437
- BuildPipelines(op->children[0].get(), current);
138438
- // insert the union pipeline as a union pipeline of the current node
138439
- union_pipelines[current].push_back(move(union_pipeline));
138440
-
138441
- // for the union pipeline, build on the RHS
138442
- pipeline_ptr->sink = current->sink;
138443
- BuildPipelines(op->children[1].get(), pipeline_ptr);
138444
- return;
138445
- }
138446
- default:
138447
- break;
138448
- }
138449
- if (op->children.empty()) {
138450
- // source
138451
- current->source = op;
138452
- } else {
138453
- if (op->children.size() != 1) {
138454
- throw InternalException("Operator not supported yet");
138455
- }
138456
- current->operators.push_back(op);
138457
- BuildPipelines(op->children[0].get(), current);
138458
- }
138459
- }
138460
- }
138461
-
138462
139372
  vector<LogicalType> Executor::GetTypes() {
138463
139373
  D_ASSERT(physical_plan);
138464
139374
  return physical_plan->GetTypes();
@@ -138526,6 +139436,17 @@ bool Executor::GetPipelinesProgress(double &current_progress) { // LCOV_EXCL_STA
138526
139436
  }
138527
139437
  } // LCOV_EXCL_STOP
138528
139438
 
139439
+ bool Executor::HasResultCollector() {
139440
+ return physical_plan->type == PhysicalOperatorType::RESULT_COLLECTOR;
139441
+ }
139442
+
139443
+ unique_ptr<QueryResult> Executor::GetResult() {
139444
+ D_ASSERT(HasResultCollector());
139445
+ auto &result_collector = (PhysicalResultCollector &)*physical_plan;
139446
+ D_ASSERT(result_collector.sink_state);
139447
+ return result_collector.GetResult(*result_collector.sink_state);
139448
+ }
139449
+
138529
139450
  unique_ptr<DataChunk> Executor::FetchChunk() {
138530
139451
  D_ASSERT(physical_plan);
138531
139452
 
@@ -138591,10 +139512,6 @@ TaskExecutionResult ExecutorTask::Execute(TaskExecutionMode mode) {
138591
139512
 
138592
139513
 
138593
139514
 
138594
-
138595
-
138596
-
138597
-
138598
139515
 
138599
139516
  namespace duckdb {
138600
139517
 
@@ -138689,6 +139606,7 @@ void Pipeline::ScheduleSequentialTask(shared_ptr<Event> &event) {
138689
139606
  }
138690
139607
 
138691
139608
  bool Pipeline::ScheduleParallel(shared_ptr<Event> &event) {
139609
+ // check if the sink, source and all intermediate operators support parallelism
138692
139610
  if (!sink->ParallelSink()) {
138693
139611
  return false;
138694
139612
  }
@@ -138700,10 +139618,35 @@ bool Pipeline::ScheduleParallel(shared_ptr<Event> &event) {
138700
139618
  return false;
138701
139619
  }
138702
139620
  }
139621
+ if (sink->RequiresBatchIndex()) {
139622
+ if (!source->SupportsBatchIndex()) {
139623
+ throw InternalException(
139624
+ "Attempting to schedule a pipeline where the sink requires batch index but source does not support it");
139625
+ }
139626
+ }
138703
139627
  idx_t max_threads = source_state->MaxThreads();
138704
139628
  return LaunchScanTasks(event, max_threads);
138705
139629
  }
138706
139630
 
139631
+ bool Pipeline::IsOrderDependent() const {
139632
+ auto &config = DBConfig::GetConfig(executor.context);
139633
+ if (!config.preserve_insertion_order) {
139634
+ return false;
139635
+ }
139636
+ if (sink && sink->IsOrderDependent()) {
139637
+ return true;
139638
+ }
139639
+ if (source->IsOrderDependent()) {
139640
+ return true;
139641
+ }
139642
+ for (auto &op : operators) {
139643
+ if (op->IsOrderDependent()) {
139644
+ return true;
139645
+ }
139646
+ }
139647
+ return false;
139648
+ }
139649
+
138707
139650
  void Pipeline::Schedule(shared_ptr<Event> &event) {
138708
139651
  D_ASSERT(ready);
138709
139652
  D_ASSERT(sink);
@@ -138801,6 +139744,59 @@ vector<PhysicalOperator *> Pipeline::GetOperators() const {
138801
139744
  return result;
138802
139745
  }
138803
139746
 
139747
+ //===--------------------------------------------------------------------===//
139748
+ // Pipeline Build State
139749
+ //===--------------------------------------------------------------------===//
139750
+ void PipelineBuildState::SetPipelineSource(Pipeline &pipeline, PhysicalOperator *op) {
139751
+ pipeline.source = op;
139752
+ }
139753
+
139754
+ void PipelineBuildState::SetPipelineSink(Pipeline &pipeline, PhysicalOperator *op) {
139755
+ pipeline.sink = op;
139756
+ // set the base batch index of this pipeline based on how many other pipelines have this node as their sink
139757
+ pipeline.base_batch_index = BATCH_INCREMENT * sink_pipeline_count[op];
139758
+ // increment the number of nodes that have this pipeline as their sink
139759
+ sink_pipeline_count[op]++;
139760
+ }
139761
+
139762
+ void PipelineBuildState::AddPipelineOperator(Pipeline &pipeline, PhysicalOperator *op) {
139763
+ pipeline.operators.push_back(op);
139764
+ }
139765
+
139766
+ void PipelineBuildState::AddPipeline(Executor &executor, shared_ptr<Pipeline> pipeline) {
139767
+ executor.pipelines.push_back(move(pipeline));
139768
+ }
139769
+
139770
+ PhysicalOperator *PipelineBuildState::GetPipelineSource(Pipeline &pipeline) {
139771
+ return pipeline.source;
139772
+ }
139773
+
139774
+ PhysicalOperator *PipelineBuildState::GetPipelineSink(Pipeline &pipeline) {
139775
+ return pipeline.sink;
139776
+ }
139777
+
139778
+ void PipelineBuildState::SetPipelineOperators(Pipeline &pipeline, vector<PhysicalOperator *> operators) {
139779
+ pipeline.operators = move(operators);
139780
+ }
139781
+
139782
+ void PipelineBuildState::AddChildPipeline(Executor &executor, Pipeline &pipeline) {
139783
+ executor.AddChildPipeline(&pipeline);
139784
+ }
139785
+
139786
+ unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &PipelineBuildState::GetUnionPipelines(Executor &executor) {
139787
+ return executor.union_pipelines;
139788
+ }
139789
+ unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &PipelineBuildState::GetChildPipelines(Executor &executor) {
139790
+ return executor.child_pipelines;
139791
+ }
139792
+ unordered_map<Pipeline *, vector<Pipeline *>> &PipelineBuildState::GetChildDependencies(Executor &executor) {
139793
+ return executor.child_dependencies;
139794
+ }
139795
+
139796
+ vector<PhysicalOperator *> PipelineBuildState::GetPipelineOperators(Pipeline &pipeline) {
139797
+ return pipeline.operators;
139798
+ }
139799
+
138804
139800
  } // namespace duckdb
138805
139801
 
138806
139802
 
@@ -138842,6 +139838,7 @@ void PipelineEvent::FinishEvent() {
138842
139838
 
138843
139839
 
138844
139840
 
139841
+
138845
139842
  namespace duckdb {
138846
139843
 
138847
139844
  PipelineExecutor::PipelineExecutor(ClientContext &context_p, Pipeline &pipeline_p)
@@ -138850,7 +139847,9 @@ PipelineExecutor::PipelineExecutor(ClientContext &context_p, Pipeline &pipeline_
138850
139847
  local_source_state = pipeline.source->GetLocalSourceState(context, *pipeline.source_state);
138851
139848
  if (pipeline.sink) {
138852
139849
  local_sink_state = pipeline.sink->GetLocalSinkState(context);
139850
+ requires_batch_index = pipeline.sink->RequiresBatchIndex() && pipeline.source->SupportsBatchIndex();
138853
139851
  }
139852
+ bool can_cache_in_pipeline = pipeline.sink && !pipeline.IsOrderDependent() && !requires_batch_index;
138854
139853
  intermediate_chunks.reserve(pipeline.operators.size());
138855
139854
  intermediate_states.reserve(pipeline.operators.size());
138856
139855
  cached_chunks.resize(pipeline.operators.size());
@@ -138861,7 +139860,7 @@ PipelineExecutor::PipelineExecutor(ClientContext &context_p, Pipeline &pipeline_
138861
139860
  chunk->Initialize(prev_operator->GetTypes());
138862
139861
  intermediate_chunks.push_back(move(chunk));
138863
139862
  intermediate_states.push_back(current_operator->GetOperatorState(context.client));
138864
- if (pipeline.sink && !pipeline.sink->SinkOrderMatters() && current_operator->RequiresCache()) {
139863
+ if (can_cache_in_pipeline && current_operator->RequiresCache()) {
138865
139864
  auto &cache_types = current_operator->GetTypes();
138866
139865
  bool can_cache = true;
138867
139866
  for (auto &type : cache_types) {
@@ -138879,7 +139878,7 @@ PipelineExecutor::PipelineExecutor(ClientContext &context_p, Pipeline &pipeline_
138879
139878
  if (current_operator->IsSink() && current_operator->sink_state->state == SinkFinalizeType::NO_OUTPUT_POSSIBLE) {
138880
139879
  // one of the operators has already figured out no output is possible
138881
139880
  // we can skip executing the pipeline
138882
- finished_processing = true;
139881
+ FinishProcessing();
138883
139882
  }
138884
139883
  }
138885
139884
  InitializeChunk(final_chunk);
@@ -138890,7 +139889,7 @@ bool PipelineExecutor::Execute(idx_t max_chunks) {
138890
139889
  bool exhausted_source = false;
138891
139890
  auto &source_chunk = pipeline.operators.empty() ? final_chunk : *intermediate_chunks[0];
138892
139891
  for (idx_t i = 0; i < max_chunks; i++) {
138893
- if (finished_processing) {
139892
+ if (IsFinished()) {
138894
139893
  break;
138895
139894
  }
138896
139895
  source_chunk.Reset();
@@ -138901,11 +139900,11 @@ bool PipelineExecutor::Execute(idx_t max_chunks) {
138901
139900
  }
138902
139901
  auto result = ExecutePushInternal(source_chunk);
138903
139902
  if (result == OperatorResultType::FINISHED) {
138904
- finished_processing = true;
139903
+ D_ASSERT(IsFinished());
138905
139904
  break;
138906
139905
  }
138907
139906
  }
138908
- if (!exhausted_source && !finished_processing) {
139907
+ if (!exhausted_source && !IsFinished()) {
138909
139908
  return false;
138910
139909
  }
138911
139910
  PushFinalize();
@@ -138920,6 +139919,15 @@ OperatorResultType PipelineExecutor::ExecutePush(DataChunk &input) { // LCOV_EXC
138920
139919
  return ExecutePushInternal(input);
138921
139920
  } // LCOV_EXCL_STOP
138922
139921
 
139922
+ void PipelineExecutor::FinishProcessing(int32_t operator_idx) {
139923
+ finished_processing_idx = operator_idx < 0 ? NumericLimits<int32_t>::Maximum() : operator_idx;
139924
+ in_process_operators = stack<idx_t>();
139925
+ }
139926
+
139927
+ bool PipelineExecutor::IsFinished() {
139928
+ return finished_processing_idx >= 0;
139929
+ }
139930
+
138923
139931
  OperatorResultType PipelineExecutor::ExecutePushInternal(DataChunk &input, idx_t initial_idx) {
138924
139932
  D_ASSERT(pipeline.sink);
138925
139933
  if (input.size() == 0) { // LCOV_EXCL_START
@@ -138944,6 +139952,7 @@ OperatorResultType PipelineExecutor::ExecutePushInternal(DataChunk &input, idx_t
138944
139952
  auto sink_result = pipeline.sink->Sink(context, *pipeline.sink->sink_state, *local_sink_state, sink_chunk);
138945
139953
  EndOperator(pipeline.sink, nullptr);
138946
139954
  if (sink_result == SinkResultType::FINISHED) {
139955
+ FinishProcessing();
138947
139956
  return OperatorResultType::FINISHED;
138948
139957
  }
138949
139958
  }
@@ -138951,7 +139960,6 @@ OperatorResultType PipelineExecutor::ExecutePushInternal(DataChunk &input, idx_t
138951
139960
  return OperatorResultType::NEED_MORE_INPUT;
138952
139961
  }
138953
139962
  }
138954
- return OperatorResultType::FINISHED;
138955
139963
  }
138956
139964
 
138957
139965
  void PipelineExecutor::PushFinalize() {
@@ -138960,13 +139968,15 @@ void PipelineExecutor::PushFinalize() {
138960
139968
  }
138961
139969
  finalized = true;
138962
139970
  // flush all caches
138963
- if (!finished_processing) {
138964
- D_ASSERT(in_process_operators.empty());
138965
- for (idx_t i = 0; i < cached_chunks.size(); i++) {
138966
- if (cached_chunks[i] && cached_chunks[i]->size() > 0) {
138967
- ExecutePushInternal(*cached_chunks[i], i + 1);
138968
- cached_chunks[i].reset();
138969
- }
139971
+ // note that even if an operator has finished, we might still need to flush caches AFTER that operator
139972
+ // e.g. if we have SOURCE -> LIMIT -> CROSS_PRODUCT -> SINK, if the LIMIT reports no more rows will be passed on
139973
+ // we still need to flush caches from the CROSS_PRODUCT
139974
+ D_ASSERT(in_process_operators.empty());
139975
+ idx_t start_idx = IsFinished() ? idx_t(finished_processing_idx) : 0;
139976
+ for (idx_t i = start_idx; i < cached_chunks.size(); i++) {
139977
+ if (cached_chunks[i] && cached_chunks[i]->size() > 0) {
139978
+ ExecutePushInternal(*cached_chunks[i], i + 1);
139979
+ cached_chunks[i].reset();
138970
139980
  }
138971
139981
  }
138972
139982
  D_ASSERT(local_sink_state);
@@ -139022,7 +140032,7 @@ void PipelineExecutor::CacheChunk(DataChunk &current_chunk, idx_t operator_idx)
139022
140032
  }
139023
140033
 
139024
140034
  void PipelineExecutor::ExecutePull(DataChunk &result) {
139025
- if (finished_processing) {
140035
+ if (IsFinished()) {
139026
140036
  return;
139027
140037
  }
139028
140038
  auto &executor = pipeline.executor;
@@ -139038,7 +140048,10 @@ void PipelineExecutor::ExecutePull(DataChunk &result) {
139038
140048
  }
139039
140049
  }
139040
140050
  if (!pipeline.operators.empty()) {
139041
- Execute(source_chunk, result);
140051
+ auto state = Execute(source_chunk, result);
140052
+ if (state == OperatorResultType::FINISHED) {
140053
+ break;
140054
+ }
139042
140055
  }
139043
140056
  }
139044
140057
  } catch (std::exception &ex) { // LCOV_EXCL_START
@@ -139122,6 +140135,7 @@ OperatorResultType PipelineExecutor::Execute(DataChunk &input, DataChunk &result
139122
140135
  in_process_operators.push(current_idx);
139123
140136
  } else if (result == OperatorResultType::FINISHED) {
139124
140137
  D_ASSERT(current_chunk.size() == 0);
140138
+ FinishProcessing(current_idx);
139125
140139
  return OperatorResultType::FINISHED;
139126
140140
  }
139127
140141
  current_chunk.Verify();
@@ -139155,6 +140169,14 @@ OperatorResultType PipelineExecutor::Execute(DataChunk &input, DataChunk &result
139155
140169
  void PipelineExecutor::FetchFromSource(DataChunk &result) {
139156
140170
  StartOperator(pipeline.source);
139157
140171
  pipeline.source->GetData(context, result, *pipeline.source_state, *local_source_state);
140172
+ if (result.size() != 0 && requires_batch_index) {
140173
+ auto next_batch_index =
140174
+ pipeline.source->GetBatchIndex(context, result, *pipeline.source_state, *local_source_state);
140175
+ next_batch_index += pipeline.base_batch_index;
140176
+ D_ASSERT(local_sink_state->batch_index <= next_batch_index ||
140177
+ local_sink_state->batch_index == DConstants::INVALID_INDEX);
140178
+ local_sink_state->batch_index = next_batch_index;
140179
+ }
139158
140180
  EndOperator(pipeline.source, &result);
139159
140181
  }
139160
140182