duckdb 0.3.5-dev617.0 → 0.3.5-dev658.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -7044,6 +7044,8 @@ string PhysicalOperatorToString(PhysicalOperatorType type) {
7044
7044
  return "LIMIT";
7045
7045
  case PhysicalOperatorType::LIMIT_PERCENT:
7046
7046
  return "LIMIT_PERCENT";
7047
+ case PhysicalOperatorType::STREAMING_LIMIT:
7048
+ return "STREAMING_LIMIT";
7047
7049
  case PhysicalOperatorType::RESERVOIR_SAMPLE:
7048
7050
  return "RESERVOIR_SAMPLE";
7049
7051
  case PhysicalOperatorType::STREAMING_SAMPLE:
@@ -7142,6 +7144,8 @@ string PhysicalOperatorToString(PhysicalOperatorType type) {
7142
7144
  return "INOUT_FUNCTION";
7143
7145
  case PhysicalOperatorType::CREATE_TYPE:
7144
7146
  return "CREATE_TYPE";
7147
+ case PhysicalOperatorType::RESULT_COLLECTOR:
7148
+ return "RESULT_COLLECTOR";
7145
7149
  case PhysicalOperatorType::INVALID:
7146
7150
  break;
7147
7151
  }
@@ -25197,8 +25201,9 @@ bool Printer::IsTerminal() {
25197
25201
 
25198
25202
  namespace duckdb {
25199
25203
 
25200
- ProgressBar::ProgressBar(Executor &executor, idx_t show_progress_after)
25201
- : executor(executor), show_progress_after(show_progress_after), current_percentage(-1) {
25204
+ ProgressBar::ProgressBar(Executor &executor, idx_t show_progress_after, bool print_progress)
25205
+ : executor(executor), show_progress_after(show_progress_after), current_percentage(-1),
25206
+ print_progress(print_progress) {
25202
25207
  }
25203
25208
 
25204
25209
  double ProgressBar::GetCurrentPercentage() {
@@ -25221,7 +25226,6 @@ void ProgressBar::Update(bool final) {
25221
25226
  return;
25222
25227
  }
25223
25228
  auto sufficient_time_elapsed = profiler.Elapsed() > show_progress_after / 1000.0;
25224
- auto print_progress = ClientConfig::GetConfig(executor.context).print_progress_bar;
25225
25229
  if (new_percentage > current_percentage) {
25226
25230
  current_percentage = new_percentage;
25227
25231
  }
@@ -36623,6 +36627,9 @@ public:
36623
36627
  unique_ptr<PhysicalHashAggregate> distinct;
36624
36628
  vector<PhysicalOperator *> delim_scans;
36625
36629
 
36630
+ public:
36631
+ vector<PhysicalOperator *> GetChildren() const override;
36632
+
36626
36633
  public:
36627
36634
  unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
36628
36635
  unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
@@ -36640,6 +36647,9 @@ public:
36640
36647
  }
36641
36648
 
36642
36649
  string ParamsToString() const override;
36650
+
36651
+ public:
36652
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
36643
36653
  };
36644
36654
 
36645
36655
  } // namespace duckdb
@@ -37666,6 +37676,122 @@ unique_ptr<RenderTree> TreeRenderer::CreateTree(const Pipeline &op) {
37666
37676
  return CreateRenderTree<PipelineRenderNode>(*node);
37667
37677
  }
37668
37678
 
37679
+ } // namespace duckdb//===----------------------------------------------------------------------===//
37680
+ // DuckDB
37681
+ //
37682
+ // duckdb/common/types/batched_chunk_collection.hpp
37683
+ //
37684
+ //
37685
+ //===----------------------------------------------------------------------===//
37686
+
37687
+
37688
+
37689
+
37690
+
37691
+
37692
+ namespace duckdb {
37693
+
37694
+ struct BatchedChunkScanState {
37695
+ map<idx_t, unique_ptr<ChunkCollection>>::iterator iterator;
37696
+ idx_t chunk_index;
37697
+ };
37698
+
37699
+ //! A BatchedChunkCollection holds a number of data entries that are partitioned by batch index
37700
+ //! Scans over a BatchedChunkCollection are ordered by batch index
37701
+ class BatchedChunkCollection {
37702
+ public:
37703
+ DUCKDB_API BatchedChunkCollection();
37704
+
37705
+ //! Appends a datachunk with the given batch index to the batched collection
37706
+ DUCKDB_API void Append(DataChunk &input, idx_t batch_index);
37707
+
37708
+ //! Merge the other batched chunk collection into this batched collection
37709
+ DUCKDB_API void Merge(BatchedChunkCollection &other);
37710
+
37711
+ //! Initialize a scan over the batched chunk collection
37712
+ DUCKDB_API void InitializeScan(BatchedChunkScanState &state);
37713
+
37714
+ //! Scan a chunk from the batched chunk collection, in-order of batch index
37715
+ DUCKDB_API void Scan(BatchedChunkScanState &state, DataChunk &output);
37716
+
37717
+ DUCKDB_API string ToString() const;
37718
+ DUCKDB_API void Print() const;
37719
+
37720
+ private:
37721
+ //! The data of the batched chunk collection - a set of batch_index -> ChunkCollection pointers
37722
+ map<idx_t, unique_ptr<ChunkCollection>> data;
37723
+ };
37724
+ } // namespace duckdb
37725
+
37726
+
37727
+
37728
+ namespace duckdb {
37729
+
37730
+ BatchedChunkCollection::BatchedChunkCollection() {
37731
+ }
37732
+
37733
+ void BatchedChunkCollection::Append(DataChunk &input, idx_t batch_index) {
37734
+ D_ASSERT(batch_index != DConstants::INVALID_INDEX);
37735
+ auto entry = data.find(batch_index);
37736
+ ChunkCollection *collection;
37737
+ if (entry == data.end()) {
37738
+ auto new_collection = make_unique<ChunkCollection>();
37739
+ collection = new_collection.get();
37740
+ data.insert(make_pair(batch_index, move(new_collection)));
37741
+ } else {
37742
+ collection = entry->second.get();
37743
+ }
37744
+ collection->Append(input);
37745
+ }
37746
+
37747
+ void BatchedChunkCollection::Merge(BatchedChunkCollection &other) {
37748
+ for (auto &entry : other.data) {
37749
+ if (data.find(entry.first) != data.end()) {
37750
+ throw InternalException(
37751
+ "BatchChunkCollection::Merge error - batch index %d is present in both collections. This occurs when "
37752
+ "batch indexes are not uniquely distributed over threads",
37753
+ entry.first);
37754
+ }
37755
+ data[entry.first] = move(entry.second);
37756
+ }
37757
+ other.data.clear();
37758
+ }
37759
+
37760
+ void BatchedChunkCollection::InitializeScan(BatchedChunkScanState &state) {
37761
+ state.iterator = data.begin();
37762
+ state.chunk_index = 0;
37763
+ }
37764
+
37765
+ void BatchedChunkCollection::Scan(BatchedChunkScanState &state, DataChunk &output) {
37766
+ while (state.iterator != data.end()) {
37767
+ // check if there is a chunk remaining in this collection
37768
+ auto collection = state.iterator->second.get();
37769
+ if (state.chunk_index < collection->ChunkCount()) {
37770
+ // there is! increment the chunk count
37771
+ output.Reference(collection->GetChunk(state.chunk_index));
37772
+ state.chunk_index++;
37773
+ return;
37774
+ }
37775
+ // there isn't! move to the next collection
37776
+ state.iterator++;
37777
+ state.chunk_index = 0;
37778
+ }
37779
+ }
37780
+
37781
+ string BatchedChunkCollection::ToString() const {
37782
+ string result;
37783
+ result += "Batched Chunk Collection\n";
37784
+ for (auto &entry : data) {
37785
+ result += "Batch Index - " + to_string(entry.first) + "\n";
37786
+ result += entry.second->ToString() + "\n\n";
37787
+ }
37788
+ return result;
37789
+ }
37790
+
37791
+ void BatchedChunkCollection::Print() const {
37792
+ Printer::Print(ToString());
37793
+ }
37794
+
37669
37795
  } // namespace duckdb
37670
37796
 
37671
37797
 
@@ -37673,6 +37799,7 @@ unique_ptr<RenderTree> TreeRenderer::CreateTree(const Pipeline &op) {
37673
37799
 
37674
37800
 
37675
37801
 
37802
+
37676
37803
  namespace duckdb {
37677
37804
 
37678
37805
  constexpr const char *Blob::HEX_TABLE;
@@ -38507,7 +38634,12 @@ void ChunkCollection::CopyCell(idx_t column, idx_t index, Vector &target, idx_t
38507
38634
  VectorOperations::Copy(source, target, source_offset + 1, source_offset, target_offset);
38508
38635
  }
38509
38636
 
38510
- void ChunkCollection::Print() {
38637
+ string ChunkCollection::ToString() const {
38638
+ return chunks.empty() ? "ChunkCollection [ 0 ]"
38639
+ : "ChunkCollection [ " + std::to_string(count) + " ]: \n" + chunks[0]->ToString();
38640
+ }
38641
+
38642
+ void ChunkCollection::Print() const {
38511
38643
  Printer::Print(ToString());
38512
38644
  }
38513
38645
 
@@ -48636,17 +48768,14 @@ inline idx_t ComparisonSelector::Select<duckdb::LessThanEquals>(Vector &left, Ve
48636
48768
  return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel);
48637
48769
  }
48638
48770
 
48639
- static idx_t ComparesNotNull(ValidityMask &vleft, ValidityMask &vright, ValidityMask &vresult, idx_t count,
48640
- SelectionVector &not_null) {
48641
- idx_t valid = 0;
48771
+ static void ComparesNotNull(VectorData &ldata, VectorData &rdata, ValidityMask &vresult, idx_t count) {
48642
48772
  for (idx_t i = 0; i < count; ++i) {
48643
- if (vleft.RowIsValid(i) && vright.RowIsValid(i)) {
48644
- not_null.set_index(valid++, i);
48645
- } else {
48773
+ auto lidx = ldata.sel->get_index(i);
48774
+ auto ridx = rdata.sel->get_index(i);
48775
+ if (!ldata.validity.RowIsValid(lidx) || !rdata.validity.RowIsValid(ridx)) {
48646
48776
  vresult.SetInvalid(i);
48647
48777
  }
48648
48778
  }
48649
- return valid;
48650
48779
  }
48651
48780
 
48652
48781
  template <typename OP>
@@ -48673,23 +48802,17 @@ static void NestedComparisonExecutor(Vector &left, Vector &right, Vector &result
48673
48802
 
48674
48803
  result.SetVectorType(VectorType::FLAT_VECTOR);
48675
48804
  auto result_data = FlatVector::GetData<bool>(result);
48676
- auto &validity = FlatVector::Validity(result);
48805
+ auto &result_validity = FlatVector::Validity(result);
48677
48806
 
48678
48807
  VectorData leftv, rightv;
48679
48808
  left.Orrify(count, leftv);
48680
48809
  right.Orrify(count, rightv);
48681
-
48810
+ if (!leftv.validity.AllValid() || !rightv.validity.AllValid()) {
48811
+ ComparesNotNull(leftv, rightv, result_validity, count);
48812
+ }
48682
48813
  SelectionVector true_sel(count);
48683
48814
  SelectionVector false_sel(count);
48684
-
48685
- idx_t match_count = 0;
48686
- if (leftv.validity.AllValid() && rightv.validity.AllValid()) {
48687
- match_count = ComparisonSelector::Select<OP>(left, right, nullptr, count, &true_sel, &false_sel);
48688
- } else {
48689
- SelectionVector not_null(count);
48690
- count = ComparesNotNull(leftv.validity, rightv.validity, validity, count, not_null);
48691
- match_count = ComparisonSelector::Select<OP>(left, right, &not_null, count, &true_sel, &false_sel);
48692
- }
48815
+ idx_t match_count = ComparisonSelector::Select<OP>(left, right, nullptr, count, &true_sel, &false_sel);
48693
48816
 
48694
48817
  for (idx_t i = 0; i < match_count; ++i) {
48695
48818
  const auto idx = true_sel.get_index(i);
@@ -58954,7 +59077,7 @@ public:
58954
59077
  using FrameBounds = std::pair<idx_t, idx_t>;
58955
59078
 
58956
59079
  WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info, const LogicalType &result_type,
58957
- ChunkCollection *input, WindowAggregationMode mode);
59080
+ ChunkCollection *input, const ValidityMask &filter_mask, WindowAggregationMode mode);
58958
59081
  ~WindowSegmentTree();
58959
59082
 
58960
59083
  //! First row contains the result.
@@ -58987,6 +59110,8 @@ private:
58987
59110
  vector<data_t> state;
58988
59111
  //! Input data chunk, used for intermediate window segment aggregation
58989
59112
  DataChunk inputs;
59113
+ //! The filtered rows in inputs.
59114
+ SelectionVector filter_sel;
58990
59115
  //! A vector of pointers to "state", used for intermediate window segment aggregation
58991
59116
  Vector statep;
58992
59117
  //! The frame boundaries, used for the window functions
@@ -59007,6 +59132,9 @@ private:
59007
59132
  //! The (sorted) input chunk collection on which the tree is built
59008
59133
  ChunkCollection *input_ref;
59009
59134
 
59135
+ //! The filtered rows in input_ref.
59136
+ const ValidityMask &filter_mask;
59137
+
59010
59138
  //! Use the window API, if available
59011
59139
  WindowAggregationMode mode;
59012
59140
 
@@ -59909,6 +60037,25 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
59909
60037
  }
59910
60038
  }
59911
60039
 
60040
+ // evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
60041
+ ValidityMask filter_mask;
60042
+ vector<validity_t> filter_bits;
60043
+ if (wexpr->filter_expr) {
60044
+ // Start with all invalid and set the ones that pass
60045
+ filter_bits.resize(ValidityMask::ValidityMaskSize(input.Count()), 0);
60046
+ filter_mask.Initialize(filter_bits.data());
60047
+ ExpressionExecutor filter_execution(*wexpr->filter_expr);
60048
+ SelectionVector true_sel(STANDARD_VECTOR_SIZE);
60049
+ idx_t base_idx = 0;
60050
+ for (auto &chunk : input.Chunks()) {
60051
+ const auto filtered = filter_execution.SelectExpression(*chunk, true_sel);
60052
+ for (idx_t f = 0; f < filtered; ++f) {
60053
+ filter_mask.SetValid(base_idx + true_sel[f]);
60054
+ }
60055
+ base_idx += chunk->size();
60056
+ }
60057
+ }
60058
+
59912
60059
  // evaluate boundaries if present. Parser has checked boundary types.
59913
60060
  ChunkCollection boundary_start_collection;
59914
60061
  if (wexpr->start_expr) {
@@ -59962,7 +60109,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
59962
60109
 
59963
60110
  if (wexpr->aggregate) {
59964
60111
  segment_tree = make_unique<WindowSegmentTree>(*(wexpr->aggregate), wexpr->bind_info.get(), wexpr->return_type,
59965
- &payload_collection, mode);
60112
+ &payload_collection, filter_mask, mode);
59966
60113
  }
59967
60114
 
59968
60115
  WindowBoundariesState bounds(wexpr);
@@ -60510,6 +60657,171 @@ string PhysicalFilter::ParamsToString() const {
60510
60657
  return expression->GetName();
60511
60658
  }
60512
60659
 
60660
+ } // namespace duckdb
60661
+ //===----------------------------------------------------------------------===//
60662
+ // DuckDB
60663
+ //
60664
+ // duckdb/execution/operator/helper/physical_batch_collector.hpp
60665
+ //
60666
+ //
60667
+ //===----------------------------------------------------------------------===//
60668
+
60669
+
60670
+
60671
+ //===----------------------------------------------------------------------===//
60672
+ // DuckDB
60673
+ //
60674
+ // duckdb/execution/operator/helper/physical_result_collector.hpp
60675
+ //
60676
+ //
60677
+ //===----------------------------------------------------------------------===//
60678
+
60679
+
60680
+
60681
+
60682
+
60683
+
60684
+ namespace duckdb {
60685
+ class PreparedStatementData;
60686
+
60687
+ //! PhysicalResultCollector is an abstract class that is used to generate the final result of a query
60688
+ class PhysicalResultCollector : public PhysicalOperator {
60689
+ public:
60690
+ PhysicalResultCollector(PreparedStatementData &data);
60691
+
60692
+ StatementType statement_type;
60693
+ StatementProperties properties;
60694
+ PhysicalOperator *plan;
60695
+ vector<string> names;
60696
+
60697
+ public:
60698
+ static unique_ptr<PhysicalResultCollector> GetResultCollector(ClientContext &context, PreparedStatementData &data);
60699
+
60700
+ public:
60701
+ //! The final method used to fetch the query result from this operator
60702
+ virtual unique_ptr<QueryResult> GetResult(GlobalSinkState &state) = 0;
60703
+
60704
+ bool IsSink() const override {
60705
+ return true;
60706
+ }
60707
+
60708
+ public:
60709
+ vector<PhysicalOperator *> GetChildren() const override;
60710
+
60711
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
60712
+ };
60713
+
60714
+ } // namespace duckdb
60715
+
60716
+
60717
+ namespace duckdb {
60718
+
60719
+ class PhysicalBatchCollector : public PhysicalResultCollector {
60720
+ public:
60721
+ PhysicalBatchCollector(PreparedStatementData &data);
60722
+
60723
+ public:
60724
+ unique_ptr<QueryResult> GetResult(GlobalSinkState &state) override;
60725
+
60726
+ public:
60727
+ // Sink interface
60728
+ SinkResultType Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
60729
+ DataChunk &input) const override;
60730
+ void Combine(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate) const override;
60731
+ SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
60732
+ GlobalSinkState &gstate) const override;
60733
+
60734
+ unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
60735
+ unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
60736
+
60737
+ bool RequiresBatchIndex() const override {
60738
+ return true;
60739
+ }
60740
+
60741
+ bool ParallelSink() const override {
60742
+ return true;
60743
+ }
60744
+ };
60745
+
60746
+ } // namespace duckdb
60747
+
60748
+
60749
+
60750
+
60751
+
60752
+ namespace duckdb {
60753
+
60754
+ PhysicalBatchCollector::PhysicalBatchCollector(PreparedStatementData &data) : PhysicalResultCollector(data) {
60755
+ }
60756
+
60757
+ //===--------------------------------------------------------------------===//
60758
+ // Sink
60759
+ //===--------------------------------------------------------------------===//
60760
+ class BatchCollectorGlobalState : public GlobalSinkState {
60761
+ public:
60762
+ mutex glock;
60763
+ BatchedChunkCollection data;
60764
+ unique_ptr<MaterializedQueryResult> result;
60765
+ };
60766
+
60767
+ class BatchCollectorLocalState : public LocalSinkState {
60768
+ public:
60769
+ BatchedChunkCollection data;
60770
+ };
60771
+
60772
+ SinkResultType PhysicalBatchCollector::Sink(ExecutionContext &context, GlobalSinkState &gstate,
60773
+ LocalSinkState &lstate_p, DataChunk &input) const {
60774
+ auto &state = (BatchCollectorLocalState &)lstate_p;
60775
+ state.data.Append(input, state.batch_index);
60776
+ return SinkResultType::NEED_MORE_INPUT;
60777
+ }
60778
+
60779
+ void PhysicalBatchCollector::Combine(ExecutionContext &context, GlobalSinkState &gstate_p,
60780
+ LocalSinkState &lstate_p) const {
60781
+ auto &gstate = (BatchCollectorGlobalState &)gstate_p;
60782
+ auto &state = (BatchCollectorLocalState &)lstate_p;
60783
+
60784
+ lock_guard<mutex> lock(gstate.glock);
60785
+ gstate.data.Merge(state.data);
60786
+ }
60787
+
60788
+ SinkFinalizeType PhysicalBatchCollector::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
60789
+ GlobalSinkState &gstate_p) const {
60790
+ auto &gstate = (BatchCollectorGlobalState &)gstate_p;
60791
+ auto result =
60792
+ make_unique<MaterializedQueryResult>(statement_type, properties, types, names, context.shared_from_this());
60793
+ DataChunk output;
60794
+ output.Initialize(types);
60795
+
60796
+ BatchedChunkScanState state;
60797
+ gstate.data.InitializeScan(state);
60798
+ while (true) {
60799
+ output.Reset();
60800
+ gstate.data.Scan(state, output);
60801
+ if (output.size() == 0) {
60802
+ break;
60803
+ }
60804
+ result->collection.Append(output);
60805
+ }
60806
+
60807
+ gstate.result = move(result);
60808
+ return SinkFinalizeType::READY;
60809
+ }
60810
+
60811
+ unique_ptr<LocalSinkState> PhysicalBatchCollector::GetLocalSinkState(ExecutionContext &context) const {
60812
+ return make_unique<BatchCollectorLocalState>();
60813
+ }
60814
+
60815
+ unique_ptr<GlobalSinkState> PhysicalBatchCollector::GetGlobalSinkState(ClientContext &context) const {
60816
+ return make_unique<BatchCollectorGlobalState>();
60817
+ }
60818
+
60819
+ unique_ptr<QueryResult> PhysicalBatchCollector::GetResult(GlobalSinkState &state) {
60820
+ auto &gstate = (BatchCollectorGlobalState &)state;
60821
+ D_ASSERT(gstate.result);
60822
+ return move(gstate.result);
60823
+ }
60824
+
60513
60825
  } // namespace duckdb
60514
60826
  //===----------------------------------------------------------------------===//
60515
60827
  // DuckDB
@@ -60586,6 +60898,12 @@ public:
60586
60898
  PhysicalOperator *plan;
60587
60899
  unique_ptr<PhysicalOperator> owned_plan;
60588
60900
  shared_ptr<PreparedStatementData> prepared;
60901
+
60902
+ public:
60903
+ vector<PhysicalOperator *> GetChildren() const override;
60904
+
60905
+ public:
60906
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
60589
60907
  };
60590
60908
 
60591
60909
  } // namespace duckdb
@@ -60597,6 +60915,15 @@ PhysicalExecute::PhysicalExecute(PhysicalOperator *plan)
60597
60915
  : PhysicalOperator(PhysicalOperatorType::EXECUTE, plan->types, -1), plan(plan) {
60598
60916
  }
60599
60917
 
60918
+ vector<PhysicalOperator *> PhysicalExecute::GetChildren() const {
60919
+ return {plan};
60920
+ }
60921
+
60922
+ void PhysicalExecute::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
60923
+ // EXECUTE statement: build pipeline on child
60924
+ plan->BuildPipelines(executor, current, state);
60925
+ }
60926
+
60600
60927
  } // namespace duckdb
60601
60928
  //===----------------------------------------------------------------------===//
60602
60929
  // DuckDB
@@ -60724,16 +61051,18 @@ namespace duckdb {
60724
61051
  class PhysicalLimit : public PhysicalOperator {
60725
61052
  public:
60726
61053
  PhysicalLimit(vector<LogicalType> types, idx_t limit, idx_t offset, unique_ptr<Expression> limit_expression,
60727
- unique_ptr<Expression> offset_expression, idx_t estimated_cardinality)
60728
- : PhysicalOperator(PhysicalOperatorType::LIMIT, move(types), estimated_cardinality), limit_value(limit),
60729
- offset_value(offset), limit_expression(move(limit_expression)), offset_expression(move(offset_expression)) {
60730
- }
61054
+ unique_ptr<Expression> offset_expression, idx_t estimated_cardinality);
60731
61055
 
60732
61056
  idx_t limit_value;
60733
61057
  idx_t offset_value;
60734
61058
  unique_ptr<Expression> limit_expression;
60735
61059
  unique_ptr<Expression> offset_expression;
60736
61060
 
61061
+ public:
61062
+ bool IsOrderDependent() const override {
61063
+ return true;
61064
+ }
61065
+
60737
61066
  public:
60738
61067
  // Source interface
60739
61068
  unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
@@ -60742,18 +61071,27 @@ public:
60742
61071
 
60743
61072
  public:
60744
61073
  // Sink Interface
60745
- unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
60746
61074
  SinkResultType Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
60747
61075
  DataChunk &input) const override;
61076
+ void Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const override;
61077
+ unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
61078
+ unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
60748
61079
 
60749
61080
  bool IsSink() const override {
60750
61081
  return true;
60751
61082
  }
60752
61083
 
60753
- bool SinkOrderMatters() const override {
61084
+ bool ParallelSink() const override {
61085
+ return true;
61086
+ }
61087
+
61088
+ bool RequiresBatchIndex() const override {
60754
61089
  return true;
60755
61090
  }
60756
61091
 
61092
+ public:
61093
+ static bool ComputeOffset(DataChunk &input, idx_t &limit, idx_t &offset, idx_t current_offset, idx_t &max_element,
61094
+ Expression *limit_expression, Expression *offset_expression);
60757
61095
  static bool HandleOffset(DataChunk &input, idx_t &current_offset, idx_t offset, idx_t limit);
60758
61096
  static Value GetDelimiter(DataChunk &input, Expression *expr);
60759
61097
  };
@@ -60766,14 +61104,75 @@ public:
60766
61104
 
60767
61105
 
60768
61106
 
61107
+ //===----------------------------------------------------------------------===//
61108
+ // DuckDB
61109
+ //
61110
+ // duckdb/execution/operator/helper/physical_streaming_limit.hpp
61111
+ //
61112
+ //
61113
+ //===----------------------------------------------------------------------===//
61114
+
61115
+
61116
+
61117
+
61118
+
61119
+
60769
61120
  namespace duckdb {
60770
61121
 
61122
+ class PhysicalStreamingLimit : public PhysicalOperator {
61123
+ public:
61124
+ PhysicalStreamingLimit(vector<LogicalType> types, idx_t limit, idx_t offset,
61125
+ unique_ptr<Expression> limit_expression, unique_ptr<Expression> offset_expression,
61126
+ idx_t estimated_cardinality, bool parallel);
61127
+
61128
+ idx_t limit_value;
61129
+ idx_t offset_value;
61130
+ unique_ptr<Expression> limit_expression;
61131
+ unique_ptr<Expression> offset_expression;
61132
+ bool parallel;
61133
+
61134
+ public:
61135
+ // Operator interface
61136
+ unique_ptr<OperatorState> GetOperatorState(ClientContext &context) const override;
61137
+ unique_ptr<GlobalOperatorState> GetGlobalOperatorState(ClientContext &context) const override;
61138
+ OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
61139
+ GlobalOperatorState &gstate, OperatorState &state) const override;
61140
+
61141
+ bool IsOrderDependent() const override;
61142
+ bool ParallelOperator() const override;
61143
+ };
61144
+
61145
+ } // namespace duckdb
61146
+
61147
+
61148
+ namespace duckdb {
61149
+
61150
+ PhysicalLimit::PhysicalLimit(vector<LogicalType> types, idx_t limit, idx_t offset,
61151
+ unique_ptr<Expression> limit_expression, unique_ptr<Expression> offset_expression,
61152
+ idx_t estimated_cardinality)
61153
+ : PhysicalOperator(PhysicalOperatorType::LIMIT, move(types), estimated_cardinality), limit_value(limit),
61154
+ offset_value(offset), limit_expression(move(limit_expression)), offset_expression(move(offset_expression)) {
61155
+ }
61156
+
60771
61157
  //===--------------------------------------------------------------------===//
60772
61158
  // Sink
60773
61159
  //===--------------------------------------------------------------------===//
60774
61160
  class LimitGlobalState : public GlobalSinkState {
60775
61161
  public:
60776
- explicit LimitGlobalState(const PhysicalLimit &op) : current_offset(0) {
61162
+ explicit LimitGlobalState(const PhysicalLimit &op) {
61163
+ limit = 0;
61164
+ offset = 0;
61165
+ }
61166
+
61167
+ mutex glock;
61168
+ idx_t limit;
61169
+ idx_t offset;
61170
+ BatchedChunkCollection data;
61171
+ };
61172
+
61173
+ class LimitLocalState : public LocalSinkState {
61174
+ public:
61175
+ explicit LimitLocalState(const PhysicalLimit &op) : current_offset(0) {
60777
61176
  this->limit = op.limit_expression ? DConstants::INVALID_INDEX : op.limit_value;
60778
61177
  this->offset = op.offset_expression ? DConstants::INVALID_INDEX : op.offset_value;
60779
61178
  }
@@ -60781,31 +61180,30 @@ public:
60781
61180
  idx_t current_offset;
60782
61181
  idx_t limit;
60783
61182
  idx_t offset;
60784
- ChunkCollection data;
61183
+ BatchedChunkCollection data;
60785
61184
  };
60786
61185
 
60787
61186
  unique_ptr<GlobalSinkState> PhysicalLimit::GetGlobalSinkState(ClientContext &context) const {
60788
61187
  return make_unique<LimitGlobalState>(*this);
60789
61188
  }
60790
61189
 
60791
- SinkResultType PhysicalLimit::Sink(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate,
60792
- DataChunk &input) const {
60793
- D_ASSERT(input.size() > 0);
60794
- auto &state = (LimitGlobalState &)gstate;
60795
- auto &limit = state.limit;
60796
- auto &offset = state.offset;
61190
+ unique_ptr<LocalSinkState> PhysicalLimit::GetLocalSinkState(ExecutionContext &context) const {
61191
+ return make_unique<LimitLocalState>(*this);
61192
+ }
60797
61193
 
61194
+ bool PhysicalLimit::ComputeOffset(DataChunk &input, idx_t &limit, idx_t &offset, idx_t current_offset,
61195
+ idx_t &max_element, Expression *limit_expression, Expression *offset_expression) {
60798
61196
  if (limit != DConstants::INVALID_INDEX && offset != DConstants::INVALID_INDEX) {
60799
- idx_t max_element = limit + offset;
60800
- if ((limit == 0 || state.current_offset >= max_element) && !(limit_expression || offset_expression)) {
60801
- return SinkResultType::FINISHED;
61197
+ max_element = limit + offset;
61198
+ if ((limit == 0 || current_offset >= max_element) && !(limit_expression || offset_expression)) {
61199
+ return false;
60802
61200
  }
60803
61201
  }
60804
61202
 
60805
61203
  // get the next chunk from the child
60806
61204
  if (limit == DConstants::INVALID_INDEX) {
60807
61205
  limit = 1ULL << 62ULL;
60808
- Value val = GetDelimiter(input, limit_expression.get());
61206
+ Value val = GetDelimiter(input, limit_expression);
60809
61207
  if (!val.IsNull()) {
60810
61208
  limit = val.GetValue<idx_t>();
60811
61209
  }
@@ -60815,7 +61213,7 @@ SinkResultType PhysicalLimit::Sink(ExecutionContext &context, GlobalSinkState &g
60815
61213
  }
60816
61214
  if (offset == DConstants::INVALID_INDEX) {
60817
61215
  offset = 0;
60818
- Value val = GetDelimiter(input, offset_expression.get());
61216
+ Value val = GetDelimiter(input, offset_expression);
60819
61217
  if (!val.IsNull()) {
60820
61218
  offset = val.GetValue<idx_t>();
60821
61219
  }
@@ -60823,42 +61221,77 @@ SinkResultType PhysicalLimit::Sink(ExecutionContext &context, GlobalSinkState &g
60823
61221
  throw BinderException("Max value %lld for LIMIT/OFFSET is %lld", offset, 1ULL << 62ULL);
60824
61222
  }
60825
61223
  }
60826
- idx_t max_element = limit + offset;
60827
- if (limit == 0 || state.current_offset >= max_element) {
60828
- return SinkResultType::FINISHED;
60829
- }
60830
- if (!HandleOffset(input, state.current_offset, offset, limit)) {
60831
- return SinkResultType::NEED_MORE_INPUT;
61224
+ max_element = limit + offset;
61225
+ if (limit == 0 || current_offset >= max_element) {
61226
+ return false;
60832
61227
  }
61228
+ return true;
61229
+ }
60833
61230
 
60834
- state.data.Append(input);
61231
+ SinkResultType PhysicalLimit::Sink(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate,
61232
+ DataChunk &input) const {
61233
+
61234
+ D_ASSERT(input.size() > 0);
61235
+ auto &state = (LimitLocalState &)lstate;
61236
+ auto &limit = state.limit;
61237
+ auto &offset = state.offset;
61238
+
61239
+ idx_t max_element;
61240
+ if (!ComputeOffset(input, limit, offset, state.current_offset, max_element, limit_expression.get(),
61241
+ offset_expression.get())) {
61242
+ return SinkResultType::FINISHED;
61243
+ }
61244
+ state.data.Append(input, lstate.batch_index);
61245
+ state.current_offset += input.size();
60835
61246
  return SinkResultType::NEED_MORE_INPUT;
60836
61247
  }
60837
61248
 
61249
+ void PhysicalLimit::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
61250
+ auto &gstate = (LimitGlobalState &)gstate_p;
61251
+ auto &state = (LimitLocalState &)lstate_p;
61252
+
61253
+ lock_guard<mutex> lock(gstate.glock);
61254
+ gstate.limit = state.limit;
61255
+ gstate.offset = state.offset;
61256
+ gstate.data.Merge(state.data);
61257
+ }
61258
+
60838
61259
  //===--------------------------------------------------------------------===//
60839
61260
  // Source
60840
61261
  //===--------------------------------------------------------------------===//
60841
- class LimitOperatorState : public GlobalSourceState {
61262
+ class LimitSourceState : public GlobalSourceState {
60842
61263
  public:
60843
- LimitOperatorState() : chunk_idx(0) {
61264
+ LimitSourceState() {
61265
+ initialized = false;
61266
+ current_offset = 0;
60844
61267
  }
60845
61268
 
60846
- idx_t chunk_idx;
61269
+ bool initialized;
61270
+ idx_t current_offset;
61271
+ BatchedChunkScanState scan_state;
60847
61272
  };
60848
61273
 
60849
61274
  unique_ptr<GlobalSourceState> PhysicalLimit::GetGlobalSourceState(ClientContext &context) const {
60850
- return make_unique<LimitOperatorState>();
61275
+ return make_unique<LimitSourceState>();
60851
61276
  }
60852
61277
 
60853
61278
  void PhysicalLimit::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
60854
61279
  LocalSourceState &lstate) const {
60855
61280
  auto &gstate = (LimitGlobalState &)*sink_state;
60856
- auto &state = (LimitOperatorState &)gstate_p;
60857
- if (state.chunk_idx >= gstate.data.ChunkCount()) {
60858
- return;
61281
+ auto &state = (LimitSourceState &)gstate_p;
61282
+ while (state.current_offset < gstate.limit + gstate.offset) {
61283
+ if (!state.initialized) {
61284
+ gstate.data.InitializeScan(state.scan_state);
61285
+ state.initialized = true;
61286
+ }
61287
+ gstate.data.Scan(state.scan_state, chunk);
61288
+ if (chunk.size() == 0) {
61289
+ break;
61290
+ }
61291
+ if (HandleOffset(chunk, state.current_offset, gstate.offset, gstate.limit)) {
61292
+ break;
61293
+ }
60859
61294
  }
60860
- chunk.Reference(gstate.data.GetChunk(state.chunk_idx));
60861
- state.chunk_idx++;
60862
61295
  }
60863
61296
 
60864
61297
  bool PhysicalLimit::HandleOffset(DataChunk &input, idx_t &current_offset, idx_t offset, idx_t limit) {
@@ -60948,6 +61381,11 @@ public:
60948
61381
  unique_ptr<Expression> limit_expression;
60949
61382
  unique_ptr<Expression> offset_expression;
60950
61383
 
61384
+ public:
61385
+ bool IsOrderDependent() const override {
61386
+ return true;
61387
+ }
61388
+
60951
61389
  public:
60952
61390
  // Source interface
60953
61391
  unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
@@ -60963,10 +61401,6 @@ public:
60963
61401
  bool IsSink() const override {
60964
61402
  return true;
60965
61403
  }
60966
-
60967
- bool SinkOrderMatters() const override {
60968
- return true;
60969
- }
60970
61404
  };
60971
61405
 
60972
61406
  } // namespace duckdb
@@ -61182,6 +61616,86 @@ void PhysicalLoad::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSo
61182
61616
  }
61183
61617
  }
61184
61618
 
61619
+ } // namespace duckdb
61620
+ //===----------------------------------------------------------------------===//
61621
+ // DuckDB
61622
+ //
61623
+ // duckdb/execution/operator/helper/physical_materialized_collector.hpp
61624
+ //
61625
+ //
61626
+ //===----------------------------------------------------------------------===//
61627
+
61628
+
61629
+
61630
+
61631
+
61632
+ namespace duckdb {
61633
+
61634
+ class PhysicalMaterializedCollector : public PhysicalResultCollector {
61635
+ public:
61636
+ PhysicalMaterializedCollector(PreparedStatementData &data, bool parallel);
61637
+
61638
+ bool parallel;
61639
+
61640
+ public:
61641
+ unique_ptr<QueryResult> GetResult(GlobalSinkState &state) override;
61642
+
61643
+ public:
61644
+ // Sink interface
61645
+ SinkResultType Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
61646
+ DataChunk &input) const override;
61647
+
61648
+ unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
61649
+
61650
+ bool ParallelSink() const override;
61651
+ };
61652
+
61653
+ } // namespace duckdb
61654
+
61655
+
61656
+
61657
+
61658
+
61659
+ namespace duckdb {
61660
+
61661
+ PhysicalMaterializedCollector::PhysicalMaterializedCollector(PreparedStatementData &data, bool parallel)
61662
+ : PhysicalResultCollector(data), parallel(parallel) {
61663
+ }
61664
+
61665
+ //===--------------------------------------------------------------------===//
61666
+ // Sink
61667
+ //===--------------------------------------------------------------------===//
61668
+ class MaterializedCollectorGlobalState : public GlobalSinkState {
61669
+ public:
61670
+ mutex glock;
61671
+ unique_ptr<MaterializedQueryResult> result;
61672
+ };
61673
+
61674
+ SinkResultType PhysicalMaterializedCollector::Sink(ExecutionContext &context, GlobalSinkState &gstate_p,
61675
+ LocalSinkState &lstate, DataChunk &input) const {
61676
+ auto &gstate = (MaterializedCollectorGlobalState &)gstate_p;
61677
+ lock_guard<mutex> lock(gstate.glock);
61678
+ gstate.result->collection.Append(input);
61679
+ return SinkResultType::NEED_MORE_INPUT;
61680
+ }
61681
+
61682
+ unique_ptr<GlobalSinkState> PhysicalMaterializedCollector::GetGlobalSinkState(ClientContext &context) const {
61683
+ auto state = make_unique<MaterializedCollectorGlobalState>();
61684
+ state->result =
61685
+ make_unique<MaterializedQueryResult>(statement_type, properties, types, names, context.shared_from_this());
61686
+ return move(state);
61687
+ }
61688
+
61689
+ unique_ptr<QueryResult> PhysicalMaterializedCollector::GetResult(GlobalSinkState &state) {
61690
+ auto &gstate = (MaterializedCollectorGlobalState &)state;
61691
+ D_ASSERT(gstate.result);
61692
+ return move(gstate.result);
61693
+ }
61694
+
61695
+ bool PhysicalMaterializedCollector::ParallelSink() const {
61696
+ return parallel;
61697
+ }
61698
+
61185
61699
  } // namespace duckdb
61186
61700
  //===----------------------------------------------------------------------===//
61187
61701
  // DuckDB
@@ -61520,6 +62034,55 @@ string PhysicalReservoirSample::ParamsToString() const {
61520
62034
  }
61521
62035
 
61522
62036
  } // namespace duckdb
62037
+
62038
+
62039
+
62040
+
62041
+
62042
+
62043
+
62044
+ namespace duckdb {
62045
+
62046
+ PhysicalResultCollector::PhysicalResultCollector(PreparedStatementData &data)
62047
+ : PhysicalOperator(PhysicalOperatorType::RESULT_COLLECTOR, {LogicalType::BOOLEAN}, 0),
62048
+ statement_type(data.statement_type), properties(data.properties), plan(data.plan.get()), names(data.names) {
62049
+ this->types = data.types;
62050
+ }
62051
+
62052
+ unique_ptr<PhysicalResultCollector> PhysicalResultCollector::GetResultCollector(ClientContext &context,
62053
+ PreparedStatementData &data) {
62054
+ auto &config = DBConfig::GetConfig(context);
62055
+ bool use_materialized_collector = !config.preserve_insertion_order || !data.plan->AllSourcesSupportBatchIndex();
62056
+ if (use_materialized_collector) {
62057
+ // parallel materialized collector only if we don't care about maintaining insertion order
62058
+ return make_unique_base<PhysicalResultCollector, PhysicalMaterializedCollector>(
62059
+ data, !config.preserve_insertion_order);
62060
+ } else {
62061
+ // we care about maintaining insertion order and the sources all support batch indexes
62062
+ // use a batch collector
62063
+ return make_unique_base<PhysicalResultCollector, PhysicalBatchCollector>(data);
62064
+ }
62065
+ }
62066
+
62067
+ vector<PhysicalOperator *> PhysicalResultCollector::GetChildren() const {
62068
+ return {plan};
62069
+ }
62070
+
62071
+ void PhysicalResultCollector::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
62072
+ // operator is a sink, build a pipeline
62073
+ sink_state.reset();
62074
+
62075
+ // single operator:
62076
+ // the operator becomes the data source of the current pipeline
62077
+ state.SetPipelineSource(current, this);
62078
+ // we create a new pipeline starting from the child
62079
+ D_ASSERT(children.size() == 0);
62080
+ D_ASSERT(plan);
62081
+
62082
+ BuildChildPipeline(executor, current, state, plan);
62083
+ }
62084
+
62085
+ } // namespace duckdb
61523
62086
  //===----------------------------------------------------------------------===//
61524
62087
  // DuckDB
61525
62088
  //
@@ -61633,6 +62196,77 @@ void PhysicalSet::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSou
61633
62196
  }
61634
62197
 
61635
62198
  } // namespace duckdb
62199
+
62200
+
62201
+
62202
+ namespace duckdb {
62203
+
62204
+ PhysicalStreamingLimit::PhysicalStreamingLimit(vector<LogicalType> types, idx_t limit, idx_t offset,
62205
+ unique_ptr<Expression> limit_expression,
62206
+ unique_ptr<Expression> offset_expression, idx_t estimated_cardinality,
62207
+ bool parallel)
62208
+ : PhysicalOperator(PhysicalOperatorType::STREAMING_LIMIT, move(types), estimated_cardinality), limit_value(limit),
62209
+ offset_value(offset), limit_expression(move(limit_expression)), offset_expression(move(offset_expression)),
62210
+ parallel(parallel) {
62211
+ }
62212
+
62213
+ //===--------------------------------------------------------------------===//
62214
+ // Operator
62215
+ //===--------------------------------------------------------------------===//
62216
+ class StreamingLimitOperatorState : public OperatorState {
62217
+ public:
62218
+ explicit StreamingLimitOperatorState(const PhysicalStreamingLimit &op) {
62219
+ this->limit = op.limit_expression ? DConstants::INVALID_INDEX : op.limit_value;
62220
+ this->offset = op.offset_expression ? DConstants::INVALID_INDEX : op.offset_value;
62221
+ }
62222
+
62223
+ idx_t limit;
62224
+ idx_t offset;
62225
+ };
62226
+
62227
+ class StreamingLimitGlobalState : public GlobalOperatorState {
62228
+ public:
62229
+ StreamingLimitGlobalState() : current_offset(0) {
62230
+ }
62231
+
62232
+ std::atomic<idx_t> current_offset;
62233
+ };
62234
+
62235
+ unique_ptr<OperatorState> PhysicalStreamingLimit::GetOperatorState(ClientContext &context) const {
62236
+ return make_unique<StreamingLimitOperatorState>(*this);
62237
+ }
62238
+
62239
+ unique_ptr<GlobalOperatorState> PhysicalStreamingLimit::GetGlobalOperatorState(ClientContext &context) const {
62240
+ return make_unique<StreamingLimitGlobalState>();
62241
+ }
62242
+
62243
+ OperatorResultType PhysicalStreamingLimit::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
62244
+ GlobalOperatorState &gstate_p, OperatorState &state_p) const {
62245
+ auto &gstate = (StreamingLimitGlobalState &)gstate_p;
62246
+ auto &state = (StreamingLimitOperatorState &)state_p;
62247
+ auto &limit = state.limit;
62248
+ auto &offset = state.offset;
62249
+ idx_t current_offset = gstate.current_offset.fetch_add(input.size());
62250
+ idx_t max_element;
62251
+ if (!PhysicalLimit::ComputeOffset(input, limit, offset, current_offset, max_element, limit_expression.get(),
62252
+ offset_expression.get())) {
62253
+ return OperatorResultType::FINISHED;
62254
+ }
62255
+ if (PhysicalLimit::HandleOffset(input, current_offset, offset, limit)) {
62256
+ chunk.Reference(input);
62257
+ }
62258
+ return OperatorResultType::NEED_MORE_INPUT;
62259
+ }
62260
+
62261
+ bool PhysicalStreamingLimit::IsOrderDependent() const {
62262
+ return !parallel;
62263
+ }
62264
+
62265
+ bool PhysicalStreamingLimit::ParallelOperator() const {
62266
+ return parallel;
62267
+ }
62268
+
62269
+ } // namespace duckdb
61636
62270
  //===----------------------------------------------------------------------===//
61637
62271
  // DuckDB
61638
62272
  //
@@ -62002,6 +62636,12 @@ public:
62002
62636
  static void ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &left, DataChunk &result, bool found_match[],
62003
62637
  bool has_null);
62004
62638
  static void ConstructLeftJoinResult(DataChunk &left, DataChunk &result, bool found_match[]);
62639
+
62640
+ public:
62641
+ static void BuildJoinPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state,
62642
+ PhysicalOperator &op);
62643
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
62644
+ vector<const PhysicalOperator *> GetSources() const override;
62005
62645
  };
62006
62646
 
62007
62647
  } // namespace duckdb
@@ -62819,6 +63459,10 @@ public:
62819
63459
  bool ParallelSink() const override {
62820
63460
  return true;
62821
63461
  }
63462
+
63463
+ public:
63464
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
63465
+ vector<const PhysicalOperator *> GetSources() const override;
62822
63466
  };
62823
63467
 
62824
63468
  } // namespace duckdb
@@ -62826,6 +63470,7 @@ public:
62826
63470
 
62827
63471
 
62828
63472
 
63473
+
62829
63474
  namespace duckdb {
62830
63475
 
62831
63476
  PhysicalCrossProduct::PhysicalCrossProduct(vector<LogicalType> types, unique_ptr<PhysicalOperator> left,
@@ -62912,6 +63557,17 @@ OperatorResultType PhysicalCrossProduct::Execute(ExecutionContext &context, Data
62912
63557
  return OperatorResultType::HAVE_MORE_OUTPUT;
62913
63558
  }
62914
63559
 
63560
+ //===--------------------------------------------------------------------===//
63561
+ // Pipeline Construction
63562
+ //===--------------------------------------------------------------------===//
63563
+ void PhysicalCrossProduct::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
63564
+ PhysicalJoin::BuildJoinPipelines(executor, current, state, *this);
63565
+ }
63566
+
63567
+ vector<const PhysicalOperator *> PhysicalCrossProduct::GetSources() const {
63568
+ return children[0]->GetSources();
63569
+ }
63570
+
62915
63571
  } // namespace duckdb
62916
63572
 
62917
63573
 
@@ -62947,6 +63603,66 @@ public:
62947
63603
  unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
62948
63604
  void GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
62949
63605
  LocalSourceState &lstate) const override;
63606
+
63607
+ public:
63608
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
63609
+ };
63610
+
63611
+ } // namespace duckdb
63612
+
63613
+
63614
+ //===----------------------------------------------------------------------===//
63615
+ // DuckDB
63616
+ //
63617
+ // duckdb/execution/operator/set/physical_recursive_cte.hpp
63618
+ //
63619
+ //
63620
+ //===----------------------------------------------------------------------===//
63621
+
63622
+
63623
+
63624
+
63625
+
63626
+ namespace duckdb {
63627
+ class Pipeline;
63628
+ class RecursiveCTEState;
63629
+
63630
+ class PhysicalRecursiveCTE : public PhysicalOperator {
63631
+ public:
63632
+ PhysicalRecursiveCTE(vector<LogicalType> types, bool union_all, unique_ptr<PhysicalOperator> top,
63633
+ unique_ptr<PhysicalOperator> bottom, idx_t estimated_cardinality);
63634
+ ~PhysicalRecursiveCTE() override;
63635
+
63636
+ bool union_all;
63637
+ std::shared_ptr<ChunkCollection> working_table;
63638
+ vector<shared_ptr<Pipeline>> pipelines;
63639
+
63640
+ public:
63641
+ // Source interface
63642
+ void GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
63643
+ LocalSourceState &lstate) const override;
63644
+
63645
+ public:
63646
+ // Sink interface
63647
+ SinkResultType Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
63648
+ DataChunk &input) const override;
63649
+
63650
+ unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
63651
+
63652
+ bool IsSink() const override {
63653
+ return true;
63654
+ }
63655
+
63656
+ public:
63657
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
63658
+
63659
+ vector<const PhysicalOperator *> GetSources() const override;
63660
+
63661
+ private:
63662
+ //! Probe Hash Table and eliminate duplicate rows
63663
+ idx_t ProbeHT(DataChunk &chunk, RecursiveCTEState &state) const;
63664
+
63665
+ void ExecuteRecursivePipelines(ExecutionContext &context) const;
62950
63666
  };
62951
63667
 
62952
63668
  } // namespace duckdb
@@ -62972,6 +63688,16 @@ PhysicalDelimJoin::PhysicalDelimJoin(vector<LogicalType> types, unique_ptr<Physi
62972
63688
  join->children[0] = move(cached_chunk_scan);
62973
63689
  }
62974
63690
 
63691
+ vector<PhysicalOperator *> PhysicalDelimJoin::GetChildren() const {
63692
+ vector<PhysicalOperator *> result;
63693
+ for (auto &child : children) {
63694
+ result.push_back(child.get());
63695
+ }
63696
+ result.push_back(join.get());
63697
+ result.push_back(distinct.get());
63698
+ return result;
63699
+ }
63700
+
62975
63701
  //===--------------------------------------------------------------------===//
62976
63702
  // Sink
62977
63703
  //===--------------------------------------------------------------------===//
@@ -63045,6 +63771,40 @@ string PhysicalDelimJoin::ParamsToString() const {
63045
63771
  return join->ParamsToString();
63046
63772
  }
63047
63773
 
63774
+ //===--------------------------------------------------------------------===//
63775
+ // Pipeline Construction
63776
+ //===--------------------------------------------------------------------===//
63777
+ void PhysicalDelimJoin::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
63778
+ op_state.reset();
63779
+ sink_state.reset();
63780
+
63781
+ // duplicate eliminated join
63782
+ auto pipeline = make_shared<Pipeline>(executor);
63783
+ state.SetPipelineSink(*pipeline, this);
63784
+ current.AddDependency(pipeline);
63785
+
63786
+ // recurse into the pipeline child
63787
+ children[0]->BuildPipelines(executor, *pipeline, state);
63788
+ if (type == PhysicalOperatorType::DELIM_JOIN) {
63789
+ // recurse into the actual join
63790
+ // any pipelines in there depend on the main pipeline
63791
+ // any scan of the duplicate eliminated data on the RHS depends on this pipeline
63792
+ // we add an entry to the mapping of (PhysicalOperator*) -> (Pipeline*)
63793
+ for (auto &delim_scan : delim_scans) {
63794
+ state.delim_join_dependencies[delim_scan] = pipeline.get();
63795
+ }
63796
+ join->BuildPipelines(executor, current, state);
63797
+ }
63798
+ if (!state.recursive_cte) {
63799
+ // regular pipeline: schedule it
63800
+ state.AddPipeline(executor, move(pipeline));
63801
+ } else {
63802
+ // CTE pipeline! add it to the CTE pipelines
63803
+ auto &cte = (PhysicalRecursiveCTE &)*state.recursive_cte;
63804
+ cte.pipelines.push_back(move(pipeline));
63805
+ }
63806
+ }
63807
+
63048
63808
  } // namespace duckdb
63049
63809
 
63050
63810
 
@@ -63616,6 +64376,9 @@ public:
63616
64376
  return true;
63617
64377
  }
63618
64378
 
64379
+ public:
64380
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
64381
+
63619
64382
  private:
63620
64383
  // resolve joins that can potentially output N*M elements (INNER, LEFT, FULL)
63621
64384
  void ResolveComplexJoin(ExecutionContext &context, DataChunk &result, LocalSourceState &state) const;
@@ -64725,6 +65488,38 @@ void PhysicalIEJoin::GetData(ExecutionContext &context, DataChunk &result, Globa
64725
65488
  }
64726
65489
  }
64727
65490
 
65491
+ //===--------------------------------------------------------------------===//
65492
+ // Pipeline Construction
65493
+ //===--------------------------------------------------------------------===//
65494
+ void PhysicalIEJoin::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
65495
+ D_ASSERT(children.size() == 2);
65496
+ if (state.recursive_cte) {
65497
+ throw NotImplementedException("IEJoins are not supported in recursive CTEs yet");
65498
+ }
65499
+
65500
+ // Build the LHS
65501
+ auto lhs_pipeline = make_shared<Pipeline>(executor);
65502
+ state.SetPipelineSink(*lhs_pipeline, this);
65503
+ D_ASSERT(children[0].get());
65504
+ children[0]->BuildPipelines(executor, *lhs_pipeline, state);
65505
+
65506
+ // Build the RHS
65507
+ auto rhs_pipeline = make_shared<Pipeline>(executor);
65508
+ state.SetPipelineSink(*rhs_pipeline, this);
65509
+ D_ASSERT(children[1].get());
65510
+ children[1]->BuildPipelines(executor, *rhs_pipeline, state);
65511
+
65512
+ // RHS => LHS => current
65513
+ current.AddDependency(rhs_pipeline);
65514
+ rhs_pipeline->AddDependency(lhs_pipeline);
65515
+
65516
+ state.AddPipeline(executor, move(lhs_pipeline));
65517
+ state.AddPipeline(executor, move(rhs_pipeline));
65518
+
65519
+ // Now build both and scan
65520
+ state.SetPipelineSource(current, this);
65521
+ }
65522
+
64728
65523
  } // namespace duckdb
64729
65524
  //===----------------------------------------------------------------------===//
64730
65525
  // DuckDB
@@ -64790,6 +65585,10 @@ public:
64790
65585
  return true;
64791
65586
  }
64792
65587
 
65588
+ public:
65589
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
65590
+ vector<const PhysicalOperator *> GetSources() const override;
65591
+
64793
65592
  private:
64794
65593
  void GetRHSMatches(ExecutionContext &context, DataChunk &input, OperatorState &state_p) const;
64795
65594
  //! Fills result chunk
@@ -64849,10 +65648,18 @@ public:
64849
65648
  unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
64850
65649
  void GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
64851
65650
  LocalSourceState &lstate) const override;
65651
+ idx_t GetBatchIndex(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
65652
+ LocalSourceState &lstate) const override;
64852
65653
 
64853
65654
  bool ParallelSource() const override {
64854
65655
  return true;
64855
65656
  }
65657
+
65658
+ bool SupportsBatchIndex() const override {
65659
+ return function.supports_batch_index;
65660
+ }
65661
+
65662
+ double GetProgress(ClientContext &context, GlobalSourceState &gstate) const override;
64856
65663
  };
64857
65664
 
64858
65665
  } // namespace duckdb
@@ -65097,9 +65904,25 @@ OperatorResultType PhysicalIndexJoin::Execute(ExecutionContext &context, DataChu
65097
65904
  return OperatorResultType::HAVE_MORE_OUTPUT;
65098
65905
  }
65099
65906
 
65907
+ //===--------------------------------------------------------------------===//
65908
+ // Pipeline Construction
65909
+ //===--------------------------------------------------------------------===//
65910
+ void PhysicalIndexJoin::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
65911
+ // index join: we only continue into the LHS
65912
+ // the right side is probed by the index join
65913
+ // so we don't need to do anything in the pipeline with this child
65914
+ state.AddPipelineOperator(current, this);
65915
+ children[0]->BuildPipelines(executor, current, state);
65916
+ }
65917
+
65918
+ vector<const PhysicalOperator *> PhysicalIndexJoin::GetSources() const {
65919
+ return children[0]->GetSources();
65920
+ }
65921
+
65100
65922
  } // namespace duckdb
65101
65923
 
65102
65924
 
65925
+
65103
65926
  namespace duckdb {
65104
65927
 
65105
65928
  PhysicalJoin::PhysicalJoin(LogicalOperator &op, PhysicalOperatorType type, JoinType join_type,
@@ -65119,6 +65942,44 @@ bool PhysicalJoin::EmptyResultIfRHSIsEmpty() const {
65119
65942
  }
65120
65943
  }
65121
65944
 
65945
+ //===--------------------------------------------------------------------===//
65946
+ // Pipeline Construction
65947
+ //===--------------------------------------------------------------------===//
65948
+ void PhysicalJoin::BuildJoinPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state,
65949
+ PhysicalOperator &op) {
65950
+ op.op_state.reset();
65951
+ op.sink_state.reset();
65952
+
65953
+ // on the LHS (probe child), the operator becomes a regular operator
65954
+ state.AddPipelineOperator(current, &op);
65955
+ if (op.IsSource()) {
65956
+ // FULL or RIGHT outer join
65957
+ // schedule a scan of the node as a child pipeline
65958
+ // this scan has to be performed AFTER all the probing has happened
65959
+ if (state.recursive_cte) {
65960
+ throw NotImplementedException("FULL and RIGHT outer joins are not supported in recursive CTEs yet");
65961
+ }
65962
+ state.AddChildPipeline(executor, current);
65963
+ }
65964
+ // continue building the pipeline on this child
65965
+ op.children[0]->BuildPipelines(executor, current, state);
65966
+
65967
+ // on the RHS (build side), we construct a new child pipeline with this pipeline as its source
65968
+ op.BuildChildPipeline(executor, current, state, op.children[1].get());
65969
+ }
65970
+
65971
+ void PhysicalJoin::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
65972
+ PhysicalJoin::BuildJoinPipelines(executor, current, state, *this);
65973
+ }
65974
+
65975
+ vector<const PhysicalOperator *> PhysicalJoin::GetSources() const {
65976
+ auto result = children[0]->GetSources();
65977
+ if (IsSource()) {
65978
+ result.push_back(this);
65979
+ }
65980
+ return result;
65981
+ }
65982
+
65122
65983
  } // namespace duckdb
65123
65984
  //===----------------------------------------------------------------------===//
65124
65985
  // DuckDB
@@ -70046,6 +70907,10 @@ public:
70046
70907
  bool IsSink() const override {
70047
70908
  return true;
70048
70909
  }
70910
+
70911
+ public:
70912
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
70913
+ vector<const PhysicalOperator *> GetSources() const override;
70049
70914
  };
70050
70915
 
70051
70916
  } // namespace duckdb
@@ -70057,6 +70922,7 @@ public:
70057
70922
 
70058
70923
 
70059
70924
 
70925
+
70060
70926
  #include <algorithm>
70061
70927
  #include <sstream>
70062
70928
 
@@ -70221,6 +71087,23 @@ SinkResultType PhysicalExport::Sink(ExecutionContext &context, GlobalSinkState &
70221
71087
  return SinkResultType::NEED_MORE_INPUT;
70222
71088
  }
70223
71089
 
71090
+ //===--------------------------------------------------------------------===//
71091
+ // Pipeline Construction
71092
+ //===--------------------------------------------------------------------===//
71093
+ void PhysicalExport::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
71094
+ // EXPORT has an optional child
71095
+ // we only need to schedule child pipelines if there is a child
71096
+ state.SetPipelineSource(current, this);
71097
+ if (children.empty()) {
71098
+ return;
71099
+ }
71100
+ PhysicalOperator::BuildPipelines(executor, current, state);
71101
+ }
71102
+
71103
+ vector<const PhysicalOperator *> PhysicalExport::GetSources() const {
71104
+ return {this};
71105
+ }
71106
+
70224
71107
  } // namespace duckdb
70225
71108
  //===----------------------------------------------------------------------===//
70226
71109
  // DuckDB
@@ -71159,6 +72042,8 @@ OperatorResultType PhysicalUnnest::Execute(ExecutionContext &context, DataChunk
71159
72042
  } // namespace duckdb
71160
72043
 
71161
72044
 
72045
+
72046
+
71162
72047
  namespace duckdb {
71163
72048
 
71164
72049
  class PhysicalChunkScanState : public GlobalSourceState {
@@ -71190,6 +72075,38 @@ void PhysicalChunkScan::GetData(ExecutionContext &context, DataChunk &chunk, Glo
71190
72075
  state.chunk_index++;
71191
72076
  }
71192
72077
 
72078
+ //===--------------------------------------------------------------------===//
72079
+ // Pipeline Construction
72080
+ //===--------------------------------------------------------------------===//
72081
+ void PhysicalChunkScan::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
72082
+ // check if there is any additional action we need to do depending on the type
72083
+ switch (type) {
72084
+ case PhysicalOperatorType::DELIM_SCAN: {
72085
+ auto entry = state.delim_join_dependencies.find(this);
72086
+ D_ASSERT(entry != state.delim_join_dependencies.end());
72087
+ // this chunk scan introduces a dependency to the current pipeline
72088
+ // namely a dependency on the duplicate elimination pipeline to finish
72089
+ auto delim_dependency = entry->second->shared_from_this();
72090
+ auto delim_sink = state.GetPipelineSink(*delim_dependency);
72091
+ D_ASSERT(delim_sink);
72092
+ D_ASSERT(delim_sink->type == PhysicalOperatorType::DELIM_JOIN);
72093
+ auto &delim_join = (PhysicalDelimJoin &)*delim_sink;
72094
+ current.AddDependency(delim_dependency);
72095
+ state.SetPipelineSource(current, (PhysicalOperator *)delim_join.distinct.get());
72096
+ return;
72097
+ }
72098
+ case PhysicalOperatorType::RECURSIVE_CTE_SCAN:
72099
+ if (!state.recursive_cte) {
72100
+ throw InternalException("Recursive CTE scan found without recursive CTE node");
72101
+ }
72102
+ break;
72103
+ default:
72104
+ break;
72105
+ }
72106
+ D_ASSERT(children.empty());
72107
+ state.SetPipelineSource(current, this);
72108
+ }
72109
+
71193
72110
  } // namespace duckdb
71194
72111
  //===----------------------------------------------------------------------===//
71195
72112
  // DuckDB
@@ -71499,6 +72416,24 @@ void PhysicalTableScan::GetData(ExecutionContext &context, DataChunk &chunk, Glo
71499
72416
  }
71500
72417
  }
71501
72418
 
72419
+ double PhysicalTableScan::GetProgress(ClientContext &context, GlobalSourceState &gstate_p) const {
72420
+ if (function.table_scan_progress) {
72421
+ return function.table_scan_progress(context, bind_data.get());
72422
+ }
72423
+ // if table_scan_progress is not implemented we don't support this function yet in the progress bar
72424
+ return -1;
72425
+ }
72426
+
72427
+ idx_t PhysicalTableScan::GetBatchIndex(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
72428
+ LocalSourceState &lstate) const {
72429
+ D_ASSERT(SupportsBatchIndex());
72430
+ D_ASSERT(function.get_batch_index);
72431
+ auto &gstate = (TableScanGlobalState &)gstate_p;
72432
+ auto &state = (TableScanLocalState &)lstate;
72433
+ return function.get_batch_index(context.client, bind_data.get(), state.operator_data.get(),
72434
+ gstate.parallel_state.get());
72435
+ }
72436
+
71502
72437
  string PhysicalTableScan::GetName() const {
71503
72438
  return StringUtil::Upper(function.name);
71504
72439
  }
@@ -72325,56 +73260,7 @@ void PhysicalDrop::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSo
72325
73260
  }
72326
73261
 
72327
73262
  } // namespace duckdb
72328
- //===----------------------------------------------------------------------===//
72329
- // DuckDB
72330
- //
72331
- // duckdb/execution/operator/set/physical_recursive_cte.hpp
72332
- //
72333
- //
72334
- //===----------------------------------------------------------------------===//
72335
-
72336
-
72337
-
72338
-
72339
-
72340
- namespace duckdb {
72341
- class Pipeline;
72342
- class RecursiveCTEState;
72343
-
72344
- class PhysicalRecursiveCTE : public PhysicalOperator {
72345
- public:
72346
- PhysicalRecursiveCTE(vector<LogicalType> types, bool union_all, unique_ptr<PhysicalOperator> top,
72347
- unique_ptr<PhysicalOperator> bottom, idx_t estimated_cardinality);
72348
- ~PhysicalRecursiveCTE() override;
72349
-
72350
- bool union_all;
72351
- std::shared_ptr<ChunkCollection> working_table;
72352
- vector<shared_ptr<Pipeline>> pipelines;
72353
-
72354
- public:
72355
- // Source interface
72356
- void GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
72357
- LocalSourceState &lstate) const override;
72358
-
72359
- public:
72360
- // Sink interface
72361
- SinkResultType Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
72362
- DataChunk &input) const override;
72363
-
72364
- unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
72365
-
72366
- bool IsSink() const override {
72367
- return true;
72368
- }
72369
73263
 
72370
- private:
72371
- //! Probe Hash Table and eliminate duplicate rows
72372
- idx_t ProbeHT(DataChunk &chunk, RecursiveCTEState &state) const;
72373
-
72374
- void ExecuteRecursivePipelines(ExecutionContext &context) const;
72375
- };
72376
-
72377
- } // namespace duckdb
72378
73264
 
72379
73265
 
72380
73266
 
@@ -72523,6 +73409,40 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context)
72523
73409
  }
72524
73410
  }
72525
73411
 
73412
+ //===--------------------------------------------------------------------===//
73413
+ // Pipeline Construction
73414
+ //===--------------------------------------------------------------------===//
73415
+ void PhysicalRecursiveCTE::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
73416
+ op_state.reset();
73417
+ sink_state.reset();
73418
+
73419
+ // recursive CTE
73420
+ state.SetPipelineSource(current, this);
73421
+ // the LHS of the recursive CTE is our initial state
73422
+ // we build this pipeline as normal
73423
+ auto pipeline_child = children[0].get();
73424
+ // for the RHS, we gather all pipelines that depend on the recursive cte
73425
+ // these pipelines need to be rerun
73426
+ if (state.recursive_cte) {
73427
+ throw InternalException("Recursive CTE detected WITHIN a recursive CTE node");
73428
+ }
73429
+ state.recursive_cte = this;
73430
+
73431
+ auto recursive_pipeline = make_shared<Pipeline>(executor);
73432
+ state.SetPipelineSink(*recursive_pipeline, this);
73433
+ children[1]->BuildPipelines(executor, *recursive_pipeline, state);
73434
+
73435
+ pipelines.push_back(move(recursive_pipeline));
73436
+
73437
+ state.recursive_cte = nullptr;
73438
+
73439
+ BuildChildPipeline(executor, current, state, pipeline_child);
73440
+ }
73441
+
73442
+ vector<const PhysicalOperator *> PhysicalRecursiveCTE::GetSources() const {
73443
+ return {this};
73444
+ }
73445
+
72526
73446
  } // namespace duckdb
72527
73447
  //===----------------------------------------------------------------------===//
72528
73448
  // DuckDB
@@ -72541,12 +73461,17 @@ class PhysicalUnion : public PhysicalOperator {
72541
73461
  public:
72542
73462
  PhysicalUnion(vector<LogicalType> types, unique_ptr<PhysicalOperator> top, unique_ptr<PhysicalOperator> bottom,
72543
73463
  idx_t estimated_cardinality);
73464
+
73465
+ public:
73466
+ void BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) override;
73467
+ vector<const PhysicalOperator *> GetSources() const override;
72544
73468
  };
72545
73469
 
72546
73470
  } // namespace duckdb
72547
73471
 
72548
73472
 
72549
73473
 
73474
+
72550
73475
  namespace duckdb {
72551
73476
 
72552
73477
  PhysicalUnion::PhysicalUnion(vector<LogicalType> types, unique_ptr<PhysicalOperator> top,
@@ -72556,6 +73481,49 @@ PhysicalUnion::PhysicalUnion(vector<LogicalType> types, unique_ptr<PhysicalOpera
72556
73481
  children.push_back(move(bottom));
72557
73482
  }
72558
73483
 
73484
+ //===--------------------------------------------------------------------===//
73485
+ // Pipeline Construction
73486
+ //===--------------------------------------------------------------------===//
73487
+ void PhysicalUnion::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
73488
+ if (state.recursive_cte) {
73489
+ throw NotImplementedException("UNIONS are not supported in recursive CTEs yet");
73490
+ }
73491
+ op_state.reset();
73492
+ sink_state.reset();
73493
+
73494
+ auto union_pipeline = make_shared<Pipeline>(executor);
73495
+ auto pipeline_ptr = union_pipeline.get();
73496
+ auto &child_pipelines = state.GetChildPipelines(executor);
73497
+ auto &child_dependencies = state.GetChildDependencies(executor);
73498
+ auto &union_pipelines = state.GetUnionPipelines(executor);
73499
+ // set up dependencies for any child pipelines to this union pipeline
73500
+ auto child_entry = child_pipelines.find(&current);
73501
+ if (child_entry != child_pipelines.end()) {
73502
+ for (auto &current_child : child_entry->second) {
73503
+ D_ASSERT(child_dependencies.find(current_child.get()) != child_dependencies.end());
73504
+ child_dependencies[current_child.get()].push_back(pipeline_ptr);
73505
+ }
73506
+ }
73507
+ // for the current pipeline, continue building on the LHS
73508
+ state.SetPipelineOperators(*union_pipeline, state.GetPipelineOperators(current));
73509
+ children[0]->BuildPipelines(executor, current, state);
73510
+ // insert the union pipeline as a union pipeline of the current node
73511
+ union_pipelines[&current].push_back(move(union_pipeline));
73512
+
73513
+ // for the union pipeline, build on the RHS
73514
+ state.SetPipelineSink(*pipeline_ptr, state.GetPipelineSink(current));
73515
+ children[1]->BuildPipelines(executor, *pipeline_ptr, state);
73516
+ }
73517
+
73518
+ vector<const PhysicalOperator *> PhysicalUnion::GetSources() const {
73519
+ vector<const PhysicalOperator *> result;
73520
+ for (auto &child : children) {
73521
+ auto child_sources = child->GetSources();
73522
+ result.insert(result.end(), child_sources.begin(), child_sources.end());
73523
+ }
73524
+ return result;
73525
+ }
73526
+
72559
73527
  } // namespace duckdb
72560
73528
 
72561
73529
 
@@ -73027,6 +73995,7 @@ void PerfectAggregateHashTable::Destroy() {
73027
73995
 
73028
73996
 
73029
73997
 
73998
+
73030
73999
  namespace duckdb {
73031
74000
 
73032
74001
  string PhysicalOperator::GetName() const {
@@ -73044,6 +74013,14 @@ void PhysicalOperator::Print() const {
73044
74013
  }
73045
74014
  // LCOV_EXCL_STOP
73046
74015
 
74016
+ vector<PhysicalOperator *> PhysicalOperator::GetChildren() const {
74017
+ vector<PhysicalOperator *> result;
74018
+ for (auto &child : children) {
74019
+ result.push_back(child.get());
74020
+ }
74021
+ return result;
74022
+ }
74023
+
73047
74024
  //===--------------------------------------------------------------------===//
73048
74025
  // Operator
73049
74026
  //===--------------------------------------------------------------------===//
@@ -73079,6 +74056,15 @@ void PhysicalOperator::GetData(ExecutionContext &context, DataChunk &chunk, Glob
73079
74056
  LocalSourceState &lstate) const {
73080
74057
  throw InternalException("Calling GetData on a node that is not a source!");
73081
74058
  }
74059
+
74060
+ idx_t PhysicalOperator::GetBatchIndex(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
74061
+ LocalSourceState &lstate) const {
74062
+ throw InternalException("Calling GetBatchIndex on a node that does not support it");
74063
+ }
74064
+
74065
+ double PhysicalOperator::GetProgress(ClientContext &context, GlobalSourceState &gstate) const {
74066
+ return -1;
74067
+ }
73082
74068
  // LCOV_EXCL_STOP
73083
74069
 
73084
74070
  //===--------------------------------------------------------------------===//
@@ -73107,6 +74093,99 @@ unique_ptr<GlobalSinkState> PhysicalOperator::GetGlobalSinkState(ClientContext &
73107
74093
  return make_unique<GlobalSinkState>();
73108
74094
  }
73109
74095
 
74096
+ //===--------------------------------------------------------------------===//
74097
+ // Pipeline Construction
74098
+ //===--------------------------------------------------------------------===//
74099
+ void PhysicalOperator::AddPipeline(Executor &executor, shared_ptr<Pipeline> pipeline, PipelineBuildState &state) {
74100
+ if (!state.recursive_cte) {
74101
+ // regular pipeline: schedule it
74102
+ state.AddPipeline(executor, move(pipeline));
74103
+ } else {
74104
+ // CTE pipeline! add it to the CTE pipelines
74105
+ auto &cte = (PhysicalRecursiveCTE &)*state.recursive_cte;
74106
+ cte.pipelines.push_back(move(pipeline));
74107
+ }
74108
+ }
74109
+
74110
+ void PhysicalOperator::BuildChildPipeline(Executor &executor, Pipeline &current, PipelineBuildState &state,
74111
+ PhysicalOperator *pipeline_child) {
74112
+ auto pipeline = make_shared<Pipeline>(executor);
74113
+ state.SetPipelineSink(*pipeline, this);
74114
+ // the current is dependent on this pipeline to complete
74115
+ current.AddDependency(pipeline);
74116
+ // recurse into the pipeline child
74117
+ pipeline_child->BuildPipelines(executor, *pipeline, state);
74118
+ AddPipeline(executor, move(pipeline), state);
74119
+ }
74120
+
74121
+ void PhysicalOperator::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
74122
+ op_state.reset();
74123
+ if (IsSink()) {
74124
+ // operator is a sink, build a pipeline
74125
+ sink_state.reset();
74126
+
74127
+ // single operator:
74128
+ // the operator becomes the data source of the current pipeline
74129
+ state.SetPipelineSource(current, this);
74130
+ // we create a new pipeline starting from the child
74131
+ D_ASSERT(children.size() == 1);
74132
+
74133
+ BuildChildPipeline(executor, current, state, children[0].get());
74134
+ } else {
74135
+ // operator is not a sink! recurse in children
74136
+ if (children.empty()) {
74137
+ // source
74138
+ state.SetPipelineSource(current, this);
74139
+ } else {
74140
+ if (children.size() != 1) {
74141
+ throw InternalException("Operator not supported in BuildPipelines");
74142
+ }
74143
+ state.AddPipelineOperator(current, this);
74144
+ children[0]->BuildPipelines(executor, current, state);
74145
+ }
74146
+ }
74147
+ }
74148
+
74149
+ vector<const PhysicalOperator *> PhysicalOperator::GetSources() const {
74150
+ vector<const PhysicalOperator *> result;
74151
+ if (IsSink()) {
74152
+ D_ASSERT(children.size() == 1);
74153
+ result.push_back(this);
74154
+ return result;
74155
+ } else {
74156
+ if (children.empty()) {
74157
+ // source
74158
+ result.push_back(this);
74159
+ return result;
74160
+ } else {
74161
+ if (children.size() != 1) {
74162
+ throw InternalException("Operator not supported in GetSource");
74163
+ }
74164
+ return children[0]->GetSources();
74165
+ }
74166
+ }
74167
+ }
74168
+
74169
+ bool PhysicalOperator::AllSourcesSupportBatchIndex() const {
74170
+ auto sources = GetSources();
74171
+ for (auto &source : sources) {
74172
+ if (!source->SupportsBatchIndex()) {
74173
+ return false;
74174
+ }
74175
+ }
74176
+ return true;
74177
+ }
74178
+
74179
+ void PhysicalOperator::Verify() {
74180
+ #ifdef DEBUG
74181
+ auto sources = GetSources();
74182
+ D_ASSERT(!sources.empty());
74183
+ for (auto &child : children) {
74184
+ child->Verify();
74185
+ }
74186
+ #endif
74187
+ }
74188
+
73110
74189
  } // namespace duckdb
73111
74190
 
73112
74191
  //===----------------------------------------------------------------------===//
@@ -75721,6 +76800,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalInsert &op
75721
76800
  } // namespace duckdb
75722
76801
 
75723
76802
 
76803
+
75724
76804
  //===----------------------------------------------------------------------===//
75725
76805
  // DuckDB
75726
76806
  //
@@ -75759,17 +76839,36 @@ protected:
75759
76839
  } // namespace duckdb
75760
76840
 
75761
76841
 
76842
+
75762
76843
  namespace duckdb {
75763
76844
 
75764
76845
  unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalLimit &op) {
75765
76846
  D_ASSERT(op.children.size() == 1);
75766
76847
 
75767
76848
  auto plan = CreatePlan(*op.children[0]);
76849
+ auto &config = DBConfig::GetConfig(context);
76850
+ unique_ptr<PhysicalOperator> limit;
76851
+ if (!config.preserve_insertion_order) {
76852
+ // use parallel streaming limit if insertion order is not important
76853
+ limit = make_unique<PhysicalStreamingLimit>(op.types, (idx_t)op.limit_val, op.offset_val, move(op.limit),
76854
+ move(op.offset), op.estimated_cardinality, true);
76855
+ } else {
76856
+ // maintaining insertion order is important
76857
+ bool all_sources_support_batch_index = plan->AllSourcesSupportBatchIndex();
76858
+
76859
+ if (all_sources_support_batch_index) {
76860
+ // source supports batch index: use parallel batch limit
76861
+ limit = make_unique<PhysicalLimit>(op.types, (idx_t)op.limit_val, op.offset_val, move(op.limit),
76862
+ move(op.offset), op.estimated_cardinality);
76863
+ } else {
76864
+ // source does not support batch index: use a non-parallel streaming limit
76865
+ limit = make_unique<PhysicalStreamingLimit>(op.types, (idx_t)op.limit_val, op.offset_val, move(op.limit),
76866
+ move(op.offset), op.estimated_cardinality, false);
76867
+ }
76868
+ }
75768
76869
 
75769
- auto limit = make_unique<PhysicalLimit>(op.types, (idx_t)op.limit_val, op.offset_val, move(op.limit),
75770
- move(op.offset), op.estimated_cardinality);
75771
76870
  limit->children.push_back(move(plan));
75772
- return move(limit);
76871
+ return limit;
75773
76872
  }
75774
76873
 
75775
76874
  } // namespace duckdb
@@ -76877,6 +77976,8 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(unique_ptr<Logica
76877
77976
  profiler.StartPhase("create_plan");
76878
77977
  auto plan = CreatePlan(*op);
76879
77978
  profiler.EndPhase();
77979
+
77980
+ plan->Verify();
76880
77981
  return plan;
76881
77982
  }
76882
77983
 
@@ -77615,10 +78716,11 @@ namespace duckdb {
77615
78716
 
77616
78717
  WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info,
77617
78718
  const LogicalType &result_type_p, ChunkCollection *input,
77618
- WindowAggregationMode mode_p)
78719
+ const ValidityMask &filter_mask_p, WindowAggregationMode mode_p)
77619
78720
  : aggregate(aggregate), bind_info(bind_info), result_type(result_type_p), state(aggregate.state_size()),
77620
78721
  statep(Value::POINTER((idx_t)state.data())), frame(0, 0), active(0, 1),
77621
- statev(Value::POINTER((idx_t)state.data())), internal_nodes(0), input_ref(input), mode(mode_p) {
78722
+ statev(Value::POINTER((idx_t)state.data())), internal_nodes(0), input_ref(input), filter_mask(filter_mask_p),
78723
+ mode(mode_p) {
77622
78724
  #if STANDARD_VECTOR_SIZE < 512
77623
78725
  throw NotImplementedException("Window functions are not supported for vector sizes < 512");
77624
78726
  #endif
@@ -77626,6 +78728,7 @@ WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData
77626
78728
  statev.SetVectorType(VectorType::FLAT_VECTOR); // Prevent conversion of results to constants
77627
78729
 
77628
78730
  if (input_ref && input_ref->ColumnCount() > 0) {
78731
+ filter_sel.Initialize(STANDARD_VECTOR_SIZE);
77629
78732
  inputs.Initialize(input_ref->Types());
77630
78733
  // if we have a frame-by-frame method, share the single state
77631
78734
  if (aggregate.window && UseWindowAPI()) {
@@ -77706,6 +78809,19 @@ void WindowSegmentTree::ExtractFrame(idx_t begin, idx_t end) {
77706
78809
  VectorOperations::Copy(chunk_b.data[i], v, chunk_b_count, 0, chunk_a_count);
77707
78810
  }
77708
78811
  }
78812
+
78813
+ // Slice to any filtered rows
78814
+ if (!filter_mask.AllValid()) {
78815
+ idx_t filtered = 0;
78816
+ for (idx_t i = begin; i < end; ++i) {
78817
+ if (filter_mask.RowIsValid(i)) {
78818
+ filter_sel.set_index(filtered++, i - begin);
78819
+ }
78820
+ }
78821
+ if (filtered != inputs.size()) {
78822
+ inputs.Slice(filter_sel, filtered);
78823
+ }
78824
+ }
77709
78825
  }
77710
78826
 
77711
78827
  void WindowSegmentTree::WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end) {
@@ -77786,7 +78902,16 @@ void WindowSegmentTree::Compute(Vector &result, idx_t rid, idx_t begin, idx_t en
77786
78902
  if (inputs.ColumnCount() == 0) {
77787
78903
  D_ASSERT(GetTypeIdSize(result_type.InternalType()) == sizeof(idx_t));
77788
78904
  auto data = FlatVector::GetData<idx_t>(result);
77789
- data[rid] = end - begin;
78905
+ // Slice to any filtered rows
78906
+ if (!filter_mask.AllValid()) {
78907
+ idx_t filtered = 0;
78908
+ for (idx_t i = begin; i < end; ++i) {
78909
+ filtered += filter_mask.RowIsValid(i);
78910
+ }
78911
+ data[rid] = filtered;
78912
+ } else {
78913
+ data[rid] = end - begin;
78914
+ }
77790
78915
  return;
77791
78916
  }
77792
78917
 
@@ -77827,8 +78952,8 @@ void WindowSegmentTree::Compute(Vector &result, idx_t rid, idx_t begin, idx_t en
77827
78952
  active = FrameBounds(active_chunks.first * STANDARD_VECTOR_SIZE,
77828
78953
  MinValue((active_chunks.second + 1) * STANDARD_VECTOR_SIZE, coll.Count()));
77829
78954
 
77830
- aggregate.window(inputs.data.data(), bind_info, inputs.ColumnCount(), state.data(), frame, prev, result, rid,
77831
- active.first);
78955
+ aggregate.window(inputs.data.data(), filter_mask, bind_info, inputs.ColumnCount(), state.data(), frame, prev,
78956
+ result, rid, active.first);
77832
78957
  return;
77833
78958
  }
77834
78959
 
@@ -82359,6 +83484,19 @@ struct ModeState {
82359
83484
  }
82360
83485
  };
82361
83486
 
83487
+ struct ModeIncluded {
83488
+ inline explicit ModeIncluded(const ValidityMask &fmask_p, const ValidityMask &dmask_p, idx_t bias_p)
83489
+ : fmask(fmask_p), dmask(dmask_p), bias(bias_p) {
83490
+ }
83491
+
83492
+ inline bool operator()(const idx_t &idx) const {
83493
+ return fmask.RowIsValid(idx) && dmask.RowIsValid(idx - bias);
83494
+ }
83495
+ const ValidityMask &fmask;
83496
+ const ValidityMask &dmask;
83497
+ const idx_t bias;
83498
+ };
83499
+
82362
83500
  template <typename KEY_TYPE>
82363
83501
  struct ModeFunction {
82364
83502
  template <class STATE>
@@ -82415,11 +83553,14 @@ struct ModeFunction {
82415
83553
  }
82416
83554
 
82417
83555
  template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
82418
- static void Window(const INPUT_TYPE *data, const ValidityMask &dmask, FunctionData *bind_data_p, STATE *state,
82419
- const FrameBounds &frame, const FrameBounds &prev, Vector &result, idx_t rid, idx_t bias) {
83556
+ static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
83557
+ FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
83558
+ Vector &result, idx_t rid, idx_t bias) {
82420
83559
  auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
82421
83560
  auto &rmask = FlatVector::Validity(result);
82422
83561
 
83562
+ ModeIncluded included(fmask, dmask, bias);
83563
+
82423
83564
  if (!state->frequency_map) {
82424
83565
  state->frequency_map = new unordered_map<KEY_TYPE, size_t>();
82425
83566
  }
@@ -82428,31 +83569,31 @@ struct ModeFunction {
82428
83569
  state->Reset();
82429
83570
  // for f ∈ F do
82430
83571
  for (auto f = frame.first; f < frame.second; ++f) {
82431
- if (dmask.RowIsValid(f - bias)) {
83572
+ if (included(f)) {
82432
83573
  state->ModeAdd(KEY_TYPE(data[f]));
82433
83574
  }
82434
83575
  }
82435
83576
  } else {
82436
83577
  // for f ∈ P \ F do
82437
83578
  for (auto p = prev.first; p < frame.first; ++p) {
82438
- if (dmask.RowIsValid(p - bias)) {
83579
+ if (included(p)) {
82439
83580
  state->ModeRm(KEY_TYPE(data[p]));
82440
83581
  }
82441
83582
  }
82442
83583
  for (auto p = frame.second; p < prev.second; ++p) {
82443
- if (dmask.RowIsValid(p - bias)) {
83584
+ if (included(p)) {
82444
83585
  state->ModeRm(KEY_TYPE(data[p]));
82445
83586
  }
82446
83587
  }
82447
83588
 
82448
83589
  // for f ∈ F \ P do
82449
83590
  for (auto f = frame.first; f < prev.first; ++f) {
82450
- if (dmask.RowIsValid(f - bias)) {
83591
+ if (included(f)) {
82451
83592
  state->ModeAdd(KEY_TYPE(data[f]));
82452
83593
  }
82453
83594
  }
82454
83595
  for (auto f = prev.second; f < frame.second; ++f) {
82455
- if (dmask.RowIsValid(f - bias)) {
83596
+ if (included(f)) {
82456
83597
  state->ModeAdd(KEY_TYPE(data[f]));
82457
83598
  }
82458
83599
  }
@@ -82667,14 +83808,21 @@ struct QuantileState {
82667
83808
  }
82668
83809
  };
82669
83810
 
82670
- struct QuantileNotNull {
82671
- inline explicit QuantileNotNull(const ValidityMask &mask_p, idx_t bias_p) : mask(mask_p), bias(bias_p) {
83811
+ struct QuantileIncluded {
83812
+ inline explicit QuantileIncluded(const ValidityMask &fmask_p, const ValidityMask &dmask_p, idx_t bias_p)
83813
+ : fmask(fmask_p), dmask(dmask_p), bias(bias_p) {
82672
83814
  }
82673
83815
 
82674
83816
  inline bool operator()(const idx_t &idx) const {
82675
- return mask.RowIsValid(idx - bias);
83817
+ return fmask.RowIsValid(idx) && dmask.RowIsValid(idx - bias);
82676
83818
  }
82677
- const ValidityMask &mask;
83819
+
83820
+ inline bool AllValid() const {
83821
+ return fmask.AllValid() && dmask.AllValid();
83822
+ }
83823
+
83824
+ const ValidityMask &fmask;
83825
+ const ValidityMask &dmask;
82678
83826
  const idx_t bias;
82679
83827
  };
82680
83828
 
@@ -82734,7 +83882,7 @@ static idx_t ReplaceIndex(idx_t *index, const FrameBounds &frame, const FrameBou
82734
83882
 
82735
83883
  template <class INPUT_TYPE>
82736
83884
  static inline int CanReplace(const idx_t *index, const INPUT_TYPE *fdata, const idx_t j, const idx_t k0, const idx_t k1,
82737
- const QuantileNotNull &validity) {
83885
+ const QuantileIncluded &validity) {
82738
83886
  D_ASSERT(index);
82739
83887
 
82740
83888
  // NULLs sort to the end, so if we have inserted a NULL,
@@ -83065,12 +84213,13 @@ struct QuantileScalarOperation : public QuantileOperation {
83065
84213
  }
83066
84214
 
83067
84215
  template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
83068
- static void Window(const INPUT_TYPE *data, const ValidityMask &dmask, FunctionData *bind_data_p, STATE *state,
83069
- const FrameBounds &frame, const FrameBounds &prev, Vector &result, idx_t ridx, idx_t bias) {
84216
+ static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
84217
+ FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
84218
+ Vector &result, idx_t ridx, idx_t bias) {
83070
84219
  auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
83071
84220
  auto &rmask = FlatVector::Validity(result);
83072
84221
 
83073
- QuantileNotNull not_null(dmask, bias);
84222
+ QuantileIncluded included(fmask, dmask, bias);
83074
84223
 
83075
84224
  // Lazily initialise frame state
83076
84225
  auto prev_pos = state->pos;
@@ -83090,9 +84239,9 @@ struct QuantileScalarOperation : public QuantileOperation {
83090
84239
  // Fixed frame size
83091
84240
  const auto j = ReplaceIndex(index, frame, prev);
83092
84241
  // We can only replace if the number of NULLs has not changed
83093
- if (dmask.AllValid() || not_null(prev.first) == not_null(prev.second)) {
84242
+ if (included.AllValid() || included(prev.first) == included(prev.second)) {
83094
84243
  Interpolator<DISCRETE> interp(q, prev_pos);
83095
- replace = CanReplace(index, data, j, interp.FRN, interp.CRN, not_null);
84244
+ replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
83096
84245
  if (replace) {
83097
84246
  state->pos = prev_pos;
83098
84247
  }
@@ -83101,9 +84250,9 @@ struct QuantileScalarOperation : public QuantileOperation {
83101
84250
  ReuseIndexes(index, frame, prev);
83102
84251
  }
83103
84252
 
83104
- if (!replace && !dmask.AllValid()) {
84253
+ if (!replace && !included.AllValid()) {
83105
84254
  // Remove the NULLs
83106
- state->pos = std::partition(index, index + state->pos, not_null) - index;
84255
+ state->pos = std::partition(index, index + state->pos, included) - index;
83107
84256
  }
83108
84257
  if (state->pos) {
83109
84258
  Interpolator<DISCRETE> interp(q, state->pos);
@@ -83216,12 +84365,13 @@ struct QuantileListOperation : public QuantileOperation {
83216
84365
  }
83217
84366
 
83218
84367
  template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
83219
- static void Window(const INPUT_TYPE *data, const ValidityMask &dmask, FunctionData *bind_data_p, STATE *state,
83220
- const FrameBounds &frame, const FrameBounds &prev, Vector &list, idx_t lidx, idx_t bias) {
84368
+ static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
84369
+ FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
84370
+ Vector &list, idx_t lidx, idx_t bias) {
83221
84371
  D_ASSERT(bind_data_p);
83222
84372
  auto bind_data = (QuantileBindData *)bind_data_p;
83223
84373
 
83224
- QuantileNotNull not_null(dmask, bias);
84374
+ QuantileIncluded included(fmask, dmask, bias);
83225
84375
 
83226
84376
  // Result is a constant LIST<RESULT_TYPE> with a fixed length
83227
84377
  auto ldata = FlatVector::GetData<RESULT_TYPE>(list);
@@ -83252,11 +84402,11 @@ struct QuantileListOperation : public QuantileOperation {
83252
84402
  // Fixed frame size
83253
84403
  const auto j = ReplaceIndex(index, frame, prev);
83254
84404
  // We can only replace if the number of NULLs has not changed
83255
- if (dmask.AllValid() || not_null(prev.first) == not_null(prev.second)) {
84405
+ if (included.AllValid() || included(prev.first) == included(prev.second)) {
83256
84406
  for (const auto &q : bind_data->order) {
83257
84407
  const auto &quantile = bind_data->quantiles[q];
83258
84408
  Interpolator<DISCRETE> interp(quantile, prev_pos);
83259
- const auto replace = CanReplace(index, data, j, interp.FRN, interp.CRN, not_null);
84409
+ const auto replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
83260
84410
  if (replace < 0) {
83261
84411
  // Replacement is before this quantile, so the rest will be replaceable too.
83262
84412
  replaceable.first = MinValue(replaceable.first, interp.FRN);
@@ -83276,9 +84426,9 @@ struct QuantileListOperation : public QuantileOperation {
83276
84426
  ReuseIndexes(index, frame, prev);
83277
84427
  }
83278
84428
 
83279
- if (replaceable.first >= replaceable.second && !dmask.AllValid()) {
84429
+ if (replaceable.first >= replaceable.second && !included.AllValid()) {
83280
84430
  // Remove the NULLs
83281
- state->pos = std::partition(index, index + state->pos, not_null) - index;
84431
+ state->pos = std::partition(index, index + state->pos, included) - index;
83282
84432
  }
83283
84433
 
83284
84434
  if (state->pos) {
@@ -83575,12 +84725,13 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
83575
84725
  }
83576
84726
 
83577
84727
  template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
83578
- static void Window(const INPUT_TYPE *data, const ValidityMask &dmask, FunctionData *bind_data_p, STATE *state,
83579
- const FrameBounds &frame, const FrameBounds &prev, Vector &result, idx_t ridx, idx_t bias) {
84728
+ static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
84729
+ FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
84730
+ Vector &result, idx_t ridx, idx_t bias) {
83580
84731
  auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
83581
84732
  auto &rmask = FlatVector::Validity(result);
83582
84733
 
83583
- QuantileNotNull not_null(dmask, bias);
84734
+ QuantileIncluded included(fmask, dmask, bias);
83584
84735
 
83585
84736
  // Lazily initialise frame state
83586
84737
  auto prev_pos = state->pos;
@@ -83601,7 +84752,7 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
83601
84752
  // the median has changed, the previous order is not correct.
83602
84753
  // It is probably close, however, and so reuse is helpful.
83603
84754
  ReuseIndexes(index2, frame, prev);
83604
- std::partition(index2, index2 + state->pos, not_null);
84755
+ std::partition(index2, index2 + state->pos, included);
83605
84756
 
83606
84757
  // Find the two positions needed for the median
83607
84758
  const float q = 0.5;
@@ -83610,10 +84761,10 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
83610
84761
  if (frame.first == prev.first + 1 && frame.second == prev.second + 1) {
83611
84762
  // Fixed frame size
83612
84763
  const auto j = ReplaceIndex(index, frame, prev);
83613
- // We can only replace if the number of NULls has not changed
83614
- if (dmask.AllValid() || not_null(prev.first) == not_null(prev.second)) {
84764
+ // We can only replace if the number of NULLs has not changed
84765
+ if (included.AllValid() || included(prev.first) == included(prev.second)) {
83615
84766
  Interpolator<false> interp(q, prev_pos);
83616
- replace = CanReplace(index, data, j, interp.FRN, interp.CRN, not_null);
84767
+ replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
83617
84768
  if (replace) {
83618
84769
  state->pos = prev_pos;
83619
84770
  }
@@ -83622,9 +84773,9 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
83622
84773
  ReuseIndexes(index, frame, prev);
83623
84774
  }
83624
84775
 
83625
- if (!replace && !dmask.AllValid()) {
84776
+ if (!replace && !included.AllValid()) {
83626
84777
  // Remove the NULLs
83627
- state->pos = std::partition(index, index + state->pos, not_null) - index;
84778
+ state->pos = std::partition(index, index + state->pos, included) - index;
83628
84779
  }
83629
84780
 
83630
84781
  if (state->pos) {
@@ -109047,6 +110198,19 @@ double TableScanProgress(ClientContext &context, const FunctionData *bind_data_p
109047
110198
  return percentage;
109048
110199
  }
109049
110200
 
110201
+ idx_t TableScanGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
110202
+ FunctionOperatorData *operator_state, ParallelState *parallel_state_p) {
110203
+ auto &bind_data = (const TableScanBindData &)*bind_data_p;
110204
+ auto &state = (TableScanOperatorData &)*operator_state;
110205
+ if (state.scan_state.row_group_scan_state.row_group) {
110206
+ return state.scan_state.row_group_scan_state.row_group->start;
110207
+ }
110208
+ if (state.scan_state.local_state.max_index > 0) {
110209
+ return bind_data.table->storage->GetTotalRows() + state.scan_state.local_state.chunk_index;
110210
+ }
110211
+ return 0;
110212
+ }
110213
+
109050
110214
  void TableScanDependency(unordered_set<CatalogEntry *> &entries, const FunctionData *bind_data_p) {
109051
110215
  auto &bind_data = (const TableScanBindData &)*bind_data_p;
109052
110216
  entries.insert(bind_data.table);
@@ -109251,7 +110415,9 @@ void TableScanPushdownComplexFilter(ClientContext &context, LogicalGet &get, Fun
109251
110415
  get.function.init_parallel_state = nullptr;
109252
110416
  get.function.parallel_state_next = nullptr;
109253
110417
  get.function.table_scan_progress = nullptr;
110418
+ get.function.get_batch_index = nullptr;
109254
110419
  get.function.filter_pushdown = false;
110420
+ get.function.supports_batch_index = false;
109255
110421
  } else {
109256
110422
  bind_data.result_ids.clear();
109257
110423
  }
@@ -109280,8 +110446,10 @@ TableFunction TableScanFunction::GetFunction() {
109280
110446
  scan_function.parallel_init = TableScanParallelInit;
109281
110447
  scan_function.parallel_state_next = TableScanParallelStateNext;
109282
110448
  scan_function.table_scan_progress = TableScanProgress;
110449
+ scan_function.get_batch_index = TableScanGetBatchIndex;
109283
110450
  scan_function.projection_pushdown = true;
109284
110451
  scan_function.filter_pushdown = true;
110452
+ scan_function.supports_batch_index = true;
109285
110453
  return scan_function;
109286
110454
  }
109287
110455
 
@@ -109474,7 +110642,7 @@ TableFunction::TableFunction(string name, vector<LogicalType> arguments, table_f
109474
110642
  cardinality(cardinality), pushdown_complex_filter(pushdown_complex_filter), to_string(to_string),
109475
110643
  max_threads(max_threads), init_parallel_state(init_parallel_state), parallel_function(parallel_function),
109476
110644
  parallel_init(parallel_init), parallel_state_next(parallel_state_next), table_scan_progress(query_progress),
109477
- projection_pushdown(projection_pushdown), filter_pushdown(filter_pushdown) {
110645
+ projection_pushdown(projection_pushdown), filter_pushdown(filter_pushdown), supports_batch_index(false) {
109478
110646
  }
109479
110647
 
109480
110648
  TableFunction::TableFunction(const vector<LogicalType> &arguments, table_function_t function,
@@ -113024,6 +114192,7 @@ private:
113024
114192
 
113025
114193
 
113026
114194
 
114195
+
113027
114196
  namespace duckdb {
113028
114197
 
113029
114198
  struct ActiveQueryContext {
@@ -113189,14 +114358,15 @@ const string &ClientContext::GetCurrentQuery() {
113189
114358
  return active_query->query;
113190
114359
  }
113191
114360
 
113192
- unique_ptr<QueryResult> ClientContext::FetchResultInternal(ClientContextLock &lock, PendingQueryResult &pending,
113193
- bool allow_stream_result) {
114361
+ unique_ptr<QueryResult> ClientContext::FetchResultInternal(ClientContextLock &lock, PendingQueryResult &pending) {
113194
114362
  D_ASSERT(active_query);
113195
114363
  D_ASSERT(active_query->open_result == &pending);
113196
114364
  D_ASSERT(active_query->prepared);
114365
+ auto &executor = GetExecutor();
113197
114366
  auto &prepared = *active_query->prepared;
113198
- bool create_stream_result = prepared.properties.allow_stream_result && allow_stream_result;
114367
+ bool create_stream_result = prepared.properties.allow_stream_result && pending.allow_stream_result;
113199
114368
  if (create_stream_result) {
114369
+ D_ASSERT(!executor.HasResultCollector());
113200
114370
  active_query->progress_bar.reset();
113201
114371
  query_progress = -1;
113202
114372
 
@@ -113207,25 +114377,32 @@ unique_ptr<QueryResult> ClientContext::FetchResultInternal(ClientContextLock &lo
113207
114377
  active_query->open_result = stream_result.get();
113208
114378
  return move(stream_result);
113209
114379
  }
113210
- // create a materialized result by continuously fetching
113211
- auto result = make_unique<MaterializedQueryResult>(pending.statement_type, pending.properties, pending.types,
113212
- pending.names, shared_from_this());
113213
- result->properties = pending.properties;
113214
- while (true) {
113215
- auto chunk = FetchInternal(lock, GetExecutor(), *result);
113216
- if (!chunk || chunk->size() == 0) {
113217
- break;
113218
- }
114380
+ unique_ptr<QueryResult> result;
114381
+ if (executor.HasResultCollector()) {
114382
+ // we have a result collector - fetch the result directly from the result collector
114383
+ result = executor.GetResult();
114384
+ CleanupInternal(lock, result.get(), false);
114385
+ } else {
114386
+ // no result collector - create a materialized result by continuously fetching
114387
+ auto materialized_result = make_unique<MaterializedQueryResult>(
114388
+ pending.statement_type, pending.properties, pending.types, pending.names, shared_from_this());
114389
+ while (true) {
114390
+ auto chunk = FetchInternal(lock, GetExecutor(), *materialized_result);
114391
+ if (!chunk || chunk->size() == 0) {
114392
+ break;
114393
+ }
113219
114394
  #ifdef DEBUG
113220
- for (idx_t i = 0; i < chunk->ColumnCount(); i++) {
113221
- if (pending.types[i].id() == LogicalTypeId::VARCHAR) {
113222
- chunk->data[i].UTFVerify(chunk->size());
114395
+ for (idx_t i = 0; i < chunk->ColumnCount(); i++) {
114396
+ if (pending.types[i].id() == LogicalTypeId::VARCHAR) {
114397
+ chunk->data[i].UTFVerify(chunk->size());
114398
+ }
113223
114399
  }
113224
- }
113225
114400
  #endif
113226
- result->collection.Append(*chunk);
114401
+ materialized_result->collection.Append(*chunk);
114402
+ }
114403
+ result = move(materialized_result);
113227
114404
  }
113228
- return move(result);
114405
+ return result;
113229
114406
  }
113230
114407
 
113231
114408
  shared_ptr<PreparedStatementData> ClientContext::CreatePreparedStatement(ClientContextLock &lock, const string &query,
@@ -113288,7 +114465,7 @@ double ClientContext::GetProgress() {
113288
114465
 
113289
114466
  unique_ptr<PendingQueryResult> ClientContext::PendingPreparedStatement(ClientContextLock &lock,
113290
114467
  shared_ptr<PreparedStatementData> statement_p,
113291
- vector<Value> bound_values) {
114468
+ PendingQueryParameters parameters) {
113292
114469
  D_ASSERT(active_query);
113293
114470
  auto &statement = *statement_p;
113294
114471
  if (ActiveTransaction().IsInvalidated() && statement.properties.requires_valid_transaction) {
@@ -113301,21 +114478,32 @@ unique_ptr<PendingQueryResult> ClientContext::PendingPreparedStatement(ClientCon
113301
114478
  }
113302
114479
 
113303
114480
  // bind the bound values before execution
113304
- statement.Bind(move(bound_values));
114481
+ statement.Bind(parameters.parameters ? *parameters.parameters : vector<Value>());
113305
114482
 
113306
114483
  active_query->executor = make_unique<Executor>(*this);
113307
114484
  auto &executor = *active_query->executor;
113308
114485
  if (config.enable_progress_bar) {
113309
- active_query->progress_bar = make_unique<ProgressBar>(executor, config.wait_time);
114486
+ active_query->progress_bar = make_unique<ProgressBar>(executor, config.wait_time, config.print_progress_bar);
113310
114487
  active_query->progress_bar->Start();
113311
114488
  query_progress = 0;
113312
114489
  }
113313
- executor.Initialize(statement.plan.get());
114490
+ auto stream_result = parameters.allow_stream_result && statement.properties.allow_stream_result;
114491
+ if (!stream_result && statement.properties.return_type == StatementReturnType::QUERY_RESULT) {
114492
+ unique_ptr<PhysicalResultCollector> collector;
114493
+ auto &config = ClientConfig::GetConfig(*this);
114494
+ auto get_method =
114495
+ config.result_collector ? config.result_collector : PhysicalResultCollector::GetResultCollector;
114496
+ collector = get_method(*this, statement);
114497
+ D_ASSERT(collector->type == PhysicalOperatorType::RESULT_COLLECTOR);
114498
+ executor.Initialize(move(collector));
114499
+ } else {
114500
+ executor.Initialize(statement.plan.get());
114501
+ }
113314
114502
  auto types = executor.GetTypes();
113315
114503
  D_ASSERT(types == statement.types);
113316
114504
  D_ASSERT(!active_query->open_result);
113317
114505
 
113318
- auto pending_result = make_unique<PendingQueryResult>(shared_from_this(), *statement_p, move(types));
114506
+ auto pending_result = make_unique<PendingQueryResult>(shared_from_this(), *statement_p, move(types), stream_result);
113319
114507
  active_query->prepared = move(statement_p);
113320
114508
  active_query->open_result = pending_result.get();
113321
114509
  return pending_result;
@@ -113443,49 +114631,59 @@ unique_ptr<PreparedStatement> ClientContext::Prepare(const string &query) {
113443
114631
 
113444
114632
  unique_ptr<PendingQueryResult> ClientContext::PendingQueryPreparedInternal(ClientContextLock &lock, const string &query,
113445
114633
  shared_ptr<PreparedStatementData> &prepared,
113446
- vector<Value> &values) {
114634
+ PendingQueryParameters parameters) {
113447
114635
  try {
113448
114636
  InitialCleanup(lock);
113449
114637
  } catch (std::exception &ex) {
113450
114638
  return make_unique<PendingQueryResult>(ex.what());
113451
114639
  }
113452
- return PendingStatementOrPreparedStatementInternal(lock, query, nullptr, prepared, &values);
114640
+ return PendingStatementOrPreparedStatementInternal(lock, query, nullptr, prepared, parameters);
113453
114641
  }
113454
114642
 
113455
- unique_ptr<PendingQueryResult>
113456
- ClientContext::PendingQuery(const string &query, shared_ptr<PreparedStatementData> &prepared, vector<Value> &values) {
114643
+ unique_ptr<PendingQueryResult> ClientContext::PendingQuery(const string &query,
114644
+ shared_ptr<PreparedStatementData> &prepared,
114645
+ PendingQueryParameters parameters) {
113457
114646
  auto lock = LockContext();
113458
- return PendingQueryPreparedInternal(*lock, query, prepared, values);
114647
+ return PendingQueryPreparedInternal(*lock, query, prepared, parameters);
113459
114648
  }
113460
114649
 
113461
114650
  unique_ptr<QueryResult> ClientContext::Execute(const string &query, shared_ptr<PreparedStatementData> &prepared,
113462
- vector<Value> &values, bool allow_stream_result) {
114651
+ PendingQueryParameters parameters) {
113463
114652
  auto lock = LockContext();
113464
- auto pending = PendingQueryPreparedInternal(*lock, query, prepared, values);
114653
+ auto pending = PendingQueryPreparedInternal(*lock, query, prepared, parameters);
113465
114654
  if (!pending->success) {
113466
114655
  return make_unique<MaterializedQueryResult>(pending->error);
113467
114656
  }
113468
- return pending->ExecuteInternal(*lock, allow_stream_result);
114657
+ return pending->ExecuteInternal(*lock);
114658
+ }
114659
+
114660
+ unique_ptr<QueryResult> ClientContext::Execute(const string &query, shared_ptr<PreparedStatementData> &prepared,
114661
+ vector<Value> &values, bool allow_stream_result) {
114662
+ PendingQueryParameters parameters;
114663
+ parameters.parameters = &values;
114664
+ parameters.allow_stream_result = allow_stream_result;
114665
+ return Execute(query, prepared, parameters);
113469
114666
  }
113470
114667
 
113471
114668
  unique_ptr<PendingQueryResult> ClientContext::PendingStatementInternal(ClientContextLock &lock, const string &query,
113472
- unique_ptr<SQLStatement> statement) {
114669
+ unique_ptr<SQLStatement> statement,
114670
+ PendingQueryParameters parameters) {
113473
114671
  // prepare the query for execution
113474
114672
  auto prepared = CreatePreparedStatement(lock, query, move(statement));
113475
- // by default, no values are bound
113476
- vector<Value> bound_values;
113477
114673
  // execute the prepared statement
113478
- return PendingPreparedStatement(lock, move(prepared), move(bound_values));
114674
+ return PendingPreparedStatement(lock, move(prepared), parameters);
113479
114675
  }
113480
114676
 
113481
114677
  unique_ptr<QueryResult> ClientContext::RunStatementInternal(ClientContextLock &lock, const string &query,
113482
114678
  unique_ptr<SQLStatement> statement,
113483
114679
  bool allow_stream_result, bool verify) {
113484
- auto pending = PendingQueryInternal(lock, move(statement), verify);
114680
+ PendingQueryParameters parameters;
114681
+ parameters.allow_stream_result = allow_stream_result;
114682
+ auto pending = PendingQueryInternal(lock, move(statement), parameters, verify);
113485
114683
  if (!pending->success) {
113486
114684
  return make_unique<MaterializedQueryResult>(move(pending->error));
113487
114685
  }
113488
- return ExecutePendingQueryInternal(lock, *pending, allow_stream_result);
114686
+ return ExecutePendingQueryInternal(lock, *pending);
113489
114687
  }
113490
114688
 
113491
114689
  bool ClientContext::IsActiveResult(ClientContextLock &lock, BaseQueryResult *result) {
@@ -113508,7 +114706,7 @@ static bool IsExplainAnalyze(SQLStatement *statement) {
113508
114706
 
113509
114707
  unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatementInternal(
113510
114708
  ClientContextLock &lock, const string &query, unique_ptr<SQLStatement> statement,
113511
- shared_ptr<PreparedStatementData> &prepared, vector<Value> *values) {
114709
+ shared_ptr<PreparedStatementData> &prepared, PendingQueryParameters parameters) {
113512
114710
  // check if we are on AutoCommit. In this case we should start a transaction.
113513
114711
  if (statement && config.query_verification_enabled) {
113514
114712
  // query verification is enabled
@@ -113545,13 +114743,12 @@ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatemen
113545
114743
  break;
113546
114744
  }
113547
114745
  }
113548
- return PendingStatementOrPreparedStatement(lock, query, move(statement), prepared, values);
114746
+ return PendingStatementOrPreparedStatement(lock, query, move(statement), prepared, parameters);
113549
114747
  }
113550
114748
 
113551
- unique_ptr<PendingQueryResult>
113552
- ClientContext::PendingStatementOrPreparedStatement(ClientContextLock &lock, const string &query,
113553
- unique_ptr<SQLStatement> statement,
113554
- shared_ptr<PreparedStatementData> &prepared, vector<Value> *values) {
114749
+ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatement(
114750
+ ClientContextLock &lock, const string &query, unique_ptr<SQLStatement> statement,
114751
+ shared_ptr<PreparedStatementData> &prepared, PendingQueryParameters parameters) {
113555
114752
  unique_ptr<PendingQueryResult> result;
113556
114753
 
113557
114754
  BeginQueryInternal(lock, query);
@@ -113561,13 +114758,14 @@ ClientContext::PendingStatementOrPreparedStatement(ClientContextLock &lock, cons
113561
114758
  bool invalidate_query = true;
113562
114759
  try {
113563
114760
  if (statement) {
113564
- result = PendingStatementInternal(lock, query, move(statement));
114761
+ result = PendingStatementInternal(lock, query, move(statement), parameters);
113565
114762
  } else {
113566
114763
  auto &catalog = Catalog::GetCatalog(*this);
113567
114764
  if (prepared->unbound_statement && (catalog.GetCatalogVersion() != prepared->catalog_version ||
113568
114765
  !prepared->properties.bound_all_parameters)) {
113569
114766
  // catalog was modified: rebind the statement before execution
113570
- auto new_prepared = CreatePreparedStatement(lock, query, prepared->unbound_statement->Copy(), values);
114767
+ auto new_prepared =
114768
+ CreatePreparedStatement(lock, query, prepared->unbound_statement->Copy(), parameters.parameters);
113571
114769
  if (prepared->types != new_prepared->types && prepared->properties.bound_all_parameters) {
113572
114770
  throw BinderException("Rebinding statement after catalog change resulted in change of types");
113573
114771
  }
@@ -113576,7 +114774,7 @@ ClientContext::PendingStatementOrPreparedStatement(ClientContextLock &lock, cons
113576
114774
  prepared = move(new_prepared);
113577
114775
  prepared->properties.bound_all_parameters = false;
113578
114776
  }
113579
- result = PendingPreparedStatement(lock, prepared, *values);
114777
+ result = PendingPreparedStatement(lock, prepared, parameters);
113580
114778
  }
113581
114779
  } catch (StandardException &ex) {
113582
114780
  // standard exceptions do not invalidate the current transaction
@@ -113618,8 +114816,8 @@ void ClientContext::LogQueryInternal(ClientContextLock &, const string &query) {
113618
114816
  }
113619
114817
 
113620
114818
  unique_ptr<QueryResult> ClientContext::Query(unique_ptr<SQLStatement> statement, bool allow_stream_result) {
113621
- auto pending_query = PendingQuery(move(statement));
113622
- return pending_query->Execute(allow_stream_result);
114819
+ auto pending_query = PendingQuery(move(statement), allow_stream_result);
114820
+ return pending_query->Execute();
113623
114821
  }
113624
114822
 
113625
114823
  unique_ptr<QueryResult> ClientContext::Query(const string &query, bool allow_stream_result) {
@@ -113644,13 +114842,14 @@ unique_ptr<QueryResult> ClientContext::Query(const string &query, bool allow_str
113644
114842
  for (idx_t i = 0; i < statements.size(); i++) {
113645
114843
  auto &statement = statements[i];
113646
114844
  bool is_last_statement = i + 1 == statements.size();
113647
- bool stream_result = allow_stream_result && is_last_statement;
113648
- auto pending_query = PendingQueryInternal(*lock, move(statement));
114845
+ PendingQueryParameters parameters;
114846
+ parameters.allow_stream_result = allow_stream_result && is_last_statement;
114847
+ auto pending_query = PendingQueryInternal(*lock, move(statement), parameters);
113649
114848
  unique_ptr<QueryResult> current_result;
113650
114849
  if (!pending_query->success) {
113651
114850
  current_result = make_unique<MaterializedQueryResult>(pending_query->error);
113652
114851
  } else {
113653
- current_result = ExecutePendingQueryInternal(*lock, *pending_query, stream_result);
114852
+ current_result = ExecutePendingQueryInternal(*lock, *pending_query);
113654
114853
  }
113655
114854
  // now append the result to the list of results
113656
114855
  if (!last_result) {
@@ -113679,7 +114878,7 @@ bool ClientContext::ParseStatements(ClientContextLock &lock, const string &query
113679
114878
  }
113680
114879
  }
113681
114880
 
113682
- unique_ptr<PendingQueryResult> ClientContext::PendingQuery(const string &query) {
114881
+ unique_ptr<PendingQueryResult> ClientContext::PendingQuery(const string &query, bool allow_stream_result) {
113683
114882
  auto lock = LockContext();
113684
114883
 
113685
114884
  string error;
@@ -113690,28 +114889,33 @@ unique_ptr<PendingQueryResult> ClientContext::PendingQuery(const string &query)
113690
114889
  if (statements.size() != 1) {
113691
114890
  return make_unique<PendingQueryResult>("PendingQuery can only take a single statement");
113692
114891
  }
113693
- return PendingQueryInternal(*lock, move(statements[0]));
114892
+ PendingQueryParameters parameters;
114893
+ parameters.allow_stream_result = allow_stream_result;
114894
+ return PendingQueryInternal(*lock, move(statements[0]), parameters);
113694
114895
  }
113695
114896
 
113696
- unique_ptr<PendingQueryResult> ClientContext::PendingQuery(unique_ptr<SQLStatement> statement) {
114897
+ unique_ptr<PendingQueryResult> ClientContext::PendingQuery(unique_ptr<SQLStatement> statement,
114898
+ bool allow_stream_result) {
113697
114899
  auto lock = LockContext();
113698
- return PendingQueryInternal(*lock, move(statement));
114900
+ PendingQueryParameters parameters;
114901
+ parameters.allow_stream_result = allow_stream_result;
114902
+ return PendingQueryInternal(*lock, move(statement), parameters);
113699
114903
  }
113700
114904
 
113701
114905
  unique_ptr<PendingQueryResult> ClientContext::PendingQueryInternal(ClientContextLock &lock,
113702
- unique_ptr<SQLStatement> statement, bool verify) {
114906
+ unique_ptr<SQLStatement> statement,
114907
+ PendingQueryParameters parameters, bool verify) {
113703
114908
  auto query = statement->query;
113704
114909
  shared_ptr<PreparedStatementData> prepared;
113705
114910
  if (verify) {
113706
- return PendingStatementOrPreparedStatementInternal(lock, query, move(statement), prepared, nullptr);
114911
+ return PendingStatementOrPreparedStatementInternal(lock, query, move(statement), prepared, parameters);
113707
114912
  } else {
113708
- return PendingStatementOrPreparedStatement(lock, query, move(statement), prepared, nullptr);
114913
+ return PendingStatementOrPreparedStatement(lock, query, move(statement), prepared, parameters);
113709
114914
  }
113710
114915
  }
113711
114916
 
113712
- unique_ptr<QueryResult> ClientContext::ExecutePendingQueryInternal(ClientContextLock &lock, PendingQueryResult &query,
113713
- bool allow_stream_result) {
113714
- return query.ExecuteInternal(lock, allow_stream_result);
114917
+ unique_ptr<QueryResult> ClientContext::ExecutePendingQueryInternal(ClientContextLock &lock, PendingQueryResult &query) {
114918
+ return query.ExecuteInternal(lock);
113715
114919
  }
113716
114920
 
113717
114921
  void ClientContext::Interrupt() {
@@ -114394,6 +115598,16 @@ struct PreserveIdentifierCase {
114394
115598
  static Value GetSetting(ClientContext &context);
114395
115599
  };
114396
115600
 
115601
+ struct PreserveInsertionOrder {
115602
+ static constexpr const char *Name = "preserve_insertion_order";
115603
+ static constexpr const char *Description =
115604
+ "Whether or not to preserve insertion order. If set to false the system is allowed to re-order any results "
115605
+ "that do not contain ORDER BY clauses.";
115606
+ static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN;
115607
+ static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
115608
+ static Value GetSetting(ClientContext &context);
115609
+ };
115610
+
114397
115611
  struct ProfilerHistorySize {
114398
115612
  static constexpr const char *Name = "profiler_history_size";
114399
115613
  static constexpr const char *Description = "Sets the profiler history size";
@@ -114509,6 +115723,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
114509
115723
  DUCKDB_GLOBAL_ALIAS("null_order", DefaultNullOrderSetting),
114510
115724
  DUCKDB_LOCAL(PerfectHashThresholdSetting),
114511
115725
  DUCKDB_LOCAL(PreserveIdentifierCase),
115726
+ DUCKDB_GLOBAL(PreserveInsertionOrder),
114512
115727
  DUCKDB_LOCAL(ProfilerHistorySize),
114513
115728
  DUCKDB_LOCAL(ProfileOutputSetting),
114514
115729
  DUCKDB_LOCAL(ProfilingModeSetting),
@@ -114978,12 +116193,12 @@ unique_ptr<MaterializedQueryResult> Connection::Query(unique_ptr<SQLStatement> s
114978
116193
  return unique_ptr_cast<QueryResult, MaterializedQueryResult>(move(result));
114979
116194
  }
114980
116195
 
114981
- unique_ptr<PendingQueryResult> Connection::PendingQuery(const string &query) {
114982
- return context->PendingQuery(query);
116196
+ unique_ptr<PendingQueryResult> Connection::PendingQuery(const string &query, bool allow_stream_result) {
116197
+ return context->PendingQuery(query, allow_stream_result);
114983
116198
  }
114984
116199
 
114985
- unique_ptr<PendingQueryResult> Connection::PendingQuery(unique_ptr<SQLStatement> statement) {
114986
- return context->PendingQuery(move(statement));
116200
+ unique_ptr<PendingQueryResult> Connection::PendingQuery(unique_ptr<SQLStatement> statement, bool allow_stream_result) {
116201
+ return context->PendingQuery(move(statement), allow_stream_result);
114987
116202
  }
114988
116203
 
114989
116204
  unique_ptr<PreparedStatement> Connection::Prepare(const string &query) {
@@ -124082,10 +125297,10 @@ unique_ptr<DataChunk> MaterializedQueryResult::FetchRaw() {
124082
125297
  namespace duckdb {
124083
125298
 
124084
125299
  PendingQueryResult::PendingQueryResult(shared_ptr<ClientContext> context_p, PreparedStatementData &statement,
124085
- vector<LogicalType> types_p)
125300
+ vector<LogicalType> types_p, bool allow_stream_result)
124086
125301
  : BaseQueryResult(QueryResultType::PENDING_RESULT, statement.statement_type, statement.properties, move(types_p),
124087
125302
  statement.names),
124088
- context(move(context_p)) {
125303
+ context(move(context_p)), allow_stream_result(allow_stream_result) {
124089
125304
  }
124090
125305
 
124091
125306
  PendingQueryResult::PendingQueryResult(string error) : BaseQueryResult(QueryResultType::PENDING_RESULT, move(error)) {
@@ -124123,21 +125338,21 @@ PendingExecutionResult PendingQueryResult::ExecuteTaskInternal(ClientContextLock
124123
125338
  return context->ExecuteTaskInternal(lock, *this);
124124
125339
  }
124125
125340
 
124126
- unique_ptr<QueryResult> PendingQueryResult::ExecuteInternal(ClientContextLock &lock, bool allow_streaming_result) {
125341
+ unique_ptr<QueryResult> PendingQueryResult::ExecuteInternal(ClientContextLock &lock) {
124127
125342
  CheckExecutableInternal(lock);
124128
125343
  while (ExecuteTaskInternal(lock) == PendingExecutionResult::RESULT_NOT_READY) {
124129
125344
  }
124130
125345
  if (!success) {
124131
125346
  return make_unique<MaterializedQueryResult>(error);
124132
125347
  }
124133
- auto result = context->FetchResultInternal(lock, *this, allow_streaming_result);
125348
+ auto result = context->FetchResultInternal(lock, *this);
124134
125349
  Close();
124135
125350
  return result;
124136
125351
  }
124137
125352
 
124138
- unique_ptr<QueryResult> PendingQueryResult::Execute(bool allow_streaming_result) {
125353
+ unique_ptr<QueryResult> PendingQueryResult::Execute() {
124139
125354
  auto lock = LockContext();
124140
- return ExecuteInternal(*lock, allow_streaming_result);
125355
+ return ExecuteInternal(*lock);
124141
125356
  }
124142
125357
 
124143
125358
  void PendingQueryResult::Close() {
@@ -124190,19 +125405,22 @@ const vector<string> &PreparedStatement::GetNames() {
124190
125405
  }
124191
125406
 
124192
125407
  unique_ptr<QueryResult> PreparedStatement::Execute(vector<Value> &values, bool allow_stream_result) {
124193
- auto pending = PendingQuery(values);
125408
+ auto pending = PendingQuery(values, allow_stream_result);
124194
125409
  if (!pending->success) {
124195
125410
  return make_unique<MaterializedQueryResult>(pending->error);
124196
125411
  }
124197
- return pending->Execute(allow_stream_result && data->properties.allow_stream_result);
125412
+ return pending->Execute();
124198
125413
  }
124199
125414
 
124200
- unique_ptr<PendingQueryResult> PreparedStatement::PendingQuery(vector<Value> &values) {
125415
+ unique_ptr<PendingQueryResult> PreparedStatement::PendingQuery(vector<Value> &values, bool allow_stream_result) {
124201
125416
  if (!success) {
124202
125417
  throw InvalidInputException("Attempting to execute an unsuccessfully prepared statement!");
124203
125418
  }
124204
125419
  D_ASSERT(data);
124205
- auto result = context->PendingQuery(query, data, values);
125420
+ PendingQueryParameters parameters;
125421
+ parameters.parameters = &values;
125422
+ parameters.allow_stream_result = allow_stream_result && data->properties.allow_stream_result;
125423
+ auto result = context->PendingQuery(query, data, parameters);
124206
125424
  return result;
124207
125425
  }
124208
125426
 
@@ -124344,6 +125562,7 @@ bool QueryProfiler::OperatorRequiresProfiling(PhysicalOperatorType op_type) {
124344
125562
  case PhysicalOperatorType::STREAMING_SAMPLE:
124345
125563
  case PhysicalOperatorType::LIMIT:
124346
125564
  case PhysicalOperatorType::LIMIT_PERCENT:
125565
+ case PhysicalOperatorType::STREAMING_LIMIT:
124347
125566
  case PhysicalOperatorType::TOP_N:
124348
125567
  case PhysicalOperatorType::WINDOW:
124349
125568
  case PhysicalOperatorType::UNNEST:
@@ -124847,27 +126066,10 @@ unique_ptr<QueryProfiler::TreeNode> QueryProfiler::CreateTree(PhysicalOperator *
124847
126066
  node->extra_info = root->ParamsToString();
124848
126067
  node->depth = depth;
124849
126068
  tree_map[root] = node.get();
124850
- for (auto &child : root->children) {
124851
- auto child_node = CreateTree(child.get(), depth + 1);
124852
- node->children.push_back(move(child_node));
124853
- }
124854
- switch (root->type) {
124855
- case PhysicalOperatorType::DELIM_JOIN: {
124856
- auto &delim_join = (PhysicalDelimJoin &)*root;
124857
- auto child_node = CreateTree((PhysicalOperator *)delim_join.join.get(), depth + 1);
124858
- node->children.push_back(move(child_node));
124859
- child_node = CreateTree((PhysicalOperator *)delim_join.distinct.get(), depth + 1);
124860
- node->children.push_back(move(child_node));
124861
- break;
124862
- }
124863
- case PhysicalOperatorType::EXECUTE: {
124864
- auto &execute = (PhysicalExecute &)*root;
124865
- auto child_node = CreateTree((PhysicalOperator *)execute.plan, depth + 1);
126069
+ auto children = root->GetChildren();
126070
+ for (auto &child : children) {
126071
+ auto child_node = CreateTree(child, depth + 1);
124866
126072
  node->children.push_back(move(child_node));
124867
- break;
124868
- }
124869
- default:
124870
- break;
124871
126073
  }
124872
126074
  return node;
124873
126075
  }
@@ -128174,6 +129376,18 @@ Value PreserveIdentifierCase::GetSetting(ClientContext &context) {
128174
129376
  return Value::BOOLEAN(ClientConfig::GetConfig(context).preserve_identifier_case);
128175
129377
  }
128176
129378
 
129379
+ //===--------------------------------------------------------------------===//
129380
+ // PreserveInsertionOrder
129381
+ //===--------------------------------------------------------------------===//
129382
+ void PreserveInsertionOrder::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
129383
+ config.preserve_insertion_order = input.GetValue<bool>();
129384
+ }
129385
+
129386
+ Value PreserveInsertionOrder::GetSetting(ClientContext &context) {
129387
+ auto &config = DBConfig::GetConfig(context);
129388
+ return Value::BOOLEAN(config.preserve_insertion_order);
129389
+ }
129390
+
128177
129391
  //===--------------------------------------------------------------------===//
128178
129392
  // Profiler History Size
128179
129393
  //===--------------------------------------------------------------------===//
@@ -137610,11 +138824,6 @@ void Event::SetTasks(vector<unique_ptr<Task>> tasks) {
137610
138824
 
137611
138825
 
137612
138826
 
137613
-
137614
-
137615
-
137616
-
137617
-
137618
138827
  //===----------------------------------------------------------------------===//
137619
138828
  // DuckDB
137620
138829
  //
@@ -137709,7 +138918,9 @@ private:
137709
138918
  //! Whether or not the pipeline has been finalized (used for verification only)
137710
138919
  bool finalized = false;
137711
138920
  //! Whether or not the pipeline has finished processing
137712
- bool finished_processing = false;
138921
+ int32_t finished_processing_idx = -1;
138922
+ //! Whether or not this pipeline requires keeping track of the batch index of the source
138923
+ bool requires_batch_index = false;
137713
138924
 
137714
138925
  //! Cached chunks for any operators that require caching
137715
138926
  vector<unique_ptr<DataChunk>> cached_chunks;
@@ -137722,6 +138933,9 @@ private:
137722
138933
  void GoToSource(idx_t &current_idx, idx_t initial_idx);
137723
138934
  void FetchFromSource(DataChunk &result);
137724
138935
 
138936
+ void FinishProcessing(int32_t operator_idx = -1);
138937
+ bool IsFinished();
138938
+
137725
138939
  OperatorResultType ExecutePushInternal(DataChunk &input, idx_t initial_idx = 0);
137726
138940
  //! Pushes a chunk through the pipeline and returns a single result chunk
137727
138941
  //! Returns whether or not a new input chunk is needed, or whether or not we are finished
@@ -137822,6 +139036,8 @@ public:
137822
139036
  } // namespace duckdb
137823
139037
 
137824
139038
 
139039
+
139040
+
137825
139041
  #include <algorithm>
137826
139042
 
137827
139043
  namespace duckdb {
@@ -138062,8 +139278,18 @@ void Executor::VerifyPipelines() {
138062
139278
  #endif
138063
139279
  }
138064
139280
 
139281
+ void Executor::Initialize(unique_ptr<PhysicalOperator> physical_plan) {
139282
+ Reset();
139283
+ owned_plan = move(physical_plan);
139284
+ InitializeInternal(owned_plan.get());
139285
+ }
139286
+
138065
139287
  void Executor::Initialize(PhysicalOperator *plan) {
138066
139288
  Reset();
139289
+ InitializeInternal(plan);
139290
+ }
139291
+
139292
+ void Executor::InitializeInternal(PhysicalOperator *plan) {
138067
139293
 
138068
139294
  auto &scheduler = TaskScheduler::GetScheduler(context);
138069
139295
  {
@@ -138076,7 +139302,9 @@ void Executor::Initialize(PhysicalOperator *plan) {
138076
139302
 
138077
139303
  auto root_pipeline = make_shared<Pipeline>(*this);
138078
139304
  root_pipeline->sink = nullptr;
138079
- BuildPipelines(physical_plan, root_pipeline.get());
139305
+
139306
+ PipelineBuildState state;
139307
+ physical_plan->BuildPipelines(*this, *root_pipeline, state);
138080
139308
 
138081
139309
  this->total_pipelines = pipelines.size();
138082
139310
 
@@ -138187,9 +139415,8 @@ PendingExecutionResult Executor::ExecuteTask() {
138187
139415
 
138188
139416
  void Executor::Reset() {
138189
139417
  lock_guard<mutex> elock(executor_lock);
138190
- delim_join_dependencies.clear();
138191
- recursive_cte = nullptr;
138192
139418
  physical_plan = nullptr;
139419
+ owned_plan.reset();
138193
139420
  root_executor.reset();
138194
139421
  root_pipelines.clear();
138195
139422
  root_pipeline_idx = 0;
@@ -138230,235 +139457,6 @@ void Executor::AddChildPipeline(Pipeline *current) {
138230
139457
  child_pipelines[current].push_back(move(child_pipeline));
138231
139458
  }
138232
139459
 
138233
- void Executor::BuildPipelines(PhysicalOperator *op, Pipeline *current) {
138234
- D_ASSERT(current);
138235
- op->op_state.reset();
138236
- if (op->IsSink()) {
138237
- // operator is a sink, build a pipeline
138238
- op->sink_state.reset();
138239
-
138240
- PhysicalOperator *pipeline_child = nullptr;
138241
- switch (op->type) {
138242
- case PhysicalOperatorType::CREATE_TABLE_AS:
138243
- case PhysicalOperatorType::INSERT:
138244
- case PhysicalOperatorType::DELETE_OPERATOR:
138245
- case PhysicalOperatorType::UPDATE:
138246
- case PhysicalOperatorType::HASH_GROUP_BY:
138247
- case PhysicalOperatorType::SIMPLE_AGGREGATE:
138248
- case PhysicalOperatorType::PERFECT_HASH_GROUP_BY:
138249
- case PhysicalOperatorType::WINDOW:
138250
- case PhysicalOperatorType::ORDER_BY:
138251
- case PhysicalOperatorType::RESERVOIR_SAMPLE:
138252
- case PhysicalOperatorType::TOP_N:
138253
- case PhysicalOperatorType::COPY_TO_FILE:
138254
- case PhysicalOperatorType::LIMIT:
138255
- case PhysicalOperatorType::LIMIT_PERCENT:
138256
- case PhysicalOperatorType::EXPLAIN_ANALYZE:
138257
- D_ASSERT(op->children.size() == 1);
138258
- // single operator:
138259
- // the operator becomes the data source of the current pipeline
138260
- current->source = op;
138261
- // we create a new pipeline starting from the child
138262
- pipeline_child = op->children[0].get();
138263
- break;
138264
- case PhysicalOperatorType::EXPORT:
138265
- // EXPORT has an optional child
138266
- // we only need to schedule child pipelines if there is a child
138267
- current->source = op;
138268
- if (op->children.empty()) {
138269
- return;
138270
- }
138271
- D_ASSERT(op->children.size() == 1);
138272
- pipeline_child = op->children[0].get();
138273
- break;
138274
- case PhysicalOperatorType::NESTED_LOOP_JOIN:
138275
- case PhysicalOperatorType::BLOCKWISE_NL_JOIN:
138276
- case PhysicalOperatorType::HASH_JOIN:
138277
- case PhysicalOperatorType::PIECEWISE_MERGE_JOIN:
138278
- case PhysicalOperatorType::CROSS_PRODUCT:
138279
- // regular join, create a pipeline with RHS source that sinks into this pipeline
138280
- pipeline_child = op->children[1].get();
138281
- // on the LHS (probe child), the operator becomes a regular operator
138282
- current->operators.push_back(op);
138283
- if (op->IsSource()) {
138284
- // FULL or RIGHT outer join
138285
- // schedule a scan of the node as a child pipeline
138286
- // this scan has to be performed AFTER all the probing has happened
138287
- if (recursive_cte) {
138288
- throw NotImplementedException("FULL and RIGHT outer joins are not supported in recursive CTEs yet");
138289
- }
138290
- AddChildPipeline(current);
138291
- }
138292
- BuildPipelines(op->children[0].get(), current);
138293
- break;
138294
- case PhysicalOperatorType::IE_JOIN: {
138295
- D_ASSERT(op->children.size() == 2);
138296
- if (recursive_cte) {
138297
- throw NotImplementedException("IEJoins are not supported in recursive CTEs yet");
138298
- }
138299
-
138300
- // Build the LHS
138301
- auto lhs_pipeline = make_shared<Pipeline>(*this);
138302
- lhs_pipeline->sink = op;
138303
- D_ASSERT(op->children[0].get());
138304
- BuildPipelines(op->children[0].get(), lhs_pipeline.get());
138305
-
138306
- // Build the RHS
138307
- auto rhs_pipeline = make_shared<Pipeline>(*this);
138308
- rhs_pipeline->sink = op;
138309
- D_ASSERT(op->children[1].get());
138310
- BuildPipelines(op->children[1].get(), rhs_pipeline.get());
138311
-
138312
- // RHS => LHS => current
138313
- current->AddDependency(rhs_pipeline);
138314
- rhs_pipeline->AddDependency(lhs_pipeline);
138315
-
138316
- pipelines.emplace_back(move(lhs_pipeline));
138317
- pipelines.emplace_back(move(rhs_pipeline));
138318
-
138319
- // Now build both and scan
138320
- current->source = op;
138321
- return;
138322
- }
138323
- case PhysicalOperatorType::DELIM_JOIN: {
138324
- // duplicate eliminated join
138325
- // for delim joins, recurse into the actual join
138326
- pipeline_child = op->children[0].get();
138327
- break;
138328
- }
138329
- case PhysicalOperatorType::RECURSIVE_CTE: {
138330
- auto &cte_node = (PhysicalRecursiveCTE &)*op;
138331
-
138332
- // recursive CTE
138333
- current->source = op;
138334
- // the LHS of the recursive CTE is our initial state
138335
- // we build this pipeline as normal
138336
- pipeline_child = op->children[0].get();
138337
- // for the RHS, we gather all pipelines that depend on the recursive cte
138338
- // these pipelines need to be rerun
138339
- if (recursive_cte) {
138340
- throw InternalException("Recursive CTE detected WITHIN a recursive CTE node");
138341
- }
138342
- recursive_cte = op;
138343
-
138344
- auto recursive_pipeline = make_shared<Pipeline>(*this);
138345
- recursive_pipeline->sink = op;
138346
- op->sink_state.reset();
138347
- BuildPipelines(op->children[1].get(), recursive_pipeline.get());
138348
-
138349
- cte_node.pipelines.push_back(move(recursive_pipeline));
138350
-
138351
- recursive_cte = nullptr;
138352
- break;
138353
- }
138354
- default:
138355
- throw InternalException("Unimplemented sink type!");
138356
- }
138357
- // the current is dependent on this pipeline to complete
138358
- auto pipeline = make_shared<Pipeline>(*this);
138359
- pipeline->sink = op;
138360
- current->AddDependency(pipeline);
138361
- D_ASSERT(pipeline_child);
138362
- // recurse into the pipeline child
138363
- BuildPipelines(pipeline_child, pipeline.get());
138364
- if (op->type == PhysicalOperatorType::DELIM_JOIN) {
138365
- // for delim joins, recurse into the actual join
138366
- // any pipelines in there depend on the main pipeline
138367
- auto &delim_join = (PhysicalDelimJoin &)*op;
138368
- // any scan of the duplicate eliminated data on the RHS depends on this pipeline
138369
- // we add an entry to the mapping of (PhysicalOperator*) -> (Pipeline*)
138370
- for (auto &delim_scan : delim_join.delim_scans) {
138371
- delim_join_dependencies[delim_scan] = pipeline.get();
138372
- }
138373
- BuildPipelines(delim_join.join.get(), current);
138374
- }
138375
- if (!recursive_cte) {
138376
- // regular pipeline: schedule it
138377
- pipelines.push_back(move(pipeline));
138378
- } else {
138379
- // CTE pipeline! add it to the CTE pipelines
138380
- D_ASSERT(recursive_cte);
138381
- auto &cte = (PhysicalRecursiveCTE &)*recursive_cte;
138382
- cte.pipelines.push_back(move(pipeline));
138383
- }
138384
- } else {
138385
- // operator is not a sink! recurse in children
138386
- // first check if there is any additional action we need to do depending on the type
138387
- switch (op->type) {
138388
- case PhysicalOperatorType::DELIM_SCAN: {
138389
- D_ASSERT(op->children.empty());
138390
- auto entry = delim_join_dependencies.find(op);
138391
- D_ASSERT(entry != delim_join_dependencies.end());
138392
- // this chunk scan introduces a dependency to the current pipeline
138393
- // namely a dependency on the duplicate elimination pipeline to finish
138394
- auto delim_dependency = entry->second->shared_from_this();
138395
- D_ASSERT(delim_dependency->sink->type == PhysicalOperatorType::DELIM_JOIN);
138396
- auto &delim_join = (PhysicalDelimJoin &)*delim_dependency->sink;
138397
- current->AddDependency(delim_dependency);
138398
- current->source = (PhysicalOperator *)delim_join.distinct.get();
138399
- return;
138400
- }
138401
- case PhysicalOperatorType::EXECUTE: {
138402
- // EXECUTE statement: build pipeline on child
138403
- auto &execute = (PhysicalExecute &)*op;
138404
- BuildPipelines(execute.plan, current);
138405
- return;
138406
- }
138407
- case PhysicalOperatorType::RECURSIVE_CTE_SCAN: {
138408
- if (!recursive_cte) {
138409
- throw InternalException("Recursive CTE scan found without recursive CTE node");
138410
- }
138411
- break;
138412
- }
138413
- case PhysicalOperatorType::INDEX_JOIN: {
138414
- // index join: we only continue into the LHS
138415
- // the right side is probed by the index join
138416
- // so we don't need to do anything in the pipeline with this child
138417
- current->operators.push_back(op);
138418
- BuildPipelines(op->children[0].get(), current);
138419
- return;
138420
- }
138421
- case PhysicalOperatorType::UNION: {
138422
- if (recursive_cte) {
138423
- throw NotImplementedException("UNIONS are not supported in recursive CTEs yet");
138424
- }
138425
- auto union_pipeline = make_shared<Pipeline>(*this);
138426
- auto pipeline_ptr = union_pipeline.get();
138427
- // set up dependencies for any child pipelines to this union pipeline
138428
- auto child_entry = child_pipelines.find(current);
138429
- if (child_entry != child_pipelines.end()) {
138430
- for (auto &current_child : child_entry->second) {
138431
- D_ASSERT(child_dependencies.find(current_child.get()) != child_dependencies.end());
138432
- child_dependencies[current_child.get()].push_back(pipeline_ptr);
138433
- }
138434
- }
138435
- // for the current pipeline, continue building on the LHS
138436
- union_pipeline->operators = current->operators;
138437
- BuildPipelines(op->children[0].get(), current);
138438
- // insert the union pipeline as a union pipeline of the current node
138439
- union_pipelines[current].push_back(move(union_pipeline));
138440
-
138441
- // for the union pipeline, build on the RHS
138442
- pipeline_ptr->sink = current->sink;
138443
- BuildPipelines(op->children[1].get(), pipeline_ptr);
138444
- return;
138445
- }
138446
- default:
138447
- break;
138448
- }
138449
- if (op->children.empty()) {
138450
- // source
138451
- current->source = op;
138452
- } else {
138453
- if (op->children.size() != 1) {
138454
- throw InternalException("Operator not supported yet");
138455
- }
138456
- current->operators.push_back(op);
138457
- BuildPipelines(op->children[0].get(), current);
138458
- }
138459
- }
138460
- }
138461
-
138462
139460
  vector<LogicalType> Executor::GetTypes() {
138463
139461
  D_ASSERT(physical_plan);
138464
139462
  return physical_plan->GetTypes();
@@ -138518,14 +139516,37 @@ void Executor::Flush(ThreadContext &tcontext) {
138518
139516
  bool Executor::GetPipelinesProgress(double &current_progress) { // LCOV_EXCL_START
138519
139517
  lock_guard<mutex> elock(executor_lock);
138520
139518
 
138521
- if (!pipelines.empty()) {
138522
- return pipelines.back()->GetProgress(current_progress);
138523
- } else {
138524
- current_progress = -1;
138525
- return true;
139519
+ vector<double> progress;
139520
+ vector<idx_t> cardinality;
139521
+ idx_t total_cardinality = 0;
139522
+ for (auto &pipeline : pipelines) {
139523
+ double child_percentage;
139524
+ idx_t child_cardinality;
139525
+ if (!pipeline->GetProgress(child_percentage, child_cardinality)) {
139526
+ return false;
139527
+ }
139528
+ progress.push_back(child_percentage);
139529
+ cardinality.push_back(child_cardinality);
139530
+ total_cardinality += child_cardinality;
138526
139531
  }
139532
+ current_progress = 0;
139533
+ for (size_t i = 0; i < progress.size(); i++) {
139534
+ current_progress += progress[i] * double(cardinality[i]) / double(total_cardinality);
139535
+ }
139536
+ return true;
138527
139537
  } // LCOV_EXCL_STOP
138528
139538
 
139539
+ bool Executor::HasResultCollector() {
139540
+ return physical_plan->type == PhysicalOperatorType::RESULT_COLLECTOR;
139541
+ }
139542
+
139543
+ unique_ptr<QueryResult> Executor::GetResult() {
139544
+ D_ASSERT(HasResultCollector());
139545
+ auto &result_collector = (PhysicalResultCollector &)*physical_plan;
139546
+ D_ASSERT(result_collector.sink_state);
139547
+ return result_collector.GetResult(*result_collector.sink_state);
139548
+ }
139549
+
138529
139550
  unique_ptr<DataChunk> Executor::FetchChunk() {
138530
139551
  D_ASSERT(physical_plan);
138531
139552
 
@@ -138591,10 +139612,6 @@ TaskExecutionResult ExecutorTask::Execute(TaskExecutionMode mode) {
138591
139612
 
138592
139613
 
138593
139614
 
138594
-
138595
-
138596
-
138597
-
138598
139615
 
138599
139616
  namespace duckdb {
138600
139617
 
@@ -138636,50 +139653,13 @@ ClientContext &Pipeline::GetClientContext() {
138636
139653
  return executor.context;
138637
139654
  }
138638
139655
 
138639
- // LCOV_EXCL_START
138640
- bool Pipeline::GetProgressInternal(ClientContext &context, PhysicalOperator *op, double &current_percentage) {
138641
- current_percentage = -1;
138642
- switch (op->type) {
138643
- case PhysicalOperatorType::TABLE_SCAN: {
138644
- auto &get = (PhysicalTableScan &)*op;
138645
- if (get.function.table_scan_progress) {
138646
- current_percentage = get.function.table_scan_progress(context, get.bind_data.get());
138647
- return true;
138648
- }
138649
- // If the table_scan_progress is not implemented it means we don't support this function yet in the progress
138650
- // bar
138651
- return false;
138652
- }
138653
- // If it is not a table scan we go down on all children until we reach the leaf operators
138654
- default: {
138655
- vector<idx_t> progress;
138656
- vector<idx_t> cardinality;
138657
- double total_cardinality = 0;
138658
- current_percentage = 0;
138659
- for (auto &op_child : op->children) {
138660
- double child_percentage = 0;
138661
- if (!GetProgressInternal(context, op_child.get(), child_percentage)) {
138662
- return false;
138663
- }
138664
- if (!Value::DoubleIsFinite(child_percentage)) {
138665
- return false;
138666
- }
138667
- progress.push_back(child_percentage);
138668
- cardinality.push_back(op_child->estimated_cardinality);
138669
- total_cardinality += op_child->estimated_cardinality;
138670
- }
138671
- for (size_t i = 0; i < progress.size(); i++) {
138672
- current_percentage += progress[i] * cardinality[i] / total_cardinality;
138673
- }
138674
- return true;
138675
- }
138676
- }
138677
- }
138678
- // LCOV_EXCL_STOP
139656
+ bool Pipeline::GetProgress(double &current_percentage, idx_t &source_cardinality) {
139657
+ D_ASSERT(source);
138679
139658
 
138680
- bool Pipeline::GetProgress(double &current_percentage) {
138681
139659
  auto &client = executor.context;
138682
- return GetProgressInternal(client, source, current_percentage);
139660
+ current_percentage = source->GetProgress(client, *source_state);
139661
+ source_cardinality = source->estimated_cardinality;
139662
+ return current_percentage >= 0;
138683
139663
  }
138684
139664
 
138685
139665
  void Pipeline::ScheduleSequentialTask(shared_ptr<Event> &event) {
@@ -138689,6 +139669,7 @@ void Pipeline::ScheduleSequentialTask(shared_ptr<Event> &event) {
138689
139669
  }
138690
139670
 
138691
139671
  bool Pipeline::ScheduleParallel(shared_ptr<Event> &event) {
139672
+ // check if the sink, source and all intermediate operators support parallelism
138692
139673
  if (!sink->ParallelSink()) {
138693
139674
  return false;
138694
139675
  }
@@ -138700,10 +139681,35 @@ bool Pipeline::ScheduleParallel(shared_ptr<Event> &event) {
138700
139681
  return false;
138701
139682
  }
138702
139683
  }
139684
+ if (sink->RequiresBatchIndex()) {
139685
+ if (!source->SupportsBatchIndex()) {
139686
+ throw InternalException(
139687
+ "Attempting to schedule a pipeline where the sink requires batch index but source does not support it");
139688
+ }
139689
+ }
138703
139690
  idx_t max_threads = source_state->MaxThreads();
138704
139691
  return LaunchScanTasks(event, max_threads);
138705
139692
  }
138706
139693
 
139694
+ bool Pipeline::IsOrderDependent() const {
139695
+ auto &config = DBConfig::GetConfig(executor.context);
139696
+ if (!config.preserve_insertion_order) {
139697
+ return false;
139698
+ }
139699
+ if (sink && sink->IsOrderDependent()) {
139700
+ return true;
139701
+ }
139702
+ if (source->IsOrderDependent()) {
139703
+ return true;
139704
+ }
139705
+ for (auto &op : operators) {
139706
+ if (op->IsOrderDependent()) {
139707
+ return true;
139708
+ }
139709
+ }
139710
+ return false;
139711
+ }
139712
+
138707
139713
  void Pipeline::Schedule(shared_ptr<Event> &event) {
138708
139714
  D_ASSERT(ready);
138709
139715
  D_ASSERT(sink);
@@ -138801,6 +139807,59 @@ vector<PhysicalOperator *> Pipeline::GetOperators() const {
138801
139807
  return result;
138802
139808
  }
138803
139809
 
139810
+ //===--------------------------------------------------------------------===//
139811
+ // Pipeline Build State
139812
+ //===--------------------------------------------------------------------===//
139813
+ void PipelineBuildState::SetPipelineSource(Pipeline &pipeline, PhysicalOperator *op) {
139814
+ pipeline.source = op;
139815
+ }
139816
+
139817
+ void PipelineBuildState::SetPipelineSink(Pipeline &pipeline, PhysicalOperator *op) {
139818
+ pipeline.sink = op;
139819
+ // set the base batch index of this pipeline based on how many other pipelines have this node as their sink
139820
+ pipeline.base_batch_index = BATCH_INCREMENT * sink_pipeline_count[op];
139821
+ // increment the number of nodes that have this pipeline as their sink
139822
+ sink_pipeline_count[op]++;
139823
+ }
139824
+
139825
+ void PipelineBuildState::AddPipelineOperator(Pipeline &pipeline, PhysicalOperator *op) {
139826
+ pipeline.operators.push_back(op);
139827
+ }
139828
+
139829
+ void PipelineBuildState::AddPipeline(Executor &executor, shared_ptr<Pipeline> pipeline) {
139830
+ executor.pipelines.push_back(move(pipeline));
139831
+ }
139832
+
139833
+ PhysicalOperator *PipelineBuildState::GetPipelineSource(Pipeline &pipeline) {
139834
+ return pipeline.source;
139835
+ }
139836
+
139837
+ PhysicalOperator *PipelineBuildState::GetPipelineSink(Pipeline &pipeline) {
139838
+ return pipeline.sink;
139839
+ }
139840
+
139841
+ void PipelineBuildState::SetPipelineOperators(Pipeline &pipeline, vector<PhysicalOperator *> operators) {
139842
+ pipeline.operators = move(operators);
139843
+ }
139844
+
139845
+ void PipelineBuildState::AddChildPipeline(Executor &executor, Pipeline &pipeline) {
139846
+ executor.AddChildPipeline(&pipeline);
139847
+ }
139848
+
139849
+ unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &PipelineBuildState::GetUnionPipelines(Executor &executor) {
139850
+ return executor.union_pipelines;
139851
+ }
139852
+ unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &PipelineBuildState::GetChildPipelines(Executor &executor) {
139853
+ return executor.child_pipelines;
139854
+ }
139855
+ unordered_map<Pipeline *, vector<Pipeline *>> &PipelineBuildState::GetChildDependencies(Executor &executor) {
139856
+ return executor.child_dependencies;
139857
+ }
139858
+
139859
+ vector<PhysicalOperator *> PipelineBuildState::GetPipelineOperators(Pipeline &pipeline) {
139860
+ return pipeline.operators;
139861
+ }
139862
+
138804
139863
  } // namespace duckdb
138805
139864
 
138806
139865
 
@@ -138842,6 +139901,7 @@ void PipelineEvent::FinishEvent() {
138842
139901
 
138843
139902
 
138844
139903
 
139904
+
138845
139905
  namespace duckdb {
138846
139906
 
138847
139907
  PipelineExecutor::PipelineExecutor(ClientContext &context_p, Pipeline &pipeline_p)
@@ -138850,7 +139910,9 @@ PipelineExecutor::PipelineExecutor(ClientContext &context_p, Pipeline &pipeline_
138850
139910
  local_source_state = pipeline.source->GetLocalSourceState(context, *pipeline.source_state);
138851
139911
  if (pipeline.sink) {
138852
139912
  local_sink_state = pipeline.sink->GetLocalSinkState(context);
139913
+ requires_batch_index = pipeline.sink->RequiresBatchIndex() && pipeline.source->SupportsBatchIndex();
138853
139914
  }
139915
+ bool can_cache_in_pipeline = pipeline.sink && !pipeline.IsOrderDependent() && !requires_batch_index;
138854
139916
  intermediate_chunks.reserve(pipeline.operators.size());
138855
139917
  intermediate_states.reserve(pipeline.operators.size());
138856
139918
  cached_chunks.resize(pipeline.operators.size());
@@ -138861,7 +139923,7 @@ PipelineExecutor::PipelineExecutor(ClientContext &context_p, Pipeline &pipeline_
138861
139923
  chunk->Initialize(prev_operator->GetTypes());
138862
139924
  intermediate_chunks.push_back(move(chunk));
138863
139925
  intermediate_states.push_back(current_operator->GetOperatorState(context.client));
138864
- if (pipeline.sink && !pipeline.sink->SinkOrderMatters() && current_operator->RequiresCache()) {
139926
+ if (can_cache_in_pipeline && current_operator->RequiresCache()) {
138865
139927
  auto &cache_types = current_operator->GetTypes();
138866
139928
  bool can_cache = true;
138867
139929
  for (auto &type : cache_types) {
@@ -138879,7 +139941,7 @@ PipelineExecutor::PipelineExecutor(ClientContext &context_p, Pipeline &pipeline_
138879
139941
  if (current_operator->IsSink() && current_operator->sink_state->state == SinkFinalizeType::NO_OUTPUT_POSSIBLE) {
138880
139942
  // one of the operators has already figured out no output is possible
138881
139943
  // we can skip executing the pipeline
138882
- finished_processing = true;
139944
+ FinishProcessing();
138883
139945
  }
138884
139946
  }
138885
139947
  InitializeChunk(final_chunk);
@@ -138890,7 +139952,7 @@ bool PipelineExecutor::Execute(idx_t max_chunks) {
138890
139952
  bool exhausted_source = false;
138891
139953
  auto &source_chunk = pipeline.operators.empty() ? final_chunk : *intermediate_chunks[0];
138892
139954
  for (idx_t i = 0; i < max_chunks; i++) {
138893
- if (finished_processing) {
139955
+ if (IsFinished()) {
138894
139956
  break;
138895
139957
  }
138896
139958
  source_chunk.Reset();
@@ -138901,11 +139963,11 @@ bool PipelineExecutor::Execute(idx_t max_chunks) {
138901
139963
  }
138902
139964
  auto result = ExecutePushInternal(source_chunk);
138903
139965
  if (result == OperatorResultType::FINISHED) {
138904
- finished_processing = true;
139966
+ D_ASSERT(IsFinished());
138905
139967
  break;
138906
139968
  }
138907
139969
  }
138908
- if (!exhausted_source && !finished_processing) {
139970
+ if (!exhausted_source && !IsFinished()) {
138909
139971
  return false;
138910
139972
  }
138911
139973
  PushFinalize();
@@ -138920,6 +139982,15 @@ OperatorResultType PipelineExecutor::ExecutePush(DataChunk &input) { // LCOV_EXC
138920
139982
  return ExecutePushInternal(input);
138921
139983
  } // LCOV_EXCL_STOP
138922
139984
 
139985
+ void PipelineExecutor::FinishProcessing(int32_t operator_idx) {
139986
+ finished_processing_idx = operator_idx < 0 ? NumericLimits<int32_t>::Maximum() : operator_idx;
139987
+ in_process_operators = stack<idx_t>();
139988
+ }
139989
+
139990
+ bool PipelineExecutor::IsFinished() {
139991
+ return finished_processing_idx >= 0;
139992
+ }
139993
+
138923
139994
  OperatorResultType PipelineExecutor::ExecutePushInternal(DataChunk &input, idx_t initial_idx) {
138924
139995
  D_ASSERT(pipeline.sink);
138925
139996
  if (input.size() == 0) { // LCOV_EXCL_START
@@ -138944,6 +140015,7 @@ OperatorResultType PipelineExecutor::ExecutePushInternal(DataChunk &input, idx_t
138944
140015
  auto sink_result = pipeline.sink->Sink(context, *pipeline.sink->sink_state, *local_sink_state, sink_chunk);
138945
140016
  EndOperator(pipeline.sink, nullptr);
138946
140017
  if (sink_result == SinkResultType::FINISHED) {
140018
+ FinishProcessing();
138947
140019
  return OperatorResultType::FINISHED;
138948
140020
  }
138949
140021
  }
@@ -138951,7 +140023,6 @@ OperatorResultType PipelineExecutor::ExecutePushInternal(DataChunk &input, idx_t
138951
140023
  return OperatorResultType::NEED_MORE_INPUT;
138952
140024
  }
138953
140025
  }
138954
- return OperatorResultType::FINISHED;
138955
140026
  }
138956
140027
 
138957
140028
  void PipelineExecutor::PushFinalize() {
@@ -138960,13 +140031,15 @@ void PipelineExecutor::PushFinalize() {
138960
140031
  }
138961
140032
  finalized = true;
138962
140033
  // flush all caches
138963
- if (!finished_processing) {
138964
- D_ASSERT(in_process_operators.empty());
138965
- for (idx_t i = 0; i < cached_chunks.size(); i++) {
138966
- if (cached_chunks[i] && cached_chunks[i]->size() > 0) {
138967
- ExecutePushInternal(*cached_chunks[i], i + 1);
138968
- cached_chunks[i].reset();
138969
- }
140034
+ // note that even if an operator has finished, we might still need to flush caches AFTER that operator
140035
+ // e.g. if we have SOURCE -> LIMIT -> CROSS_PRODUCT -> SINK, if the LIMIT reports no more rows will be passed on
140036
+ // we still need to flush caches from the CROSS_PRODUCT
140037
+ D_ASSERT(in_process_operators.empty());
140038
+ idx_t start_idx = IsFinished() ? idx_t(finished_processing_idx) : 0;
140039
+ for (idx_t i = start_idx; i < cached_chunks.size(); i++) {
140040
+ if (cached_chunks[i] && cached_chunks[i]->size() > 0) {
140041
+ ExecutePushInternal(*cached_chunks[i], i + 1);
140042
+ cached_chunks[i].reset();
138970
140043
  }
138971
140044
  }
138972
140045
  D_ASSERT(local_sink_state);
@@ -139022,7 +140095,7 @@ void PipelineExecutor::CacheChunk(DataChunk &current_chunk, idx_t operator_idx)
139022
140095
  }
139023
140096
 
139024
140097
  void PipelineExecutor::ExecutePull(DataChunk &result) {
139025
- if (finished_processing) {
140098
+ if (IsFinished()) {
139026
140099
  return;
139027
140100
  }
139028
140101
  auto &executor = pipeline.executor;
@@ -139038,7 +140111,10 @@ void PipelineExecutor::ExecutePull(DataChunk &result) {
139038
140111
  }
139039
140112
  }
139040
140113
  if (!pipeline.operators.empty()) {
139041
- Execute(source_chunk, result);
140114
+ auto state = Execute(source_chunk, result);
140115
+ if (state == OperatorResultType::FINISHED) {
140116
+ break;
140117
+ }
139042
140118
  }
139043
140119
  }
139044
140120
  } catch (std::exception &ex) { // LCOV_EXCL_START
@@ -139122,6 +140198,7 @@ OperatorResultType PipelineExecutor::Execute(DataChunk &input, DataChunk &result
139122
140198
  in_process_operators.push(current_idx);
139123
140199
  } else if (result == OperatorResultType::FINISHED) {
139124
140200
  D_ASSERT(current_chunk.size() == 0);
140201
+ FinishProcessing(current_idx);
139125
140202
  return OperatorResultType::FINISHED;
139126
140203
  }
139127
140204
  current_chunk.Verify();
@@ -139155,6 +140232,14 @@ OperatorResultType PipelineExecutor::Execute(DataChunk &input, DataChunk &result
139155
140232
  void PipelineExecutor::FetchFromSource(DataChunk &result) {
139156
140233
  StartOperator(pipeline.source);
139157
140234
  pipeline.source->GetData(context, result, *pipeline.source_state, *local_source_state);
140235
+ if (result.size() != 0 && requires_batch_index) {
140236
+ auto next_batch_index =
140237
+ pipeline.source->GetBatchIndex(context, result, *pipeline.source_state, *local_source_state);
140238
+ next_batch_index += pipeline.base_batch_index;
140239
+ D_ASSERT(local_sink_state->batch_index <= next_batch_index ||
140240
+ local_sink_state->batch_index == DConstants::INVALID_INDEX);
140241
+ local_sink_state->batch_index = next_batch_index;
140242
+ }
139158
140243
  EndOperator(pipeline.source, &result);
139159
140244
  }
139160
140245
 
@@ -144914,6 +145999,11 @@ bool WindowExpression::Equals(const WindowExpression *a, const WindowExpression
144914
145999
  return false;
144915
146000
  }
144916
146001
  }
146002
+ // check if the filter clauses are equivalent
146003
+ if (!BaseExpression::Equals(a->filter_expr.get(), b->filter_expr.get())) {
146004
+ return false;
146005
+ }
146006
+
144917
146007
  return true;
144918
146008
  }
144919
146009
 
@@ -144933,6 +146023,8 @@ unique_ptr<ParsedExpression> WindowExpression::Copy() const {
144933
146023
  new_window->orders.emplace_back(o.type, o.null_order, o.expression->Copy());
144934
146024
  }
144935
146025
 
146026
+ new_window->filter_expr = filter_expr ? filter_expr->Copy() : nullptr;
146027
+
144936
146028
  new_window->start = start;
144937
146029
  new_window->end = end;
144938
146030
  new_window->start_expr = start_expr ? start_expr->Copy() : nullptr;
@@ -144965,6 +146057,7 @@ void WindowExpression::Serialize(FieldWriter &writer) const {
144965
146057
  writer.WriteOptional(offset_expr);
144966
146058
  writer.WriteOptional(default_expr);
144967
146059
  writer.WriteField<bool>(ignore_nulls);
146060
+ writer.WriteOptional(filter_expr);
144968
146061
  }
144969
146062
 
144970
146063
  unique_ptr<ParsedExpression> WindowExpression::Deserialize(ExpressionType type, FieldReader &reader) {
@@ -144987,6 +146080,7 @@ unique_ptr<ParsedExpression> WindowExpression::Deserialize(ExpressionType type,
144987
146080
  expr->offset_expr = reader.ReadOptional<ParsedExpression>(nullptr);
144988
146081
  expr->default_expr = reader.ReadOptional<ParsedExpression>(nullptr);
144989
146082
  expr->ignore_nulls = reader.ReadRequired<bool>();
146083
+ expr->filter_expr = reader.ReadOptional<ParsedExpression>(nullptr);
144990
146084
  return move(expr);
144991
146085
  }
144992
146086
 
@@ -145849,6 +146943,9 @@ void ParsedExpressionIterator::EnumerateChildren(
145849
146943
  for (auto &child : window_expr.children) {
145850
146944
  callback(child);
145851
146945
  }
146946
+ if (window_expr.filter_expr) {
146947
+ callback(window_expr.filter_expr);
146948
+ }
145852
146949
  if (window_expr.start_expr) {
145853
146950
  callback(window_expr.start_expr);
145854
146951
  }
@@ -154435,6 +155532,11 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
154435
155532
  auto lowercase_name = StringUtil::Lower(function_name);
154436
155533
 
154437
155534
  if (root->over) {
155535
+ const auto win_fun_type = WindowToExpressionType(lowercase_name);
155536
+ if (win_fun_type == ExpressionType::INVALID) {
155537
+ throw InternalException("Unknown/unsupported window function");
155538
+ }
155539
+
154438
155540
  if (root->agg_distinct) {
154439
155541
  throw ParserException("DISTINCT is not implemented for window functions!");
154440
155542
  }
@@ -154443,18 +155545,13 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
154443
155545
  throw ParserException("ORDER BY is not implemented for window functions!");
154444
155546
  }
154445
155547
 
154446
- if (root->agg_filter) {
154447
- throw ParserException("FILTER is not implemented for window functions!");
155548
+ if (win_fun_type != ExpressionType::WINDOW_AGGREGATE && root->agg_filter) {
155549
+ throw ParserException("FILTER is not implemented for non-aggregate window functions!");
154448
155550
  }
154449
155551
  if (root->export_state) {
154450
155552
  throw ParserException("EXPORT_STATE is not supported for window functions!");
154451
155553
  }
154452
155554
 
154453
- const auto win_fun_type = WindowToExpressionType(lowercase_name);
154454
- if (win_fun_type == ExpressionType::INVALID) {
154455
- throw InternalException("Unknown/unsupported window function");
154456
- }
154457
-
154458
155555
  if (win_fun_type == ExpressionType::WINDOW_AGGREGATE && root->agg_ignore_nulls) {
154459
155556
  throw ParserException("IGNORE NULLS is not supported for windowed aggregates");
154460
155557
  }
@@ -154462,6 +155559,11 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
154462
155559
  auto expr = make_unique<WindowExpression>(win_fun_type, schema, lowercase_name);
154463
155560
  expr->ignore_nulls = root->agg_ignore_nulls;
154464
155561
 
155562
+ if (root->agg_filter) {
155563
+ auto filter_expr = TransformExpression(root->agg_filter);
155564
+ expr->filter_expr = move(filter_expr);
155565
+ }
155566
+
154465
155567
  if (root->args) {
154466
155568
  vector<unique_ptr<ParsedExpression>> function_list;
154467
155569
  TransformExpressionList(*root->args, function_list);
@@ -160716,6 +161818,7 @@ BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) {
160716
161818
  for (auto &order : window.orders) {
160717
161819
  BindChild(order.expression, depth, error);
160718
161820
  }
161821
+ BindChild(window.filter_expr, depth, error);
160719
161822
  BindChild(window.start_expr, depth, error);
160720
161823
  BindChild(window.end_expr, depth, error);
160721
161824
  BindChild(window.offset_expr, depth, error);
@@ -160849,6 +161952,8 @@ BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) {
160849
161952
  result->orders.emplace_back(type, null_order, move(expression));
160850
161953
  }
160851
161954
 
161955
+ result->filter_expr = CastWindowExpression(window.filter_expr, LogicalType::BOOLEAN);
161956
+
160852
161957
  result->start_expr = CastWindowExpression(window.start_expr, start_type);
160853
161958
  result->end_expr = CastWindowExpression(window.end_expr, end_type);
160854
161959
  result->offset_expr = CastWindowExpression(window.offset_expr, LogicalType::BIGINT);
@@ -167153,6 +168258,11 @@ bool BoundWindowExpression::Equals(const BaseExpression *other_p) const {
167153
168258
  return false;
167154
168259
  }
167155
168260
  }
168261
+ // check if the filter expressions are equivalent
168262
+ if (!Expression::Equals(filter_expr.get(), other->filter_expr.get())) {
168263
+ return false;
168264
+ }
168265
+
167156
168266
  // check if the framing expressions are equivalent
167157
168267
  if (!Expression::Equals(start_expr.get(), other->start_expr.get()) ||
167158
168268
  !Expression::Equals(end_expr.get(), other->end_expr.get()) ||
@@ -167217,6 +168327,8 @@ unique_ptr<Expression> BoundWindowExpression::Copy() {
167217
168327
  new_window->orders.emplace_back(o.type, o.null_order, o.expression->Copy());
167218
168328
  }
167219
168329
 
168330
+ new_window->filter_expr = filter_expr ? filter_expr->Copy() : nullptr;
168331
+
167220
168332
  new_window->start = start;
167221
168333
  new_window->end = end;
167222
168334
  new_window->start_expr = start_expr ? start_expr->Copy() : nullptr;
@@ -168457,6 +169569,9 @@ void ExpressionIterator::EnumerateChildren(Expression &expr,
168457
169569
  for (auto &child : window_expr.children) {
168458
169570
  callback(child);
168459
169571
  }
169572
+ if (window_expr.filter_expr) {
169573
+ callback(window_expr.filter_expr);
169574
+ }
168460
169575
  if (window_expr.start_expr) {
168461
169576
  callback(window_expr.start_expr);
168462
169577
  }