duckdb 0.3.5-dev725.0 → 0.3.5-dev750.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -18499,6 +18499,13 @@ DUCKDB_API bool TryCastToTimestampMS::Operation(string_t input, timestamp_t &res
18499
18499
  template <>
18500
18500
  DUCKDB_API bool TryCastToTimestampSec::Operation(string_t input, timestamp_t &result, bool strict);
18501
18501
 
18502
//! Added declarations: explicit specializations of the NS / MS / Sec timestamp cast operators that take a
//! date_t input. The string_t input variants are declared directly above; the definitions of these date_t
//! variants appear in a later hunk of this diff.
+ template <>
18503
+ DUCKDB_API bool TryCastToTimestampNS::Operation(date_t input, timestamp_t &result, bool strict);
18504
+ template <>
18505
+ DUCKDB_API bool TryCastToTimestampMS::Operation(date_t input, timestamp_t &result, bool strict);
18506
+ template <>
18507
+ DUCKDB_API bool TryCastToTimestampSec::Operation(date_t input, timestamp_t &result, bool strict);
18508
+
18502
18509
  //===--------------------------------------------------------------------===//
18503
18510
  // Non-Standard Timestamps -> string/standard timestamp
18504
18511
  //===--------------------------------------------------------------------===//
@@ -19528,6 +19535,108 @@ string_t StringCastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_
19528
19535
 
19529
19536
  } // namespace duckdb
19530
19537
 
19538
+ //===----------------------------------------------------------------------===//
19539
+ // DuckDB
19540
+ //
19541
+ // duckdb/common/operator/multiply.hpp
19542
+ //
19543
+ //
19544
+ //===----------------------------------------------------------------------===//
19545
+
19546
+
19547
+
19548
+
19549
+
19550
+
19551
+ namespace duckdb {
19552
+
19553
//! Plain multiplication operator. The primary template returns left * right converted to TR and performs no
//! overflow check itself. This hunk re-adds the header at an earlier position in the amalgamated file; the
//! same text is removed further down in this diff.
+ struct MultiplyOperator {
19554
+ template <class TA, class TB, class TR>
19555
+ static inline TR Operation(TA left, TB right) {
19556
+ return left * right;
19557
+ }
19558
+ };
19559
+
19560
//! Explicit specializations (declared here, defined elsewhere) for float, double and the two
//! interval_t / int64_t operand orders.
+ template <>
19561
+ float MultiplyOperator::Operation(float left, float right);
19562
+ template <>
19563
+ double MultiplyOperator::Operation(double left, double right);
19564
+ template <>
19565
+ interval_t MultiplyOperator::Operation(interval_t left, int64_t right);
19566
+ template <>
19567
+ interval_t MultiplyOperator::Operation(int64_t left, interval_t right);
19568
+
19569
//! Multiplication that reports failure through its bool return instead of throwing. The primary template is
//! only a fallback: it always throws InternalException for type combinations that have no specialization.
//! MultiplyOperatorOverflowCheck (below in this file) treats a false return as an overflow.
+ struct TryMultiplyOperator {
19570
+ template <class TA, class TB, class TR>
19571
+ static inline bool Operation(TA left, TB right, TR &result) {
19572
+ throw InternalException("Unimplemented type for TryMultiplyOperator");
19573
+ }
19574
+ };
19575
+
19576
//! Specializations declared for the unsigned 8/16/32/64-bit integer types.
+ template <>
19577
+ bool TryMultiplyOperator::Operation(uint8_t left, uint8_t right, uint8_t &result);
19578
+ template <>
19579
+ bool TryMultiplyOperator::Operation(uint16_t left, uint16_t right, uint16_t &result);
19580
+ template <>
19581
+ bool TryMultiplyOperator::Operation(uint32_t left, uint32_t right, uint32_t &result);
19582
+ template <>
19583
+ bool TryMultiplyOperator::Operation(uint64_t left, uint64_t right, uint64_t &result);
19584
+
19585
//! Specializations declared for the signed 8/16/32/64-bit integer types; only the int64_t one is marked
//! DUCKDB_API.
+ template <>
19586
+ bool TryMultiplyOperator::Operation(int8_t left, int8_t right, int8_t &result);
19587
+ template <>
19588
+ bool TryMultiplyOperator::Operation(int16_t left, int16_t right, int16_t &result);
19589
+ template <>
19590
+ bool TryMultiplyOperator::Operation(int32_t left, int32_t right, int32_t &result);
19591
+ template <>
19592
+ DUCKDB_API bool TryMultiplyOperator::Operation(int64_t left, int64_t right, int64_t &result);
19593
+
19594
//! Throwing wrapper around TryMultiplyOperator: returns the product, or throws OutOfRangeException (naming
//! the type of TA and both operands) when TryMultiplyOperator::Operation returns false.
+ struct MultiplyOperatorOverflowCheck {
19595
+ template <class TA, class TB, class TR>
19596
+ static inline TR Operation(TA left, TB right) {
19597
+ TR result;
19598
+ if (!TryMultiplyOperator::Operation(left, right, result)) {
19599
+ throw OutOfRangeException("Overflow in multiplication of %s (%d * %d)!", TypeIdToString(GetTypeId<TA>()),
19600
+ left, right);
19601
+ }
19602
+ return result;
19603
+ }
19604
+ };
19605
+
19606
//! Decimal multiplication that reports failure through its bool return. The primary template is a fallback
//! that always throws InternalException; DecimalMultiplyOverflowCheck (below in this file) treats a false
//! return as an overflow.
+ struct TryDecimalMultiply {
19607
+ template <class TA, class TB, class TR>
19608
+ static inline bool Operation(TA left, TB right, TR &result) {
19609
+ throw InternalException("Unimplemented type for TryDecimalMultiply");
19610
+ }
19611
+ };
19612
+
19613
//! Specializations declared for int16_t, int32_t, int64_t and hugeint_t operands.
+ template <>
19614
+ bool TryDecimalMultiply::Operation(int16_t left, int16_t right, int16_t &result);
19615
+ template <>
19616
+ bool TryDecimalMultiply::Operation(int32_t left, int32_t right, int32_t &result);
19617
+ template <>
19618
+ bool TryDecimalMultiply::Operation(int64_t left, int64_t right, int64_t &result);
19619
+ template <>
19620
+ bool TryDecimalMultiply::Operation(hugeint_t left, hugeint_t right, hugeint_t &result);
19621
+
19622
//! Throwing wrapper around TryDecimalMultiply: returns the product, or throws OutOfRangeException when
//! TryDecimalMultiply::Operation returns false.
//! NOTE(review): the message always says DECIMAL(18) regardless of which TA this template is instantiated
//! with - confirm that wording is intended for the narrower integer types.
+ struct DecimalMultiplyOverflowCheck {
19623
+ template <class TA, class TB, class TR>
19624
+ static inline TR Operation(TA left, TB right) {
19625
+ TR result;
19626
+ if (!TryDecimalMultiply::Operation<TA, TB, TR>(left, right, result)) {
19627
+ throw OutOfRangeException("Overflow in multiplication of DECIMAL(18) (%d * %d). You might want to add an "
19628
+ "explicit cast to a bigger decimal.",
19629
+ left, right);
19630
+ }
19631
+ return result;
19632
+ }
19633
+ };
19634
+
19635
//! hugeint_t gets its own explicit specialization (declared here; the definition is not part of this hunk).
+ template <>
19636
+ hugeint_t DecimalMultiplyOverflowCheck::Operation(hugeint_t left, hugeint_t right);
19637
+
19638
+ } // namespace duckdb
19639
+
19531
19640
 
19532
19641
 
19533
19642
 
@@ -23709,6 +23818,35 @@ bool TryCastToTimestampSec::Operation(string_t input, timestamp_t &result, bool
23709
23818
  return true;
23710
23819
  }
23711
23820
 
23821
//! Added: date_t -> nanosecond timestamp cast. First casts the date to timestamp_t via TryCast (forwarding
//! strict), then multiplies result.value by Interval::NANOS_PER_MICRO through TryMultiplyOperator.
//! Returns false if either step reports failure, true otherwise.
//! Presumably result.value holds microseconds after the first step - TODO confirm against TryCast.
+ template <>
23822
+ bool TryCastToTimestampNS::Operation(date_t input, timestamp_t &result, bool strict) {
23823
+ if (!TryCast::Operation<date_t, timestamp_t>(input, result, strict)) {
23824
+ return false;
23825
+ }
23826
//! Scaling goes through the bool-returning multiply so a failed multiplication fails the cast.
+ if (!TryMultiplyOperator::Operation(result.value, Interval::NANOS_PER_MICRO, result.value)) {
23827
+ return false;
23828
+ }
23829
+ return true;
23830
+ }
23831
+
23832
//! Added: date_t -> millisecond timestamp cast. Casts the date to timestamp_t via TryCast (forwarding
//! strict) and, on success, divides result.value in place by Interval::MICROS_PER_MSEC.
//! Returns false only when the underlying TryCast fails.
+ template <>
23833
+ bool TryCastToTimestampMS::Operation(date_t input, timestamp_t &result, bool strict) {
23834
+ if (!TryCast::Operation<date_t, timestamp_t>(input, result, strict)) {
23835
+ return false;
23836
+ }
23837
+ result.value /= Interval::MICROS_PER_MSEC;
23838
+ return true;
23839
+ }
23840
+
23841
//! Added: date_t -> second timestamp cast. Casts the date to timestamp_t via TryCast (forwarding strict)
//! and, on success, divides result.value in place by Interval::MICROS_PER_MSEC * Interval::MSECS_PER_SEC.
//! Returns false only when the underlying TryCast fails.
+ template <>
23842
+ bool TryCastToTimestampSec::Operation(date_t input, timestamp_t &result, bool strict) {
23843
+ if (!TryCast::Operation<date_t, timestamp_t>(input, result, strict)) {
23844
+ return false;
23845
+ }
23846
+ result.value /= Interval::MICROS_PER_MSEC * Interval::MSECS_PER_SEC;
23847
+ return true;
23848
+ }
23849
+
23712
23850
  //===--------------------------------------------------------------------===//
23713
23851
  // Cast From Blob
23714
23852
  //===--------------------------------------------------------------------===//
@@ -38787,14 +38925,17 @@ struct SelCache {
38787
38925
 
38788
38926
 
38789
38927
  namespace duckdb {
38928
+
38790
38929
  //! Vector auxiliary data (tagged ARROW_AUXILIARY) that stores a shared_ptr to an ArrowArrayWrapper.
  //! The -/+ lines in this hunk only change how the constructor and destructor braces are laid out.
  struct ArrowAuxiliaryData : VectorAuxiliaryData {
38791
38930
  explicit ArrowAuxiliaryData(shared_ptr<ArrowArrayWrapper> arrow_array_p)
38792
38931
  : VectorAuxiliaryData(VectorAuxiliaryDataType::ARROW_AUXILIARY), arrow_array(std::move(arrow_array_p)) {
38932
+ }
38933
+ ~ArrowAuxiliaryData() override {
38934
+ }
38793
38935
 
38794
- };
38795
- ~ArrowAuxiliaryData() override {};
38796
38936
  shared_ptr<ArrowArrayWrapper> arrow_array;
38797
38937
  };
38938
+
38798
38939
  } // namespace duckdb
38799
38940
 
38800
38941
 
@@ -41571,107 +41712,6 @@ dtime_t AddTimeOperator::Operation(interval_t left, dtime_t right);
41571
41712
 
41572
41713
  } // namespace duckdb
41573
41714
 
41574
- //===----------------------------------------------------------------------===//
41575
- // DuckDB
41576
- //
41577
- // duckdb/common/operator/multiply.hpp
41578
- //
41579
- //
41580
- //===----------------------------------------------------------------------===//
41581
-
41582
-
41583
-
41584
-
41585
-
41586
-
41587
- namespace duckdb {
41588
-
41589
- struct MultiplyOperator {
41590
- template <class TA, class TB, class TR>
41591
- static inline TR Operation(TA left, TB right) {
41592
- return left * right;
41593
- }
41594
- };
41595
-
41596
- template <>
41597
- float MultiplyOperator::Operation(float left, float right);
41598
- template <>
41599
- double MultiplyOperator::Operation(double left, double right);
41600
- template <>
41601
- interval_t MultiplyOperator::Operation(interval_t left, int64_t right);
41602
- template <>
41603
- interval_t MultiplyOperator::Operation(int64_t left, interval_t right);
41604
-
41605
- struct TryMultiplyOperator {
41606
- template <class TA, class TB, class TR>
41607
- static inline bool Operation(TA left, TB right, TR &result) {
41608
- throw InternalException("Unimplemented type for TryMultiplyOperator");
41609
- }
41610
- };
41611
-
41612
- template <>
41613
- bool TryMultiplyOperator::Operation(uint8_t left, uint8_t right, uint8_t &result);
41614
- template <>
41615
- bool TryMultiplyOperator::Operation(uint16_t left, uint16_t right, uint16_t &result);
41616
- template <>
41617
- bool TryMultiplyOperator::Operation(uint32_t left, uint32_t right, uint32_t &result);
41618
- template <>
41619
- bool TryMultiplyOperator::Operation(uint64_t left, uint64_t right, uint64_t &result);
41620
-
41621
- template <>
41622
- bool TryMultiplyOperator::Operation(int8_t left, int8_t right, int8_t &result);
41623
- template <>
41624
- bool TryMultiplyOperator::Operation(int16_t left, int16_t right, int16_t &result);
41625
- template <>
41626
- bool TryMultiplyOperator::Operation(int32_t left, int32_t right, int32_t &result);
41627
- template <>
41628
- DUCKDB_API bool TryMultiplyOperator::Operation(int64_t left, int64_t right, int64_t &result);
41629
-
41630
- struct MultiplyOperatorOverflowCheck {
41631
- template <class TA, class TB, class TR>
41632
- static inline TR Operation(TA left, TB right) {
41633
- TR result;
41634
- if (!TryMultiplyOperator::Operation(left, right, result)) {
41635
- throw OutOfRangeException("Overflow in multiplication of %s (%d * %d)!", TypeIdToString(GetTypeId<TA>()),
41636
- left, right);
41637
- }
41638
- return result;
41639
- }
41640
- };
41641
-
41642
- struct TryDecimalMultiply {
41643
- template <class TA, class TB, class TR>
41644
- static inline bool Operation(TA left, TB right, TR &result) {
41645
- throw InternalException("Unimplemented type for TryDecimalMultiply");
41646
- }
41647
- };
41648
-
41649
- template <>
41650
- bool TryDecimalMultiply::Operation(int16_t left, int16_t right, int16_t &result);
41651
- template <>
41652
- bool TryDecimalMultiply::Operation(int32_t left, int32_t right, int32_t &result);
41653
- template <>
41654
- bool TryDecimalMultiply::Operation(int64_t left, int64_t right, int64_t &result);
41655
- template <>
41656
- bool TryDecimalMultiply::Operation(hugeint_t left, hugeint_t right, hugeint_t &result);
41657
-
41658
- struct DecimalMultiplyOverflowCheck {
41659
- template <class TA, class TB, class TR>
41660
- static inline TR Operation(TA left, TB right) {
41661
- TR result;
41662
- if (!TryDecimalMultiply::Operation<TA, TB, TR>(left, right, result)) {
41663
- throw OutOfRangeException("Overflow in multiplication of DECIMAL(18) (%d * %d). You might want to add an "
41664
- "explicit cast to a bigger decimal.",
41665
- left, right);
41666
- }
41667
- return result;
41668
- }
41669
- };
41670
-
41671
- template <>
41672
- hugeint_t DecimalMultiplyOverflowCheck::Operation(hugeint_t left, hugeint_t right);
41673
-
41674
- } // namespace duckdb
41675
41715
 
41676
41716
 
41677
41717
 
@@ -50869,6 +50909,15 @@ static bool DateCastSwitch(Vector &source, Vector &result, idx_t count, string *
50869
50909
  case LogicalTypeId::TIMESTAMP_TZ:
50870
50910
  // date to timestamp
50871
50911
  return VectorTryCastLoop<date_t, timestamp_t, duckdb::TryCast>(source, result, count, error_message);
50912
+ case LogicalTypeId::TIMESTAMP_NS:
50913
+ return VectorTryCastLoop<date_t, timestamp_t, duckdb::TryCastToTimestampNS>(source, result, count,
50914
+ error_message);
50915
+ case LogicalTypeId::TIMESTAMP_SEC:
50916
+ return VectorTryCastLoop<date_t, timestamp_t, duckdb::TryCastToTimestampSec>(source, result, count,
50917
+ error_message);
50918
+ case LogicalTypeId::TIMESTAMP_MS:
50919
+ return VectorTryCastLoop<date_t, timestamp_t, duckdb::TryCastToTimestampMS>(source, result, count,
50920
+ error_message);
50872
50921
  default:
50873
50922
  return TryVectorNullCast(source, result, count, error_message);
50874
50923
  }
@@ -65676,7 +65725,7 @@ public:
65676
65725
  }
65677
65726
 
65678
65727
  //! Reports whether this operator can supply batch indexes. After this change the answer is derived from
  //! whether function.get_batch_index is set, instead of reading the supports_batch_index member.
  bool SupportsBatchIndex() const override {
65679
- return function.supports_batch_index;
65728
+ return function.get_batch_index != nullptr;
65680
65729
  }
65681
65730
 
65682
65731
  double GetProgress(ClientContext &context, GlobalSourceState &gstate) const override;
@@ -71665,6 +71714,7 @@ public:
71665
71714
 
71666
71715
  public:
71667
71716
  unique_ptr<OperatorState> GetOperatorState(ClientContext &context) const override;
71717
+ unique_ptr<GlobalOperatorState> GetGlobalOperatorState(ClientContext &context) const override;
71668
71718
  OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
71669
71719
  GlobalOperatorState &gstate, OperatorState &state) const override;
71670
71720
 
@@ -71686,12 +71736,20 @@ private:
71686
71736
 
71687
71737
  namespace duckdb {
71688
71738
 
71689
//! Operator states for PhysicalTableInOutFunction. The former TableInOutFunctionState is renamed to
//! TableInOutLocalState and now owns a LocalTableFunctionState; a new TableInOutGlobalState is added that
//! owns a GlobalTableFunctionState.
- class TableInOutFunctionState : public OperatorState {
71739
+ class TableInOutLocalState : public OperatorState {
71690
71740
  public:
71691
- TableInOutFunctionState() {
71741
+ TableInOutLocalState() {
71692
71742
  }
71693
71743
 
71694
- unique_ptr<FunctionOperatorData> operator_data;
71744
+ unique_ptr<LocalTableFunctionState> local_state;
71745
+ };
71746
+
71747
+ class TableInOutGlobalState : public GlobalOperatorState {
71748
+ public:
71749
+ TableInOutGlobalState() {
71750
+ }
71751
+
71752
+ unique_ptr<GlobalTableFunctionState> global_state;
71695
71753
  };
71696
71754
 
71697
71755
  PhysicalTableInOutFunction::PhysicalTableInOutFunction(vector<LogicalType> types, TableFunction function_p,
@@ -71702,17 +71760,29 @@ PhysicalTableInOutFunction::PhysicalTableInOutFunction(vector<LogicalType> types
71702
71760
  }
71703
71761
 
71704
71762
  //! Builds the per-operator local state. When function.init_local is set, it is called with a
  //! TableFunctionInitInput made from the bind data, column ids and a null filter pointer.
  //! NOTE(review): init_local is handed nullptr as its global-state argument here, even though
  //! GetGlobalOperatorState below creates a global state - confirm in-out functions do not need it.
  unique_ptr<OperatorState> PhysicalTableInOutFunction::GetOperatorState(ClientContext &context) const {
71705
- auto result = make_unique<TableInOutFunctionState>();
71706
- if (function.init) {
71707
- result->operator_data = function.init(context, bind_data.get(), column_ids, nullptr);
71763
+ auto result = make_unique<TableInOutLocalState>();
71764
+ if (function.init_local) {
71765
+ TableFunctionInitInput input(bind_data.get(), column_ids, nullptr);
71766
+ result->local_state = function.init_local(context, input, nullptr);
71767
+ }
71768
+ return move(result);
71769
+ }
71770
+
71771
//! Added: builds the global operator state. When function.init_global is set, its result is stored in
//! TableInOutGlobalState::global_state; otherwise global_state stays empty.
+ unique_ptr<GlobalOperatorState> PhysicalTableInOutFunction::GetGlobalOperatorState(ClientContext &context) const {
71772
+ auto result = make_unique<TableInOutGlobalState>();
71773
+ if (function.init_global) {
71774
+ TableFunctionInitInput input(bind_data.get(), column_ids, nullptr);
71775
+ result->global_state = function.init_global(context, input);
71708
71776
  }
71709
71777
  return move(result);
71710
71778
  }
71711
71779
 
71712
71780
  //! Runs the table in-out function on one input chunk. The new version casts both operator states to the
  //! TableInOut* types, bundles bind data plus local and global function state into a TableFunctionInput,
  //! and returns whatever function.in_out_function returns.
  OperatorResultType PhysicalTableInOutFunction::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
71713
- GlobalOperatorState &gstate, OperatorState &state_p) const {
71714
- auto &state = (TableInOutFunctionState &)state_p;
71715
- return function.in_out_function(context.client, bind_data.get(), state.operator_data.get(), input, chunk);
71781
+ GlobalOperatorState &gstate_p, OperatorState &state_p) const {
71782
+ auto &gstate = (TableInOutGlobalState &)gstate_p;
71783
+ auto &state = (TableInOutLocalState &)state_p;
71784
+ TableFunctionInput data(bind_data.get(), state.local_state.get(), gstate.global_state.get());
71785
+ return function.in_out_function(context.client, data, input, chunk);
71716
71786
  }
71717
71787
 
71718
71788
  } // namespace duckdb
@@ -72328,7 +72398,6 @@ bool PhysicalExpressionScan::IsFoldable() const {
72328
72398
 
72329
72399
 
72330
72400
 
72331
-
72332
72401
  #include <utility>
72333
72402
 
72334
72403
  namespace duckdb {
@@ -72342,103 +72411,64 @@ PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction fu
72342
72411
  table_filters(move(table_filters_p)) {
72343
72412
  }
72344
72413
 
72345
//! Global source state for PhysicalTableScan (renamed from TableScanGlobalState). The new constructor calls
//! function.init_global when it is set and takes max_threads from the returned state's MaxThreads(); when
//! init_global is not set, max_threads is 1. The removed lines show the previous max_threads /
//! init_parallel_state based setup.
//! NOTE(review): if init_global is set but returns an empty pointer, max_threads keeps its default of 0 -
//! confirm callers of MaxThreads() tolerate that.
- class TableScanGlobalState : public GlobalSourceState {
72414
+ class TableScanGlobalSourceState : public GlobalSourceState {
72346
72415
  public:
72347
- TableScanGlobalState(ClientContext &context, const PhysicalTableScan &op) {
72348
- if (!op.function.max_threads || !op.function.init_parallel_state) {
72349
- // table function cannot be parallelized
72350
- return;
72351
- }
72352
- // table function can be parallelized
72353
- // check how many threads we can have
72354
- max_threads = op.function.max_threads(context, op.bind_data.get());
72355
- if (max_threads <= 1) {
72356
- return;
72357
- }
72358
- if (op.function.init_parallel_state) {
72359
- TableFilterCollection collection(op.table_filters.get());
72360
- parallel_state = op.function.init_parallel_state(context, op.bind_data.get(), op.column_ids, &collection);
72416
+ TableScanGlobalSourceState(ClientContext &context, const PhysicalTableScan &op) {
72417
+ if (op.function.init_global) {
72418
+ TableFunctionInitInput input(op.bind_data.get(), op.column_ids, op.table_filters.get());
72419
+ global_state = op.function.init_global(context, input);
72420
+ if (global_state) {
72421
+ max_threads = global_state->MaxThreads();
72422
+ }
72423
+ } else {
72424
+ max_threads = 1;
72361
72425
  }
72362
72426
  }
72363
72427
 
72364
72428
  idx_t max_threads = 0;
72365
- unique_ptr<ParallelState> parallel_state;
72429
+ unique_ptr<GlobalTableFunctionState> global_state;
72366
72430
 
72367
72431
  idx_t MaxThreads() override {
72368
72432
  return max_threads;
72369
72433
  }
72370
72434
  };
72371
72435
 
72372
//! Local source state for PhysicalTableScan (renamed from TableScanLocalState). The new constructor calls
//! function.init_local, when set, with the bind data, column ids, table filters and the global function
//! state taken from gstate; the result is kept in local_state. The removed lines show the previous
//! parallel_init / init branches.
- class TableScanLocalState : public LocalSourceState {
72436
+ class TableScanLocalSourceState : public LocalSourceState {
72373
72437
  public:
72374
- TableScanLocalState(ExecutionContext &context, TableScanGlobalState &gstate, const PhysicalTableScan &op) {
72375
- TableFilterCollection filters(op.table_filters.get());
72376
- if (gstate.parallel_state) {
72377
- // parallel scan init
72378
- operator_data = op.function.parallel_init(context.client, op.bind_data.get(), gstate.parallel_state.get(),
72379
- op.column_ids, &filters);
72380
- } else if (op.function.init) {
72381
- // sequential scan init
72382
- operator_data = op.function.init(context.client, op.bind_data.get(), op.column_ids, &filters);
72438
+ TableScanLocalSourceState(ExecutionContext &context, TableScanGlobalSourceState &gstate,
72439
+ const PhysicalTableScan &op) {
72440
+ if (op.function.init_local) {
72441
+ TableFunctionInitInput input(op.bind_data.get(), op.column_ids, op.table_filters.get());
72442
+ local_state = op.function.init_local(context.client, input, gstate.global_state.get());
72383
72443
  }
72384
72444
  }
72385
72445
 
72386
- unique_ptr<FunctionOperatorData> operator_data;
72446
+ unique_ptr<LocalTableFunctionState> local_state;
72387
72447
  };
72388
72448
 
72389
72449
  //! Creates the local source state for this scan, casting the supplied global state to the scan's own
  //! global state type. The change here only follows the state class renames.
  unique_ptr<LocalSourceState> PhysicalTableScan::GetLocalSourceState(ExecutionContext &context,
72390
72450
  GlobalSourceState &gstate) const {
72391
- return make_unique<TableScanLocalState>(context, (TableScanGlobalState &)gstate, *this);
72451
+ return make_unique<TableScanLocalSourceState>(context, (TableScanGlobalSourceState &)gstate, *this);
72392
72452
  }
72393
72453
 
72394
72454
  //! Creates the global source state for this scan. The change here only follows the state class rename.
  unique_ptr<GlobalSourceState> PhysicalTableScan::GetGlobalSourceState(ClientContext &context) const {
72395
- return make_unique<TableScanGlobalState>(context, *this);
72455
+ return make_unique<TableScanGlobalSourceState>(context, *this);
72396
72456
  }
72397
72457
 
72398
72458
  //! Fetches the next chunk from the table function. The new body makes a single call to
  //! function.function, passing a TableFunctionInput built from the bind data and the local and global
  //! function states. The removed lines show the previous sequential / parallel branches, the
  //! parallel_state_next retry loop and the cleanup call, none of which remain in this function.
  void PhysicalTableScan::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
72399
72459
  LocalSourceState &lstate) const {
72400
72460
  D_ASSERT(!column_ids.empty());
72401
- auto &gstate = (TableScanGlobalState &)gstate_p;
72402
- auto &state = (TableScanLocalState &)lstate;
72461
+ auto &gstate = (TableScanGlobalSourceState &)gstate_p;
72462
+ auto &state = (TableScanLocalSourceState &)lstate;
72403
72463
 
72404
- if (!gstate.parallel_state) {
72405
- // sequential scan
72406
- function.function(context.client, bind_data.get(), state.operator_data.get(), chunk);
72407
- if (chunk.size() != 0) {
72408
- return;
72409
- }
72410
- } else {
72411
- // parallel scan
72412
- do {
72413
- if (function.parallel_function) {
72414
- function.parallel_function(context.client, bind_data.get(), state.operator_data.get(), chunk,
72415
- gstate.parallel_state.get());
72416
- } else {
72417
- function.function(context.client, bind_data.get(), state.operator_data.get(), chunk);
72418
- }
72419
-
72420
- if (chunk.size() == 0) {
72421
- D_ASSERT(function.parallel_state_next);
72422
- if (function.parallel_state_next(context.client, bind_data.get(), state.operator_data.get(),
72423
- gstate.parallel_state.get())) {
72424
- continue;
72425
- } else {
72426
- break;
72427
- }
72428
- } else {
72429
- return;
72430
- }
72431
- } while (true);
72432
- }
72433
- D_ASSERT(chunk.size() == 0);
72434
- if (function.cleanup) {
72435
- function.cleanup(context.client, bind_data.get(), state.operator_data.get());
72436
- }
72464
+ TableFunctionInput data(bind_data.get(), state.local_state.get(), gstate.global_state.get());
72465
+ function.function(context.client, data, chunk);
72437
72466
  }
72438
72467
 
72439
72468
  double PhysicalTableScan::GetProgress(ClientContext &context, GlobalSourceState &gstate_p) const {
72469
+ auto &gstate = (TableScanGlobalSourceState &)gstate_p;
72440
72470
  if (function.table_scan_progress) {
72441
- return function.table_scan_progress(context, bind_data.get());
72471
+ return function.table_scan_progress(context, bind_data.get(), gstate.global_state.get());
72442
72472
  }
72443
72473
  // if table_scan_progress is not implemented we don't support this function yet in the progress bar
72444
72474
  return -1;
@@ -72448,10 +72478,10 @@ idx_t PhysicalTableScan::GetBatchIndex(ExecutionContext &context, DataChunk &chu
72448
72478
  LocalSourceState &lstate) const {
72449
72479
  D_ASSERT(SupportsBatchIndex());
72450
72480
  D_ASSERT(function.get_batch_index);
72451
- auto &gstate = (TableScanGlobalState &)gstate_p;
72452
- auto &state = (TableScanLocalState &)lstate;
72453
- return function.get_batch_index(context.client, bind_data.get(), state.operator_data.get(),
72454
- gstate.parallel_state.get());
72481
+ auto &gstate = (TableScanGlobalSourceState &)gstate_p;
72482
+ auto &state = (TableScanLocalSourceState &)lstate;
72483
+ return function.get_batch_index(context.client, bind_data.get(), state.local_state.get(),
72484
+ gstate.global_state.get());
72455
72485
  }
72456
72486
 
72457
72487
  string PhysicalTableScan::GetName() const {
@@ -104041,7 +104071,6 @@ unique_ptr<MacroFunction> ScalarMacroFunction::Copy() {
104041
104071
 
104042
104072
 
104043
104073
 
104044
-
104045
104074
  //===----------------------------------------------------------------------===//
104046
104075
  // DuckDB
104047
104076
  //
@@ -104073,8 +104102,7 @@ namespace duckdb {
104073
104102
  using std::thread;
104074
104103
  }
104075
104104
 
104076
- #include <map>
104077
- #include <condition_variable>
104105
+
104078
104106
 
104079
104107
  namespace duckdb {
104080
104108
  //===--------------------------------------------------------------------===//
@@ -104093,78 +104121,64 @@ enum class ArrowDateTimeType : uint8_t {
104093
104121
  DAYS = 4,
104094
104122
  MONTHS = 5
104095
104123
  };
104124
+
104096
104125
  //! Per-column conversion metadata used when reading Arrow data: the dictionary type, the list of
  //! variable-size type entries and the list of date/time precisions. The only change in this hunk is
  //! dropping the std:: qualifier on pair.
  struct ArrowConvertData {
104097
104126
  ArrowConvertData(LogicalType type) : dictionary_type(type) {};
104098
104127
  ArrowConvertData() {};
104099
104128
  //! Hold type of dictionary
104100
104129
  LogicalType dictionary_type;
104101
104130
  //! If its a variable size type (e.g., strings, blobs, lists) holds which type it is
104102
- vector<std::pair<ArrowVariableSizeType, idx_t>> variable_sz_type;
104131
+ vector<pair<ArrowVariableSizeType, idx_t>> variable_sz_type;
104103
104132
  //! If this is a date/time holds its precision
104104
104133
  vector<ArrowDateTimeType> date_time_precision;
104105
104134
  };
104106
104135
 
104107
//! Bind data for the Arrow scan (constructed in ArrowTableFunction::ArrowScanBind further down in this
//! file). Changes recorded in this hunk: the two stream-factory function pointer typedefs are hoisted to
//! namespace scope and now take a TableFilterSet pointer, the thread-id constructor overload and thread_id
//! member guarded by DUCKDB_NO_THREADS are removed, and std:: qualifiers are dropped.
- struct ArrowScanFunctionData : public TableFunctionData {
104108
- #ifndef DUCKDB_NO_THREADS
104136
//! Signature of the factory callback that produces an Arrow stream for the given projection and filters.
+ typedef unique_ptr<ArrowArrayStreamWrapper> (*stream_factory_produce_t)(
104137
+ uintptr_t stream_factory_ptr, pair<unordered_map<idx_t, string>, vector<string>> &project_columns,
104138
+ TableFilterSet *filters);
104139
//! Signature of the factory callback that fills in the Arrow schema.
+ typedef void (*stream_factory_get_schema_t)(uintptr_t stream_factory_ptr, ArrowSchemaWrapper &schema);
104109
104140
 
104110
- ArrowScanFunctionData(idx_t rows_per_thread_p,
104111
- unique_ptr<ArrowArrayStreamWrapper> (*scanner_producer_p)(
104112
- uintptr_t stream_factory_ptr,
104113
- std::pair<std::unordered_map<idx_t, string>, std::vector<string>> &project_columns,
104114
- TableFilterCollection *filters),
104115
- uintptr_t stream_factory_ptr_p, std::thread::id thread_id_p)
104116
- : lines_read(0), rows_per_thread(rows_per_thread_p), stream_factory_ptr(stream_factory_ptr_p),
104117
- scanner_producer(scanner_producer_p), number_of_rows(0), thread_id(thread_id_p) {
104118
- }
104119
- #endif
104120
-
104121
- ArrowScanFunctionData(idx_t rows_per_thread_p,
104122
- unique_ptr<ArrowArrayStreamWrapper> (*scanner_producer_p)(
104123
- uintptr_t stream_factory_ptr,
104124
- std::pair<std::unordered_map<idx_t, string>, std::vector<string>> &project_columns,
104125
- TableFilterCollection *filters),
104141
+ struct ArrowScanFunctionData : public TableFunctionData {
104142
+ ArrowScanFunctionData(idx_t rows_per_thread_p, stream_factory_produce_t scanner_producer_p,
104126
104143
  uintptr_t stream_factory_ptr_p)
104127
104144
  : lines_read(0), rows_per_thread(rows_per_thread_p), stream_factory_ptr(stream_factory_ptr_p),
104128
104145
  scanner_producer(scanner_producer_p), number_of_rows(0) {
104129
104146
  }
104130
104147
  //! This holds the original list type (col_idx, [ArrowListType,size])
104131
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> arrow_convert_data;
104132
- std::atomic<idx_t> lines_read;
104148
+ unordered_map<idx_t, unique_ptr<ArrowConvertData>> arrow_convert_data;
104149
+ atomic<idx_t> lines_read;
104133
104150
  ArrowSchemaWrapper schema_root;
104134
104151
  idx_t rows_per_thread;
104135
104152
  //! Pointer to the scanner factory
104136
104153
  uintptr_t stream_factory_ptr;
104137
104154
  //! Pointer to the scanner factory produce
104138
- unique_ptr<ArrowArrayStreamWrapper> (*scanner_producer)(
104139
- uintptr_t stream_factory_ptr,
104140
- std::pair<std::unordered_map<idx_t, string>, std::vector<string>> &project_columns,
104141
- TableFilterCollection *filters);
104155
+ stream_factory_produce_t scanner_producer;
104142
104156
  //! Number of rows (Used in cardinality and progress bar)
104143
104157
  int64_t number_of_rows;
104144
- #ifndef DUCKDB_NO_THREADS
104145
- // Thread that made first call in the binder
104146
- std::thread::id thread_id;
104147
- #endif
104148
104158
  };
104149
104159
 
104150
//! Per-scan local state for the Arrow table function (renamed from ArrowScanState and rebased onto
//! LocalTableFunctionState). Holds the stream, the current chunk with its read offset, the column ids, the
//! dictionary child vectors and a non-owning filter pointer whose type changes to TableFilterSet.
- struct ArrowScanState : public FunctionOperatorData {
104151
- explicit ArrowScanState(unique_ptr<ArrowArrayWrapper> current_chunk) : chunk(move(current_chunk)) {
104160
+ struct ArrowScanLocalState : public LocalTableFunctionState {
104161
+ explicit ArrowScanLocalState(unique_ptr<ArrowArrayWrapper> current_chunk) : chunk(move(current_chunk)) {
104152
104162
  }
104163
+
104153
104164
  unique_ptr<ArrowArrayStreamWrapper> stream;
104154
104165
  shared_ptr<ArrowArrayWrapper> chunk;
104155
104166
  idx_t chunk_offset = 0;
104156
104167
  vector<column_t> column_ids;
104157
104168
  //! Store child vectors for Arrow Dictionary Vectors (col-idx,vector)
104158
104169
  unordered_map<idx_t, unique_ptr<Vector>> arrow_dictionary_vectors;
104159
- TableFilterCollection *filters = nullptr;
104170
+ TableFilterSet *filters = nullptr;
104160
104171
  };
104161
104172
 
104162
//! Global state for the Arrow table function (renamed from ParallelArrowScanState and rebased onto
//! GlobalTableFunctionState). Keeps the stream, a mutex and a ready flag; the new max_threads member
//! defaults to 1 and is what the MaxThreads() override returns.
- struct ParallelArrowScanState : public ParallelState {
104163
- ParallelArrowScanState() {
104164
- }
104173
+ struct ArrowScanGlobalState : public GlobalTableFunctionState {
104165
104174
  unique_ptr<ArrowArrayStreamWrapper> stream;
104166
- std::mutex main_mutex;
104175
+ mutex main_mutex;
104167
104176
  bool ready = false;
104177
+ idx_t max_threads = 1;
104178
+
104179
+ idx_t MaxThreads() const override {
104180
+ return max_threads;
104181
+ }
104168
104182
  };
104169
104183
 
104170
104184
  struct ArrowTableFunction {
@@ -104176,45 +104190,30 @@ private:
104176
104190
  static unique_ptr<FunctionData> ArrowScanBind(ClientContext &context, TableFunctionBindInput &input,
104177
104191
  vector<LogicalType> &return_types, vector<string> &names);
104178
104192
  //! Actual conversion from Arrow to DuckDB
104179
- static void ArrowToDuckDB(ArrowScanState &scan_state,
104193
+ static void ArrowToDuckDB(ArrowScanLocalState &scan_state,
104180
104194
  std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
104181
104195
  DataChunk &output, idx_t start);
104182
104196
 
104183
- //! -----Single Thread Functions:-----
104184
- //! Initialize Single Thread Scan
104185
- static unique_ptr<FunctionOperatorData> ArrowScanInit(ClientContext &context, const FunctionData *bind_data,
104186
- const vector<column_t> &column_ids,
104187
- TableFilterCollection *filters);
104188
-
104189
- //! Scan Function for Single Thread Execution
104190
- static void ArrowScanFunction(ClientContext &context, const FunctionData *bind_data,
104191
- FunctionOperatorData *operator_state, DataChunk &output);
104192
-
104193
- //! -----Multi Thread Functions:-----
104194
- //! Initialize Parallel State
104195
- static unique_ptr<ParallelState> ArrowScanInitParallelState(ClientContext &context, const FunctionData *bind_data_p,
104196
- const vector<column_t> &column_ids,
104197
- TableFilterCollection *filters);
104198
- //! Initialize Parallel Scans
104199
- static unique_ptr<FunctionOperatorData> ArrowScanParallelInit(ClientContext &context,
104200
- const FunctionData *bind_data_p, ParallelState *state,
104201
- const vector<column_t> &column_ids,
104202
- TableFilterCollection *filters);
104197
+ //! Initialize Global State
104198
+ static unique_ptr<GlobalTableFunctionState> ArrowScanInitGlobal(ClientContext &context,
104199
+ TableFunctionInitInput &input);
104200
+
104201
+ //! Initialize Local State
104202
+ static unique_ptr<LocalTableFunctionState> ArrowScanInitLocal(ClientContext &context, TableFunctionInitInput &input,
104203
+ GlobalTableFunctionState *global_state);
104204
+
104205
+ //! Scan Function
104206
+ static void ArrowScanFunction(ClientContext &context, TableFunctionInput &data, DataChunk &output);
104207
+
104203
104208
  //! Defines Maximum Number of Threads
104204
- static idx_t ArrowScanMaxThreads(ClientContext &context, const FunctionData *bind_data_p);
104205
- //! Scan Function for Parallel Execution
104206
- static void ArrowScanFunctionParallel(ClientContext &context, const FunctionData *bind_data,
104207
- FunctionOperatorData *operator_state, DataChunk &output,
104208
- ParallelState *parallel_state_p);
104209
- //! Get next chunk for the running thread
104210
- static bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p,
104211
- FunctionOperatorData *operator_state, ParallelState *parallel_state_p);
104209
+ static idx_t ArrowScanMaxThreads(ClientContext &context, const FunctionData *bind_data);
104212
104210
 
104213
104211
  //! -----Utility Functions:-----
104214
104212
  //! Gets Arrow Table's Cardinality
104215
104213
  static unique_ptr<NodeStatistics> ArrowScanCardinality(ClientContext &context, const FunctionData *bind_data);
104216
104214
  //! Gets the progress on the table scan, used for Progress Bars
104217
- static double ArrowProgress(ClientContext &context, const FunctionData *bind_data_p);
104215
+ static double ArrowProgress(ClientContext &context, const FunctionData *bind_data,
104216
+ const GlobalTableFunctionState *global_state);
104218
104217
  };
104219
104218
 
104220
104219
  } // namespace duckdb
@@ -104224,12 +104223,6 @@ private:
104224
104223
 
104225
104224
 
104226
104225
 
104227
-
104228
-
104229
-
104230
-
104231
- #include <map>
104232
-
104233
104226
  namespace duckdb {
104234
104227
 
104235
104228
  LogicalType GetArrowLogicalType(ArrowSchema &schema,
@@ -104413,26 +104406,14 @@ void RenameArrowColumns(vector<string> &names) {
104413
104406
 
104414
104407
  unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &context, TableFunctionBindInput &input,
104415
104408
  vector<LogicalType> &return_types, vector<string> &names) {
104416
- typedef unique_ptr<ArrowArrayStreamWrapper> (*stream_factory_produce_t)(
104417
- uintptr_t stream_factory_ptr,
104418
- std::pair<std::unordered_map<idx_t, string>, std::vector<string>> & project_columns,
104419
- TableFilterCollection * filters);
104420
-
104421
- typedef void (*stream_factory_get_schema_t)(uintptr_t stream_factory_ptr, ArrowSchemaWrapper & schema);
104422
-
104423
104409
  auto stream_factory_ptr = input.inputs[0].GetPointer();
104424
104410
  auto stream_factory_produce = (stream_factory_produce_t)input.inputs[1].GetPointer();
104425
104411
  auto stream_factory_get_schema = (stream_factory_get_schema_t)input.inputs[2].GetPointer();
104426
104412
  auto rows_per_thread = input.inputs[3].GetValue<uint64_t>();
104427
104413
 
104428
- std::pair<std::unordered_map<idx_t, string>, std::vector<string>> project_columns;
104429
- #ifndef DUCKDB_NO_THREADS
104430
-
104431
- auto res = make_unique<ArrowScanFunctionData>(rows_per_thread, stream_factory_produce, stream_factory_ptr,
104432
- std::this_thread::get_id());
104433
- #else
104414
+ pair<unordered_map<idx_t, string>, vector<string>> project_columns;
104434
104415
  auto res = make_unique<ArrowScanFunctionData>(rows_per_thread, stream_factory_produce, stream_factory_ptr);
104435
- #endif
104416
+
104436
104417
  auto &data = *res;
104437
104418
  stream_factory_get_schema(stream_factory_ptr, data.schema_root);
104438
104419
  for (idx_t col_idx = 0; col_idx < (idx_t)data.schema_root.arrow_schema.n_children; col_idx++) {
@@ -104459,8 +104440,7 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
104459
104440
  }
104460
104441
 
104461
104442
  unique_ptr<ArrowArrayStreamWrapper> ProduceArrowScan(const ArrowScanFunctionData &function,
104462
- const vector<column_t> &column_ids,
104463
- TableFilterCollection *filters) {
104443
+ const vector<column_t> &column_ids, TableFilterSet *filters) {
104464
104444
  //! Generate Projection Pushdown Vector
104465
104445
  pair<unordered_map<idx_t, string>, vector<string>> project_columns;
104466
104446
  D_ASSERT(!column_ids.empty());
@@ -104475,18 +104455,114 @@ unique_ptr<ArrowArrayStreamWrapper> ProduceArrowScan(const ArrowScanFunctionData
104475
104455
  return function.scanner_producer(function.stream_factory_ptr, project_columns, filters);
104476
104456
  }
104477
104457
 
104478
- unique_ptr<FunctionOperatorData> ArrowTableFunction::ArrowScanInit(ClientContext &context,
104479
- const FunctionData *bind_data,
104480
- const vector<column_t> &column_ids,
104481
- TableFilterCollection *filters) {
104458
+ idx_t ArrowTableFunction::ArrowScanMaxThreads(ClientContext &context, const FunctionData *bind_data_p) {
104459
+ auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p;
104460
+ if (bind_data.number_of_rows <= 0 || ClientConfig::GetConfig(context).verify_parallelism) {
104461
+ return context.db->NumberOfThreads();
104462
+ }
104463
+ return ((bind_data.number_of_rows + bind_data.rows_per_thread - 1) / bind_data.rows_per_thread) + 1;
104464
+ }
104465
+
104466
+ bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, ArrowScanLocalState &state,
104467
+ ArrowScanGlobalState &parallel_state) {
104468
+ lock_guard<mutex> parallel_lock(parallel_state.main_mutex);
104469
+ state.chunk_offset = 0;
104470
+
104471
+ auto current_chunk = parallel_state.stream->GetNextChunk();
104472
+ while (current_chunk->arrow_array.length == 0 && current_chunk->arrow_array.release) {
104473
+ current_chunk = parallel_state.stream->GetNextChunk();
104474
+ }
104475
+ state.chunk = move(current_chunk);
104476
+ //! have we run out of chunks? we are done
104477
+ if (!state.chunk->arrow_array.release) {
104478
+ return false;
104479
+ }
104480
+ return true;
104481
+ }
104482
+
104483
+ unique_ptr<GlobalTableFunctionState> ArrowTableFunction::ArrowScanInitGlobal(ClientContext &context,
104484
+ TableFunctionInitInput &input) {
104485
+ auto &bind_data = (const ArrowScanFunctionData &)*input.bind_data;
104486
+ auto result = make_unique<ArrowScanGlobalState>();
104487
+ result->stream = ProduceArrowScan(bind_data, input.column_ids, input.filters);
104488
+ result->max_threads = ArrowScanMaxThreads(context, input.bind_data);
104489
+ return move(result);
104490
+ }
104491
+
104492
+ unique_ptr<LocalTableFunctionState> ArrowTableFunction::ArrowScanInitLocal(ClientContext &context,
104493
+ TableFunctionInitInput &input,
104494
+ GlobalTableFunctionState *global_state_p) {
104495
+ auto &global_state = (ArrowScanGlobalState &)*global_state_p;
104482
104496
  auto current_chunk = make_unique<ArrowArrayWrapper>();
104483
- auto result = make_unique<ArrowScanState>(move(current_chunk));
104484
- result->column_ids = column_ids;
104485
- auto &data = (const ArrowScanFunctionData &)*bind_data;
104486
- result->stream = ProduceArrowScan(data, column_ids, filters);
104497
+ auto result = make_unique<ArrowScanLocalState>(move(current_chunk));
104498
+ result->column_ids = input.column_ids;
104499
+ result->filters = input.filters;
104500
+ if (!ArrowScanParallelStateNext(context, input.bind_data, *result, global_state)) {
104501
+ return nullptr;
104502
+ }
104487
104503
  return move(result);
104488
104504
  }
104489
104505
 
104506
+ void ArrowTableFunction::ArrowScanFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
104507
+ if (!data_p.local_state) {
104508
+ return;
104509
+ }
104510
+ auto &data = (ArrowScanFunctionData &)*data_p.bind_data;
104511
+ auto &state = (ArrowScanLocalState &)*data_p.local_state;
104512
+ auto &global_state = (ArrowScanGlobalState &)*data_p.global_state;
104513
+
104514
+ //! Out of tuples in this chunk
104515
+ if (state.chunk_offset >= (idx_t)state.chunk->arrow_array.length) {
104516
+ if (!ArrowScanParallelStateNext(context, data_p.bind_data, state, global_state)) {
104517
+ return;
104518
+ }
104519
+ }
104520
+ int64_t output_size = MinValue<int64_t>(STANDARD_VECTOR_SIZE, state.chunk->arrow_array.length - state.chunk_offset);
104521
+ data.lines_read += output_size;
104522
+ output.SetCardinality(output_size);
104523
+ ArrowToDuckDB(state, data.arrow_convert_data, output, data.lines_read - output_size);
104524
+ output.Verify();
104525
+ state.chunk_offset += output.size();
104526
+ }
104527
+
104528
+ unique_ptr<NodeStatistics> ArrowTableFunction::ArrowScanCardinality(ClientContext &context, const FunctionData *data) {
104529
+ auto &bind_data = (ArrowScanFunctionData &)*data;
104530
+ return make_unique<NodeStatistics>(bind_data.number_of_rows, bind_data.number_of_rows);
104531
+ }
104532
+
104533
+ double ArrowTableFunction::ArrowProgress(ClientContext &context, const FunctionData *bind_data_p,
104534
+ const GlobalTableFunctionState *global_state) {
104535
+ auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p;
104536
+ if (bind_data.number_of_rows == 0) {
104537
+ return 100;
104538
+ }
104539
+ auto percentage = bind_data.lines_read * 100.0 / bind_data.number_of_rows;
104540
+ return percentage;
104541
+ }
104542
+
104543
+ void ArrowTableFunction::RegisterFunction(BuiltinFunctions &set) {
104544
+ TableFunction arrow("arrow_scan",
104545
+ {LogicalType::POINTER, LogicalType::POINTER, LogicalType::POINTER, LogicalType::UBIGINT},
104546
+ ArrowScanFunction, ArrowScanBind, ArrowScanInitGlobal, ArrowScanInitLocal);
104547
+ arrow.cardinality = ArrowScanCardinality;
104548
+ arrow.projection_pushdown = true;
104549
+ arrow.filter_pushdown = true;
104550
+ arrow.table_scan_progress = ArrowProgress;
104551
+ set.AddFunction(arrow);
104552
+ }
104553
+
104554
+ void BuiltinFunctions::RegisterArrowFunctions() {
104555
+ ArrowTableFunction::RegisterFunction(*this);
104556
+ }
104557
+ } // namespace duckdb
104558
+
104559
+
104560
+
104561
+
104562
+
104563
+
104564
+ namespace duckdb {
104565
+
104490
104566
  void ShiftRight(unsigned char *ar, int size, int shift) {
104491
104567
  int carry = 0;
104492
104568
  while (shift--) {
@@ -104498,8 +104574,8 @@ void ShiftRight(unsigned char *ar, int size, int shift) {
104498
104574
  }
104499
104575
  }
104500
104576
 
104501
- void SetValidityMask(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size, int64_t nested_offset,
104502
- bool add_null = false) {
104577
+ void SetValidityMask(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
104578
+ int64_t nested_offset, bool add_null = false) {
104503
104579
  auto &mask = FlatVector::Validity(vector);
104504
104580
  if (array.null_count != 0 && array.buffers[0]) {
104505
104581
  D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR);
@@ -104530,7 +104606,7 @@ void SetValidityMask(Vector &vector, ArrowArray &array, ArrowScanState &scan_sta
104530
104606
  }
104531
104607
  }
104532
104608
 
104533
- void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanState &scan_state, idx_t size) {
104609
+ void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size) {
104534
104610
  if (array.null_count != 0 && array.buffers[0]) {
104535
104611
  auto bit_offset = scan_state.chunk_offset + array.offset;
104536
104612
  auto n_bitmask_bytes = (size + 8 - 1) / 8;
@@ -104549,12 +104625,12 @@ void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanState &scan
104549
104625
  }
104550
104626
  }
104551
104627
 
104552
- void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size,
104628
+ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
104553
104629
  std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
104554
104630
  std::pair<idx_t, idx_t> &arrow_convert_idx, int64_t nested_offset = -1,
104555
104631
  ValidityMask *parent_mask = nullptr);
104556
104632
 
104557
- void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size,
104633
+ void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
104558
104634
  std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
104559
104635
  std::pair<idx_t, idx_t> &arrow_convert_idx, int64_t nested_offset, ValidityMask *parent_mask) {
104560
104636
  auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.first++];
@@ -104631,7 +104707,7 @@ void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanState &scan_s
104631
104707
  }
104632
104708
  }
104633
104709
 
104634
- void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size,
104710
+ void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
104635
104711
  std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
104636
104712
  std::pair<idx_t, idx_t> &arrow_convert_idx, int64_t nested_offset) {
104637
104713
  auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.first++];
@@ -104687,9 +104763,9 @@ void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, ArrowScanState &scan_s
104687
104763
  }
104688
104764
  }
104689
104765
 
104690
- void ArrowToDuckDBMapList(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size,
104691
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
104692
- std::pair<idx_t, idx_t> &arrow_convert_idx, uint32_t *offsets, ValidityMask *parent_mask) {
104766
+ void ArrowToDuckDBMapList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
104767
+ unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
104768
+ pair<idx_t, idx_t> &arrow_convert_idx, uint32_t *offsets, ValidityMask *parent_mask) {
104693
104769
  idx_t list_size = offsets[size] - offsets[0];
104694
104770
  ListVector::Reserve(vector, list_size);
104695
104771
 
@@ -104741,7 +104817,7 @@ static void SetVectorString(Vector &vector, idx_t size, char *cdata, T *offsets)
104741
104817
  }
104742
104818
  }
104743
104819
 
104744
- void DirectConversion(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, int64_t nested_offset) {
104820
+ void DirectConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset) {
104745
104821
  auto internal_type = GetTypeIdSize(vector.GetType().InternalType());
104746
104822
  auto data_ptr = (data_ptr_t)array.buffers[1] + internal_type * (scan_state.chunk_offset + array.offset);
104747
104823
  if (nested_offset != -1) {
@@ -104751,8 +104827,8 @@ void DirectConversion(Vector &vector, ArrowArray &array, ArrowScanState &scan_st
104751
104827
  }
104752
104828
 
104753
104829
  template <class T>
104754
- void TimeConversion(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, int64_t nested_offset, idx_t size,
104755
- int64_t conversion) {
104830
+ void TimeConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
104831
+ idx_t size, int64_t conversion) {
104756
104832
  auto tgt_ptr = (dtime_t *)FlatVector::GetData(vector);
104757
104833
  auto &validity_mask = FlatVector::Validity(vector);
104758
104834
  auto src_ptr = (T *)array.buffers[1] + scan_state.chunk_offset + array.offset;
@@ -104769,7 +104845,7 @@ void TimeConversion(Vector &vector, ArrowArray &array, ArrowScanState &scan_stat
104769
104845
  }
104770
104846
  }
104771
104847
 
104772
- void TimestampTZConversion(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, int64_t nested_offset,
104848
+ void TimestampTZConversion(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
104773
104849
  idx_t size, int64_t conversion) {
104774
104850
  auto tgt_ptr = (timestamp_t *)FlatVector::GetData(vector);
104775
104851
  auto &validity_mask = FlatVector::Validity(vector);
@@ -104787,7 +104863,7 @@ void TimestampTZConversion(Vector &vector, ArrowArray &array, ArrowScanState &sc
104787
104863
  }
104788
104864
  }
104789
104865
 
104790
- void IntervalConversionUs(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, int64_t nested_offset,
104866
+ void IntervalConversionUs(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
104791
104867
  idx_t size, int64_t conversion) {
104792
104868
  auto tgt_ptr = (interval_t *)FlatVector::GetData(vector);
104793
104869
  auto src_ptr = (int64_t *)array.buffers[1] + scan_state.chunk_offset + array.offset;
@@ -104803,7 +104879,7 @@ void IntervalConversionUs(Vector &vector, ArrowArray &array, ArrowScanState &sca
104803
104879
  }
104804
104880
  }
104805
104881
 
104806
- void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, int64_t nested_offset,
104882
+ void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, int64_t nested_offset,
104807
104883
  idx_t size) {
104808
104884
  auto tgt_ptr = (interval_t *)FlatVector::GetData(vector);
104809
104885
  auto src_ptr = (int32_t *)array.buffers[1] + scan_state.chunk_offset + array.offset;
@@ -104817,7 +104893,7 @@ void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowScanState
104817
104893
  }
104818
104894
  }
104819
104895
 
104820
- void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size,
104896
+ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
104821
104897
  std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
104822
104898
  std::pair<idx_t, idx_t> &arrow_convert_idx, int64_t nested_offset, ValidityMask *parent_mask) {
104823
104899
  switch (vector.GetType().id()) {
@@ -105242,7 +105318,7 @@ void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, LogicalType
105242
105318
  }
105243
105319
  }
105244
105320
 
105245
- void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowScanState &scan_state, idx_t size,
105321
+ void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
105246
105322
  std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
105247
105323
  idx_t col_idx, std::pair<idx_t, idx_t> &arrow_convert_idx) {
105248
105324
  SelectionVector sel;
@@ -105269,8 +105345,8 @@ void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowScanS
105269
105345
  vector.Slice(*dict_vectors[col_idx], sel, size);
105270
105346
  }
105271
105347
 
105272
- void ArrowTableFunction::ArrowToDuckDB(ArrowScanState &scan_state,
105273
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
105348
+ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state,
105349
+ unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
105274
105350
  DataChunk &output, idx_t start) {
105275
105351
  for (idx_t idx = 0; idx < output.ColumnCount(); idx++) {
105276
105352
  auto col_idx = scan_state.column_ids[idx];
@@ -105294,125 +105370,6 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanState &scan_state,
105294
105370
  }
105295
105371
  }
105296
105372
 
105297
- void ArrowTableFunction::ArrowScanFunction(ClientContext &context, const FunctionData *bind_data,
105298
- FunctionOperatorData *operator_state, DataChunk &output) {
105299
-
105300
- auto &data = (ArrowScanFunctionData &)*bind_data;
105301
- auto &state = (ArrowScanState &)*operator_state;
105302
-
105303
- //! have we run out of data on the current chunk? move to next one
105304
- while (state.chunk_offset >= (idx_t)state.chunk->arrow_array.length) {
105305
- state.chunk_offset = 0;
105306
- state.arrow_dictionary_vectors.clear();
105307
- state.chunk = state.stream->GetNextChunk();
105308
- //! have we run out of chunks? we are done
105309
- if (!state.chunk->arrow_array.release) {
105310
- return;
105311
- }
105312
- }
105313
-
105314
- int64_t output_size = MinValue<int64_t>(STANDARD_VECTOR_SIZE, state.chunk->arrow_array.length - state.chunk_offset);
105315
- data.lines_read += output_size;
105316
- output.SetCardinality(output_size);
105317
- ArrowToDuckDB(state, data.arrow_convert_data, output, data.lines_read - output_size);
105318
- output.Verify();
105319
- state.chunk_offset += output.size();
105320
- }
105321
-
105322
- void ArrowTableFunction::ArrowScanFunctionParallel(ClientContext &context, const FunctionData *bind_data,
105323
- FunctionOperatorData *operator_state, DataChunk &output,
105324
- ParallelState *parallel_state_p) {
105325
- auto &data = (ArrowScanFunctionData &)*bind_data;
105326
- auto &state = (ArrowScanState &)*operator_state;
105327
- //! Out of tuples in this chunk
105328
- if (state.chunk_offset >= (idx_t)state.chunk->arrow_array.length) {
105329
- return;
105330
- }
105331
- int64_t output_size = MinValue<int64_t>(STANDARD_VECTOR_SIZE, state.chunk->arrow_array.length - state.chunk_offset);
105332
- data.lines_read += output_size;
105333
- output.SetCardinality(output_size);
105334
- ArrowToDuckDB(state, data.arrow_convert_data, output, data.lines_read - output_size);
105335
- output.Verify();
105336
- state.chunk_offset += output.size();
105337
- }
105338
-
105339
- idx_t ArrowTableFunction::ArrowScanMaxThreads(ClientContext &context, const FunctionData *bind_data_p) {
105340
- auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p;
105341
- if (bind_data.number_of_rows <= 0 || ClientConfig::GetConfig(context).verify_parallelism) {
105342
- return context.db->NumberOfThreads();
105343
- }
105344
- return ((bind_data.number_of_rows + bind_data.rows_per_thread - 1) / bind_data.rows_per_thread) + 1;
105345
- }
105346
-
105347
- unique_ptr<ParallelState> ArrowTableFunction::ArrowScanInitParallelState(ClientContext &context,
105348
- const FunctionData *bind_data_p,
105349
- const vector<column_t> &column_ids,
105350
- TableFilterCollection *filters) {
105351
- auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p;
105352
- auto result = make_unique<ParallelArrowScanState>();
105353
- result->stream = ProduceArrowScan(bind_data, column_ids, filters);
105354
- return move(result);
105355
- }
105356
-
105357
- bool ArrowTableFunction::ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p,
105358
- FunctionOperatorData *operator_state,
105359
- ParallelState *parallel_state_p) {
105360
- auto &state = (ArrowScanState &)*operator_state;
105361
- auto &parallel_state = (ParallelArrowScanState &)*parallel_state_p;
105362
-
105363
- lock_guard<mutex> parallel_lock(parallel_state.main_mutex);
105364
- state.chunk_offset = 0;
105365
-
105366
- auto current_chunk = parallel_state.stream->GetNextChunk();
105367
- while (current_chunk->arrow_array.length == 0 && current_chunk->arrow_array.release) {
105368
- current_chunk = parallel_state.stream->GetNextChunk();
105369
- }
105370
- state.chunk = move(current_chunk);
105371
- //! have we run out of chunks? we are done
105372
- if (!state.chunk->arrow_array.release) {
105373
- return false;
105374
- }
105375
- return true;
105376
- }
105377
-
105378
- unique_ptr<FunctionOperatorData>
105379
- ArrowTableFunction::ArrowScanParallelInit(ClientContext &context, const FunctionData *bind_data_p, ParallelState *state,
105380
- const vector<column_t> &column_ids, TableFilterCollection *filters) {
105381
- auto current_chunk = make_unique<ArrowArrayWrapper>();
105382
- auto result = make_unique<ArrowScanState>(move(current_chunk));
105383
- result->column_ids = column_ids;
105384
- result->filters = filters;
105385
- ArrowScanParallelStateNext(context, bind_data_p, result.get(), state);
105386
- return move(result);
105387
- }
105388
-
105389
- unique_ptr<NodeStatistics> ArrowTableFunction::ArrowScanCardinality(ClientContext &context, const FunctionData *data) {
105390
- auto &bind_data = (ArrowScanFunctionData &)*data;
105391
- return make_unique<NodeStatistics>(bind_data.number_of_rows, bind_data.number_of_rows);
105392
- }
105393
-
105394
- double ArrowTableFunction::ArrowProgress(ClientContext &context, const FunctionData *bind_data_p) {
105395
- auto &bind_data = (const ArrowScanFunctionData &)*bind_data_p;
105396
- if (bind_data.number_of_rows == 0) {
105397
- return 100;
105398
- }
105399
- auto percentage = bind_data.lines_read * 100.0 / bind_data.number_of_rows;
105400
- return percentage;
105401
- }
105402
-
105403
- void ArrowTableFunction::RegisterFunction(BuiltinFunctions &set) {
105404
- TableFunctionSet arrow("arrow_scan");
105405
- arrow.AddFunction(
105406
- TableFunction({LogicalType::POINTER, LogicalType::POINTER, LogicalType::POINTER, LogicalType::UBIGINT},
105407
- ArrowScanFunction, ArrowScanBind, ArrowScanInit, nullptr, nullptr, nullptr, ArrowScanCardinality,
105408
- nullptr, nullptr, ArrowScanMaxThreads, ArrowScanInitParallelState, ArrowScanFunctionParallel,
105409
- ArrowScanParallelInit, ArrowScanParallelStateNext, true, true, ArrowProgress));
105410
- set.AddFunction(arrow);
105411
- }
105412
-
105413
- void BuiltinFunctions::RegisterArrowFunctions() {
105414
- ArrowTableFunction::RegisterFunction(*this);
105415
- }
105416
105373
  } // namespace duckdb
105417
105374
  //===----------------------------------------------------------------------===//
105418
105375
  // DuckDB
@@ -105464,8 +105421,7 @@ static unique_ptr<FunctionData> CheckpointBind(ClientContext &context, TableFunc
105464
105421
  }
105465
105422
 
105466
105423
  template <bool FORCE>
105467
- static void TemplatedCheckpointFunction(ClientContext &context, const FunctionData *bind_data_p,
105468
- FunctionOperatorData *operator_state, DataChunk &output) {
105424
+ static void TemplatedCheckpointFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
105469
105425
  auto &transaction_manager = TransactionManager::Get(context);
105470
105426
  transaction_manager.Checkpoint(context, FORCE);
105471
105427
  }
@@ -105525,10 +105481,6 @@ struct ReadCSVData : public BaseCSVData {
105525
105481
  //! The initial reader (if any): this is used when automatic detection is used during binding.
105526
105482
  //! In this case, the CSV reader is already created and might as well be re-used.
105527
105483
  unique_ptr<BufferedCSVReader> initial_reader;
105528
- //! Total File Size
105529
- atomic<idx_t> file_size;
105530
- //! How many bytes were read up to this point
105531
- atomic<idx_t> bytes_read;
105532
105484
  };
105533
105485
 
105534
105486
  struct CSVCopyFunction {
@@ -105965,23 +105917,20 @@ static unique_ptr<FunctionData> GlobFunctionBind(ClientContext &context, TableFu
105965
105917
  return move(result);
105966
105918
  }
105967
105919
 
105968
- struct GlobFunctionState : public FunctionOperatorData {
105920
+ struct GlobFunctionState : public GlobalTableFunctionState {
105969
105921
  GlobFunctionState() : current_idx(0) {
105970
105922
  }
105971
105923
 
105972
105924
  idx_t current_idx;
105973
105925
  };
105974
105926
 
105975
- static unique_ptr<FunctionOperatorData> GlobFunctionInit(ClientContext &context, const FunctionData *bind_data,
105976
- const vector<column_t> &column_ids,
105977
- TableFilterCollection *filters) {
105927
+ static unique_ptr<GlobalTableFunctionState> GlobFunctionInit(ClientContext &context, TableFunctionInitInput &input) {
105978
105928
  return make_unique<GlobFunctionState>();
105979
105929
  }
105980
105930
 
105981
- static void GlobFunction(ClientContext &context, const FunctionData *bind_data_p, FunctionOperatorData *state_p,
105982
- DataChunk &output) {
105983
- auto &bind_data = (GlobFunctionBindData &)*bind_data_p;
105984
- auto &state = (GlobFunctionState &)*state_p;
105931
+ static void GlobFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
105932
+ auto &bind_data = (GlobFunctionBindData &)*data_p.bind_data;
105933
+ auto &state = (GlobFunctionState &)*data_p.global_state;
105985
105934
 
105986
105935
  idx_t count = 0;
105987
105936
  idx_t next_idx = MinValue<idx_t>(state.current_idx + STANDARD_VECTOR_SIZE, bind_data.files.size());
@@ -106113,7 +106062,7 @@ struct TestAllTypesFun {
106113
106062
 
106114
106063
  namespace duckdb {
106115
106064
 
106116
- struct PragmaDetailedProfilingOutputOperatorData : public FunctionOperatorData {
106065
+ struct PragmaDetailedProfilingOutputOperatorData : public GlobalTableFunctionState {
106117
106066
  explicit PragmaDetailedProfilingOutputOperatorData() : chunk_index(0), initialized(false) {
106118
106067
  }
106119
106068
  idx_t chunk_index;
@@ -106160,10 +106109,8 @@ static unique_ptr<FunctionData> PragmaDetailedProfilingOutputBind(ClientContext
106160
106109
  return make_unique<PragmaDetailedProfilingOutputData>(return_types);
106161
106110
  }
106162
106111
 
106163
- unique_ptr<FunctionOperatorData> PragmaDetailedProfilingOutputInit(ClientContext &context,
106164
- const FunctionData *bind_data,
106165
- const vector<column_t> &column_ids,
106166
- TableFilterCollection *filters) {
106112
+ unique_ptr<GlobalTableFunctionState> PragmaDetailedProfilingOutputInit(ClientContext &context,
106113
+ TableFunctionInitInput &input) {
106167
106114
  return make_unique<PragmaDetailedProfilingOutputOperatorData>();
106168
106115
  }
106169
106116
 
@@ -106210,10 +106157,10 @@ static void ExtractFunctions(ChunkCollection &collection, ExpressionInfo &info,
106210
106157
  }
106211
106158
  }
106212
106159
 
106213
- static void PragmaDetailedProfilingOutputFunction(ClientContext &context, const FunctionData *bind_data_p,
106214
- FunctionOperatorData *operator_state, DataChunk &output) {
106215
- auto &state = (PragmaDetailedProfilingOutputOperatorData &)*operator_state;
106216
- auto &data = (PragmaDetailedProfilingOutputData &)*bind_data_p;
106160
+ static void PragmaDetailedProfilingOutputFunction(ClientContext &context, TableFunctionInput &data_p,
106161
+ DataChunk &output) {
106162
+ auto &state = (PragmaDetailedProfilingOutputOperatorData &)*data_p.global_state;
106163
+ auto &data = (PragmaDetailedProfilingOutputData &)*data_p.bind_data;
106217
106164
 
106218
106165
  if (!state.initialized) {
106219
106166
  // create a ChunkCollection
@@ -106288,7 +106235,7 @@ void PragmaDetailedProfilingOutput::RegisterFunction(BuiltinFunctions &set) {
106288
106235
 
106289
106236
  namespace duckdb {
106290
106237
 
106291
- struct PragmaLastProfilingOutputOperatorData : public FunctionOperatorData {
106238
+ struct PragmaLastProfilingOutputOperatorData : public GlobalTableFunctionState {
106292
106239
  PragmaLastProfilingOutputOperatorData() : chunk_index(0), initialized(false) {
106293
106240
  }
106294
106241
  idx_t chunk_index;
@@ -106332,16 +106279,14 @@ static void SetValue(DataChunk &output, int index, int op_id, string name, doubl
106332
106279
  output.SetValue(4, index, move(description));
106333
106280
  }
106334
106281
 
106335
- unique_ptr<FunctionOperatorData> PragmaLastProfilingOutputInit(ClientContext &context, const FunctionData *bind_data,
106336
- const vector<column_t> &column_ids,
106337
- TableFilterCollection *filters) {
106282
+ unique_ptr<GlobalTableFunctionState> PragmaLastProfilingOutputInit(ClientContext &context,
106283
+ TableFunctionInitInput &input) {
106338
106284
  return make_unique<PragmaLastProfilingOutputOperatorData>();
106339
106285
  }
106340
106286
 
106341
- static void PragmaLastProfilingOutputFunction(ClientContext &context, const FunctionData *bind_data_p,
106342
- FunctionOperatorData *operator_state, DataChunk &output) {
106343
- auto &state = (PragmaLastProfilingOutputOperatorData &)*operator_state;
106344
- auto &data = (PragmaLastProfilingOutputData &)*bind_data_p;
106287
+ static void PragmaLastProfilingOutputFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
106288
+ auto &state = (PragmaLastProfilingOutputOperatorData &)*data_p.global_state;
106289
+ auto &data = (PragmaLastProfilingOutputData &)*data_p.bind_data;
106345
106290
  if (!state.initialized) {
106346
106291
  // create a ChunkCollection
106347
106292
  auto collection = make_unique<ChunkCollection>();
@@ -106465,23 +106410,20 @@ static unique_ptr<FunctionData> RangeFunctionBind(ClientContext &context, TableF
106465
106410
  return move(result);
106466
106411
  }
106467
106412
 
106468
- struct RangeFunctionState : public FunctionOperatorData {
106413
+ struct RangeFunctionState : public GlobalTableFunctionState {
106469
106414
  RangeFunctionState() : current_idx(0) {
106470
106415
  }
106471
106416
 
106472
106417
  int64_t current_idx;
106473
106418
  };
106474
106419
 
106475
- static unique_ptr<FunctionOperatorData> RangeFunctionInit(ClientContext &context, const FunctionData *bind_data,
106476
- const vector<column_t> &column_ids,
106477
- TableFilterCollection *filters) {
106420
+ static unique_ptr<GlobalTableFunctionState> RangeFunctionInit(ClientContext &context, TableFunctionInitInput &input) {
106478
106421
  return make_unique<RangeFunctionState>();
106479
106422
  }
106480
106423
 
106481
- static void RangeFunction(ClientContext &context, const FunctionData *bind_data_p, FunctionOperatorData *state_p,
106482
- DataChunk &output) {
106483
- auto &bind_data = (RangeFunctionBindData &)*bind_data_p;
106484
- auto &state = (RangeFunctionState &)*state_p;
106424
+ static void RangeFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
106425
+ auto &bind_data = (RangeFunctionBindData &)*data_p.bind_data;
106426
+ auto &state = (RangeFunctionState &)*data_p.global_state;
106485
106427
 
106486
106428
  auto increment = bind_data.increment;
106487
106429
  auto end = bind_data.end;
@@ -106587,7 +106529,7 @@ static unique_ptr<FunctionData> RangeDateTimeBind(ClientContext &context, TableF
106587
106529
  return move(result);
106588
106530
  }
106589
106531
 
106590
- struct RangeDateTimeState : public FunctionOperatorData {
106532
+ struct RangeDateTimeState : public GlobalTableFunctionState {
106591
106533
  explicit RangeDateTimeState(timestamp_t start_p) : current_state(start_p) {
106592
106534
  }
106593
106535
 
@@ -106595,17 +106537,14 @@ struct RangeDateTimeState : public FunctionOperatorData {
106595
106537
  bool finished = false;
106596
106538
  };
106597
106539
 
106598
- static unique_ptr<FunctionOperatorData> RangeDateTimeInit(ClientContext &context, const FunctionData *bind_data_p,
106599
- const vector<column_t> &column_ids,
106600
- TableFilterCollection *filters) {
106601
- auto &bind_data = (RangeDateTimeBindData &)*bind_data_p;
106540
+ static unique_ptr<GlobalTableFunctionState> RangeDateTimeInit(ClientContext &context, TableFunctionInitInput &input) {
106541
+ auto &bind_data = (RangeDateTimeBindData &)*input.bind_data;
106602
106542
  return make_unique<RangeDateTimeState>(bind_data.start);
106603
106543
  }
106604
106544
 
106605
- static void RangeDateTimeFunction(ClientContext &context, const FunctionData *bind_data_p,
106606
- FunctionOperatorData *state_p, DataChunk &output) {
106607
- auto &bind_data = (RangeDateTimeBindData &)*bind_data_p;
106608
- auto &state = (RangeDateTimeState &)*state_p;
106545
+ static void RangeDateTimeFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
106546
+ auto &bind_data = (RangeDateTimeBindData &)*data_p.bind_data;
106547
+ auto &state = (RangeDateTimeState &)*data_p.global_state;
106609
106548
  if (state.finished) {
106610
106549
  return;
106611
106550
  }
@@ -106630,29 +106569,29 @@ static void RangeDateTimeFunction(ClientContext &context, const FunctionData *bi
106630
106569
  void RangeTableFunction::RegisterFunction(BuiltinFunctions &set) {
106631
106570
  TableFunctionSet range("range");
106632
106571
 
106572
+ TableFunction range_function({LogicalType::BIGINT}, RangeFunction, RangeFunctionBind<false>, RangeFunctionInit);
106573
+ range_function.cardinality = RangeCardinality;
106574
+
106633
106575
  // single argument range: (end) - implicit start = 0 and increment = 1
106634
- range.AddFunction(TableFunction({LogicalType::BIGINT}, RangeFunction, RangeFunctionBind<false>, RangeFunctionInit,
106635
- nullptr, nullptr, nullptr, RangeCardinality));
106576
+ range.AddFunction(range_function);
106636
106577
  // two arguments range: (start, end) - implicit increment = 1
106637
- range.AddFunction(TableFunction({LogicalType::BIGINT, LogicalType::BIGINT}, RangeFunction, RangeFunctionBind<false>,
106638
- RangeFunctionInit, nullptr, nullptr, nullptr, RangeCardinality));
106578
+ range_function.arguments = {LogicalType::BIGINT, LogicalType::BIGINT};
106579
+ range.AddFunction(range_function);
106639
106580
  // three arguments range: (start, end, increment)
106640
- range.AddFunction(TableFunction({LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT}, RangeFunction,
106641
- RangeFunctionBind<false>, RangeFunctionInit, nullptr, nullptr, nullptr,
106642
- RangeCardinality));
106581
+ range_function.arguments = {LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT};
106582
+ range.AddFunction(range_function);
106643
106583
  range.AddFunction(TableFunction({LogicalType::TIMESTAMP, LogicalType::TIMESTAMP, LogicalType::INTERVAL},
106644
106584
  RangeDateTimeFunction, RangeDateTimeBind<false>, RangeDateTimeInit));
106645
106585
  set.AddFunction(range);
106646
106586
  // generate_series: similar to range, but inclusive instead of exclusive bounds on the RHS
106647
106587
  TableFunctionSet generate_series("generate_series");
106648
- generate_series.AddFunction(TableFunction({LogicalType::BIGINT}, RangeFunction, RangeFunctionBind<true>,
106649
- RangeFunctionInit, nullptr, nullptr, nullptr, RangeCardinality));
106650
- generate_series.AddFunction(TableFunction({LogicalType::BIGINT, LogicalType::BIGINT}, RangeFunction,
106651
- RangeFunctionBind<true>, RangeFunctionInit, nullptr, nullptr, nullptr,
106652
- RangeCardinality));
106653
- generate_series.AddFunction(TableFunction({LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT},
106654
- RangeFunction, RangeFunctionBind<true>, RangeFunctionInit, nullptr,
106655
- nullptr, nullptr, RangeCardinality));
106588
+ range_function.bind = RangeFunctionBind<true>;
106589
+ range_function.arguments = {LogicalType::BIGINT};
106590
+ generate_series.AddFunction(range_function);
106591
+ range_function.arguments = {LogicalType::BIGINT, LogicalType::BIGINT};
106592
+ generate_series.AddFunction(range_function);
106593
+ range_function.arguments = {LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT};
106594
+ generate_series.AddFunction(range_function);
106656
106595
  generate_series.AddFunction(TableFunction({LogicalType::TIMESTAMP, LogicalType::TIMESTAMP, LogicalType::INTERVAL},
106657
106596
  RangeDateTimeFunction, RangeDateTimeBind<true>, RangeDateTimeInit));
106658
106597
  set.AddFunction(generate_series);
@@ -106757,17 +106696,19 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
106757
106696
  return move(result);
106758
106697
  }
106759
106698
 
106760
- struct ReadCSVOperatorData : public FunctionOperatorData {
106699
+ struct ReadCSVOperatorData : public GlobalTableFunctionState {
106761
106700
  //! The CSV reader
106762
106701
  unique_ptr<BufferedCSVReader> csv_reader;
106763
106702
  //! The index of the next file to read (i.e. current file + 1)
106764
106703
  idx_t file_index;
106704
+ //! Total File Size
106705
+ idx_t file_size;
106706
+ //! How many bytes were read up to this point
106707
+ atomic<idx_t> bytes_read;
106765
106708
  };
106766
106709
 
106767
- static unique_ptr<FunctionOperatorData> ReadCSVInit(ClientContext &context, const FunctionData *bind_data_p,
106768
- const vector<column_t> &column_ids,
106769
- TableFilterCollection *filters) {
106770
- auto &bind_data = (ReadCSVData &)*bind_data_p;
106710
+ static unique_ptr<GlobalTableFunctionState> ReadCSVInit(ClientContext &context, TableFunctionInitInput &input) {
106711
+ auto &bind_data = (ReadCSVData &)*input.bind_data;
106771
106712
  auto result = make_unique<ReadCSVOperatorData>();
106772
106713
  if (bind_data.initial_reader) {
106773
106714
  result->csv_reader = move(bind_data.initial_reader);
@@ -106775,8 +106716,7 @@ static unique_ptr<FunctionOperatorData> ReadCSVInit(ClientContext &context, cons
106775
106716
  bind_data.options.file_path = bind_data.files[0];
106776
106717
  result->csv_reader = make_unique<BufferedCSVReader>(context, bind_data.options, bind_data.sql_types);
106777
106718
  }
106778
- bind_data.bytes_read = 0;
106779
- bind_data.file_size = result->csv_reader->GetFileSize();
106719
+ result->file_size = result->csv_reader->GetFileSize();
106780
106720
  result->file_index = 1;
106781
106721
  return move(result);
106782
106722
  }
@@ -106787,13 +106727,12 @@ static unique_ptr<FunctionData> ReadCSVAutoBind(ClientContext &context, TableFun
106787
106727
  return ReadCSVBind(context, input, return_types, names);
106788
106728
  }
106789
106729
 
106790
- static void ReadCSVFunction(ClientContext &context, const FunctionData *bind_data_p,
106791
- FunctionOperatorData *operator_state, DataChunk &output) {
106792
- auto &bind_data = (ReadCSVData &)*bind_data_p;
106793
- auto &data = (ReadCSVOperatorData &)*operator_state;
106730
+ static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
106731
+ auto &bind_data = (ReadCSVData &)*data_p.bind_data;
106732
+ auto &data = (ReadCSVOperatorData &)*data_p.global_state;
106794
106733
  do {
106795
106734
  data.csv_reader->ParseCSV(output);
106796
- bind_data.bytes_read = data.csv_reader->bytes_in_chunk;
106735
+ data.bytes_read = data.csv_reader->bytes_in_chunk;
106797
106736
  if (output.size() == 0 && data.file_index < bind_data.files.size()) {
106798
106737
  // exhausted this file, but we have more files we can read
106799
106738
  // open the next file and increment the counter
@@ -106834,12 +106773,13 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
106834
106773
  table_function.named_parameters["maximum_line_size"] = LogicalType::VARCHAR;
106835
106774
  }
106836
106775
 
106837
- double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p) {
106838
- auto &bind_data = (ReadCSVData &)*bind_data_p;
106839
- if (bind_data.file_size == 0) {
106776
+ double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
106777
+ const GlobalTableFunctionState *global_state) {
106778
+ auto &data = (const ReadCSVOperatorData &)*global_state;
106779
+ if (data.file_size == 0) {
106840
106780
  return 100;
106841
106781
  }
106842
- auto percentage = (bind_data.bytes_read * 100.0) / bind_data.file_size;
106782
+ auto percentage = (data.bytes_read * 100.0) / data.file_size;
106843
106783
  return percentage;
106844
106784
  }
106845
106785
 
@@ -106900,7 +106840,7 @@ struct RepeatFunctionData : public TableFunctionData {
106900
106840
  idx_t target_count;
106901
106841
  };
106902
106842
 
106903
- struct RepeatOperatorData : public FunctionOperatorData {
106843
+ struct RepeatOperatorData : public GlobalTableFunctionState {
106904
106844
  RepeatOperatorData() : current_count(0) {
106905
106845
  }
106906
106846
  idx_t current_count;
@@ -106915,15 +106855,13 @@ static unique_ptr<FunctionData> RepeatBind(ClientContext &context, TableFunction
106915
106855
  return make_unique<RepeatFunctionData>(inputs[0], inputs[1].GetValue<int64_t>());
106916
106856
  }
106917
106857
 
106918
- static unique_ptr<FunctionOperatorData> RepeatInit(ClientContext &context, const FunctionData *bind_data,
106919
- const vector<column_t> &column_ids, TableFilterCollection *filters) {
106858
+ static unique_ptr<GlobalTableFunctionState> RepeatInit(ClientContext &context, TableFunctionInitInput &input) {
106920
106859
  return make_unique<RepeatOperatorData>();
106921
106860
  }
106922
106861
 
106923
- static void RepeatFunction(ClientContext &context, const FunctionData *bind_data_p,
106924
- FunctionOperatorData *operator_state, DataChunk &output) {
106925
- auto &bind_data = (RepeatFunctionData &)*bind_data_p;
106926
- auto &state = (RepeatOperatorData &)*operator_state;
106862
+ static void RepeatFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
106863
+ auto &bind_data = (const RepeatFunctionData &)*data_p.bind_data;
106864
+ auto &state = (RepeatOperatorData &)*data_p.global_state;
106927
106865
 
106928
106866
  idx_t remaining = MinValue<idx_t>(bind_data.target_count - state.current_count, STANDARD_VECTOR_SIZE);
106929
106867
  output.data[0].Reference(bind_data.value);
@@ -106932,13 +106870,13 @@ static void RepeatFunction(ClientContext &context, const FunctionData *bind_data
106932
106870
  }
106933
106871
 
106934
106872
  static unique_ptr<NodeStatistics> RepeatCardinality(ClientContext &context, const FunctionData *bind_data_p) {
106935
- auto &bind_data = (RepeatFunctionData &)*bind_data_p;
106873
+ auto &bind_data = (const RepeatFunctionData &)*bind_data_p;
106936
106874
  return make_unique<NodeStatistics>(bind_data.target_count, bind_data.target_count);
106937
106875
  }
106938
106876
 
106939
106877
  void RepeatTableFunction::RegisterFunction(BuiltinFunctions &set) {
106940
- TableFunction repeat("repeat", {LogicalType::ANY, LogicalType::BIGINT}, RepeatFunction, RepeatBind, RepeatInit,
106941
- nullptr, nullptr, nullptr, RepeatCardinality);
106878
+ TableFunction repeat("repeat", {LogicalType::ANY, LogicalType::BIGINT}, RepeatFunction, RepeatBind, RepeatInit);
106879
+ repeat.cardinality = RepeatCardinality;
106942
106880
  set.AddFunction(repeat);
106943
106881
  }
106944
106882
 
@@ -106966,8 +106904,8 @@ static unique_ptr<FunctionData> SummaryFunctionBind(ClientContext &context, Tabl
106966
106904
  return make_unique<TableFunctionData>();
106967
106905
  }
106968
106906
 
106969
- static OperatorResultType SummaryFunction(ClientContext &context, const FunctionData *bind_data_p,
106970
- FunctionOperatorData *state_p, DataChunk &input, DataChunk &output) {
106907
+ static OperatorResultType SummaryFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &input,
106908
+ DataChunk &output) {
106971
106909
  output.SetCardinality(input.size());
106972
106910
 
106973
106911
  for (idx_t row_idx = 0; row_idx < input.size(); row_idx++) {
@@ -107009,7 +106947,7 @@ void SummaryTableFunction::RegisterFunction(BuiltinFunctions &set) {
107009
106947
 
107010
106948
  namespace duckdb {
107011
106949
 
107012
- struct DuckDBColumnsData : public FunctionOperatorData {
106950
+ struct DuckDBColumnsData : public GlobalTableFunctionState {
107013
106951
  DuckDBColumnsData() : offset(0), column_offset(0) {
107014
106952
  }
107015
106953
 
@@ -107068,8 +107006,7 @@ static unique_ptr<FunctionData> DuckDBColumnsBind(ClientContext &context, TableF
107068
107006
  return nullptr;
107069
107007
  }
107070
107008
 
107071
- unique_ptr<FunctionOperatorData> DuckDBColumnsInit(ClientContext &context, const FunctionData *bind_data,
107072
- const vector<column_t> &column_ids, TableFilterCollection *filters) {
107009
+ unique_ptr<GlobalTableFunctionState> DuckDBColumnsInit(ClientContext &context, TableFunctionInitInput &input) {
107073
107010
  auto result = make_unique<DuckDBColumnsData>();
107074
107011
 
107075
107012
  // scan all the schemas for tables and views and collect them
@@ -107084,8 +107021,6 @@ unique_ptr<FunctionOperatorData> DuckDBColumnsInit(ClientContext &context, const
107084
107021
  return move(result);
107085
107022
  }
107086
107023
 
107087
- namespace { // anonymous namespace for the ColumnHelper classes for working with tables/views
107088
-
107089
107024
  class ColumnHelper {
107090
107025
  public:
107091
107026
  static unique_ptr<ColumnHelper> Create(CatalogEntry *entry);
@@ -107275,11 +107210,8 @@ void ColumnHelper::WriteColumns(idx_t start_index, idx_t start_col, idx_t end_co
107275
107210
  }
107276
107211
  }
107277
107212
 
107278
- } // anonymous namespace
107279
-
107280
- void DuckDBColumnsFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state,
107281
- DataChunk &output) {
107282
- auto &data = (DuckDBColumnsData &)*operator_state;
107213
+ void DuckDBColumnsFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
107214
+ auto &data = (DuckDBColumnsData &)*data_p.global_state;
107283
107215
  if (data.offset >= data.entries.size()) {
107284
107216
  // finished returning values
107285
107217
  return;
@@ -107343,7 +107275,7 @@ void DuckDBColumnsFun::RegisterFunction(BuiltinFunctions &set) {
107343
107275
 
107344
107276
  namespace duckdb {
107345
107277
 
107346
- struct DuckDBConstraintsData : public FunctionOperatorData {
107278
+ struct DuckDBConstraintsData : public GlobalTableFunctionState {
107347
107279
  DuckDBConstraintsData() : offset(0), constraint_offset(0) {
107348
107280
  }
107349
107281
 
@@ -107389,9 +107321,7 @@ static unique_ptr<FunctionData> DuckDBConstraintsBind(ClientContext &context, Ta
107389
107321
  return nullptr;
107390
107322
  }
107391
107323
 
107392
- unique_ptr<FunctionOperatorData> DuckDBConstraintsInit(ClientContext &context, const FunctionData *bind_data,
107393
- const vector<column_t> &column_ids,
107394
- TableFilterCollection *filters) {
107324
+ unique_ptr<GlobalTableFunctionState> DuckDBConstraintsInit(ClientContext &context, TableFunctionInitInput &input) {
107395
107325
  auto result = make_unique<DuckDBConstraintsData>();
107396
107326
 
107397
107327
  // scan all the schemas for tables and collect themand collect them
@@ -107406,9 +107336,8 @@ unique_ptr<FunctionOperatorData> DuckDBConstraintsInit(ClientContext &context, c
107406
107336
  return move(result);
107407
107337
  }
107408
107338
 
107409
- void DuckDBConstraintsFunction(ClientContext &context, const FunctionData *bind_data,
107410
- FunctionOperatorData *operator_state, DataChunk &output) {
107411
- auto &data = (DuckDBConstraintsData &)*operator_state;
107339
+ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
107340
+ auto &data = (DuckDBConstraintsData &)*data_p.global_state;
107412
107341
  if (data.offset >= data.entries.size()) {
107413
107342
  // finished returning values
107414
107343
  return;
@@ -107551,7 +107480,7 @@ struct DependencyInformation {
107551
107480
  DependencyType type;
107552
107481
  };
107553
107482
 
107554
- struct DuckDBDependenciesData : public FunctionOperatorData {
107483
+ struct DuckDBDependenciesData : public GlobalTableFunctionState {
107555
107484
  DuckDBDependenciesData() : offset(0) {
107556
107485
  }
107557
107486
 
@@ -107585,9 +107514,7 @@ static unique_ptr<FunctionData> DuckDBDependenciesBind(ClientContext &context, T
107585
107514
  return nullptr;
107586
107515
  }
107587
107516
 
107588
- unique_ptr<FunctionOperatorData> DuckDBDependenciesInit(ClientContext &context, const FunctionData *bind_data,
107589
- const vector<column_t> &column_ids,
107590
- TableFilterCollection *filters) {
107517
+ unique_ptr<GlobalTableFunctionState> DuckDBDependenciesInit(ClientContext &context, TableFunctionInitInput &input) {
107591
107518
  auto result = make_unique<DuckDBDependenciesData>();
107592
107519
 
107593
107520
  // scan all the schemas and collect them
@@ -107604,9 +107531,8 @@ unique_ptr<FunctionOperatorData> DuckDBDependenciesInit(ClientContext &context,
107604
107531
  return move(result);
107605
107532
  }
107606
107533
 
107607
- void DuckDBDependenciesFunction(ClientContext &context, const FunctionData *bind_data,
107608
- FunctionOperatorData *operator_state, DataChunk &output) {
107609
- auto &data = (DuckDBDependenciesData &)*operator_state;
107534
+ void DuckDBDependenciesFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
107535
+ auto &data = (DuckDBDependenciesData &)*data_p.global_state;
107610
107536
  if (data.offset >= data.entries.size()) {
107611
107537
  // finished returning values
107612
107538
  return;
@@ -107675,7 +107601,7 @@ void DuckDBDependenciesFun::RegisterFunction(BuiltinFunctions &set) {
107675
107601
 
107676
107602
  namespace duckdb {
107677
107603
 
107678
- struct DuckDBFunctionsData : public FunctionOperatorData {
107604
+ struct DuckDBFunctionsData : public GlobalTableFunctionState {
107679
107605
  DuckDBFunctionsData() : offset(0), offset_in_entry(0) {
107680
107606
  }
107681
107607
 
@@ -107729,9 +107655,7 @@ static void ExtractFunctionsFromSchema(ClientContext &context, SchemaCatalogEntr
107729
107655
  [&](CatalogEntry *entry) { result.entries.push_back(entry); });
107730
107656
  }
107731
107657
 
107732
- unique_ptr<FunctionOperatorData> DuckDBFunctionsInit(ClientContext &context, const FunctionData *bind_data,
107733
- const vector<column_t> &column_ids,
107734
- TableFilterCollection *filters) {
107658
+ unique_ptr<GlobalTableFunctionState> DuckDBFunctionsInit(ClientContext &context, TableFunctionInitInput &input) {
107735
107659
  auto result = make_unique<DuckDBFunctionsData>();
107736
107660
 
107737
107661
  // scan all the schemas for tables and collect themand collect them
@@ -108100,9 +108024,8 @@ bool ExtractFunctionData(StandardEntry *entry, idx_t function_idx, DataChunk &ou
108100
108024
  return function_idx + 1 == OP::FunctionCount(function);
108101
108025
  }
108102
108026
 
108103
- void DuckDBFunctionsFunction(ClientContext &context, const FunctionData *bind_data,
108104
- FunctionOperatorData *operator_state, DataChunk &output) {
108105
- auto &data = (DuckDBFunctionsData &)*operator_state;
108027
+ void DuckDBFunctionsFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
108028
+ auto &data = (DuckDBFunctionsData &)*data_p.global_state;
108106
108029
  if (data.offset >= data.entries.size()) {
108107
108030
  // finished returning values
108108
108031
  return;
@@ -108175,7 +108098,7 @@ void DuckDBFunctionsFun::RegisterFunction(BuiltinFunctions &set) {
108175
108098
 
108176
108099
  namespace duckdb {
108177
108100
 
108178
- struct DuckDBIndexesData : public FunctionOperatorData {
108101
+ struct DuckDBIndexesData : public GlobalTableFunctionState {
108179
108102
  DuckDBIndexesData() : offset(0) {
108180
108103
  }
108181
108104
 
@@ -108218,8 +108141,7 @@ static unique_ptr<FunctionData> DuckDBIndexesBind(ClientContext &context, TableF
108218
108141
  return nullptr;
108219
108142
  }
108220
108143
 
108221
- unique_ptr<FunctionOperatorData> DuckDBIndexesInit(ClientContext &context, const FunctionData *bind_data,
108222
- const vector<column_t> &column_ids, TableFilterCollection *filters) {
108144
+ unique_ptr<GlobalTableFunctionState> DuckDBIndexesInit(ClientContext &context, TableFunctionInitInput &input) {
108223
108145
  auto result = make_unique<DuckDBIndexesData>();
108224
108146
 
108225
108147
  // scan all the schemas for tables and collect themand collect them
@@ -108234,9 +108156,8 @@ unique_ptr<FunctionOperatorData> DuckDBIndexesInit(ClientContext &context, const
108234
108156
  return move(result);
108235
108157
  }
108236
108158
 
108237
- void DuckDBIndexesFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state,
108238
- DataChunk &output) {
108239
- auto &data = (DuckDBIndexesData &)*operator_state;
108159
+ void DuckDBIndexesFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
108160
+ auto &data = (DuckDBIndexesData &)*data_p.global_state;
108240
108161
  if (data.offset >= data.entries.size()) {
108241
108162
  // finished returning values
108242
108163
  return;
@@ -108292,7 +108213,7 @@ void DuckDBIndexesFun::RegisterFunction(BuiltinFunctions &set) {
108292
108213
 
108293
108214
  namespace duckdb {
108294
108215
 
108295
- struct DuckDBKeywordsData : public FunctionOperatorData {
108216
+ struct DuckDBKeywordsData : public GlobalTableFunctionState {
108296
108217
  DuckDBKeywordsData() : offset(0) {
108297
108218
  }
108298
108219
 
@@ -108311,17 +108232,14 @@ static unique_ptr<FunctionData> DuckDBKeywordsBind(ClientContext &context, Table
108311
108232
  return nullptr;
108312
108233
  }
108313
108234
 
108314
- unique_ptr<FunctionOperatorData> DuckDBKeywordsInit(ClientContext &context, const FunctionData *bind_data,
108315
- const vector<column_t> &column_ids,
108316
- TableFilterCollection *filters) {
108235
+ unique_ptr<GlobalTableFunctionState> DuckDBKeywordsInit(ClientContext &context, TableFunctionInitInput &input) {
108317
108236
  auto result = make_unique<DuckDBKeywordsData>();
108318
108237
  result->entries = Parser::KeywordList();
108319
108238
  return move(result);
108320
108239
  }
108321
108240
 
108322
- void DuckDBKeywordsFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state,
108323
- DataChunk &output) {
108324
- auto &data = (DuckDBKeywordsData &)*operator_state;
108241
+ void DuckDBKeywordsFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
108242
+ auto &data = (DuckDBKeywordsData &)*data_p.global_state;
108325
108243
  if (data.offset >= data.entries.size()) {
108326
108244
  // finished returning values
108327
108245
  return;
@@ -108375,7 +108293,7 @@ void DuckDBKeywordsFun::RegisterFunction(BuiltinFunctions &set) {
108375
108293
 
108376
108294
  namespace duckdb {
108377
108295
 
108378
- struct DuckDBSchemasData : public FunctionOperatorData {
108296
+ struct DuckDBSchemasData : public GlobalTableFunctionState {
108379
108297
  DuckDBSchemasData() : offset(0) {
108380
108298
  }
108381
108299
 
@@ -108400,8 +108318,7 @@ static unique_ptr<FunctionData> DuckDBSchemasBind(ClientContext &context, TableF
108400
108318
  return nullptr;
108401
108319
  }
108402
108320
 
108403
- unique_ptr<FunctionOperatorData> DuckDBSchemasInit(ClientContext &context, const FunctionData *bind_data,
108404
- const vector<column_t> &column_ids, TableFilterCollection *filters) {
108321
+ unique_ptr<GlobalTableFunctionState> DuckDBSchemasInit(ClientContext &context, TableFunctionInitInput &input) {
108405
108322
  auto result = make_unique<DuckDBSchemasData>();
108406
108323
 
108407
108324
  // scan all the schemas and collect them
@@ -108413,9 +108330,8 @@ unique_ptr<FunctionOperatorData> DuckDBSchemasInit(ClientContext &context, const
108413
108330
  return move(result);
108414
108331
  }
108415
108332
 
108416
- void DuckDBSchemasFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state,
108417
- DataChunk &output) {
108418
- auto &data = (DuckDBSchemasData &)*operator_state;
108333
+ void DuckDBSchemasFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
108334
+ auto &data = (DuckDBSchemasData &)*data_p.global_state;
108419
108335
  if (data.offset >= data.entries.size()) {
108420
108336
  // finished returning values
108421
108337
  return;
@@ -108458,7 +108374,7 @@ void DuckDBSchemasFun::RegisterFunction(BuiltinFunctions &set) {
108458
108374
 
108459
108375
  namespace duckdb {
108460
108376
 
108461
- struct DuckDBSequencesData : public FunctionOperatorData {
108377
+ struct DuckDBSequencesData : public GlobalTableFunctionState {
108462
108378
  DuckDBSequencesData() : offset(0) {
108463
108379
  }
108464
108380
 
@@ -108507,9 +108423,7 @@ static unique_ptr<FunctionData> DuckDBSequencesBind(ClientContext &context, Tabl
108507
108423
  return nullptr;
108508
108424
  }
108509
108425
 
108510
- unique_ptr<FunctionOperatorData> DuckDBSequencesInit(ClientContext &context, const FunctionData *bind_data,
108511
- const vector<column_t> &column_ids,
108512
- TableFilterCollection *filters) {
108426
+ unique_ptr<GlobalTableFunctionState> DuckDBSequencesInit(ClientContext &context, TableFunctionInitInput &input) {
108513
108427
  auto result = make_unique<DuckDBSequencesData>();
108514
108428
 
108515
108429
  // scan all the schemas for tables and collect themand collect them
@@ -108525,9 +108439,8 @@ unique_ptr<FunctionOperatorData> DuckDBSequencesInit(ClientContext &context, con
108525
108439
  return move(result);
108526
108440
  }
108527
108441
 
108528
- void DuckDBSequencesFunction(ClientContext &context, const FunctionData *bind_data,
108529
- FunctionOperatorData *operator_state, DataChunk &output) {
108530
- auto &data = (DuckDBSequencesData &)*operator_state;
108442
+ void DuckDBSequencesFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
108443
+ auto &data = (DuckDBSequencesData &)*data_p.global_state;
108531
108444
  if (data.offset >= data.entries.size()) {
108532
108445
  // finished returning values
108533
108446
  return;
@@ -108590,7 +108503,7 @@ struct DuckDBSettingValue {
108590
108503
  string input_type;
108591
108504
  };
108592
108505
 
108593
- struct DuckDBSettingsData : public FunctionOperatorData {
108506
+ struct DuckDBSettingsData : public GlobalTableFunctionState {
108594
108507
  DuckDBSettingsData() : offset(0) {
108595
108508
  }
108596
108509
 
@@ -108615,9 +108528,7 @@ static unique_ptr<FunctionData> DuckDBSettingsBind(ClientContext &context, Table
108615
108528
  return nullptr;
108616
108529
  }
108617
108530
 
108618
- unique_ptr<FunctionOperatorData> DuckDBSettingsInit(ClientContext &context, const FunctionData *bind_data,
108619
- const vector<column_t> &column_ids,
108620
- TableFilterCollection *filters) {
108531
+ unique_ptr<GlobalTableFunctionState> DuckDBSettingsInit(ClientContext &context, TableFunctionInitInput &input) {
108621
108532
  auto result = make_unique<DuckDBSettingsData>();
108622
108533
 
108623
108534
  auto &config = DBConfig::GetConfig(context);
@@ -108650,9 +108561,8 @@ unique_ptr<FunctionOperatorData> DuckDBSettingsInit(ClientContext &context, cons
108650
108561
  return move(result);
108651
108562
  }
108652
108563
 
108653
- void DuckDBSettingsFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state,
108654
- DataChunk &output) {
108655
- auto &data = (DuckDBSettingsData &)*operator_state;
108564
+ void DuckDBSettingsFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
108565
+ auto &data = (DuckDBSettingsData &)*data_p.global_state;
108656
108566
  if (data.offset >= data.settings.size()) {
108657
108567
  // finished returning values
108658
108568
  return;
@@ -108697,7 +108607,7 @@ void DuckDBSettingsFun::RegisterFunction(BuiltinFunctions &set) {
108697
108607
 
108698
108608
  namespace duckdb {
108699
108609
 
108700
- struct DuckDBTablesData : public FunctionOperatorData {
108610
+ struct DuckDBTablesData : public GlobalTableFunctionState {
108701
108611
  DuckDBTablesData() : offset(0) {
108702
108612
  }
108703
108613
 
@@ -108746,8 +108656,7 @@ static unique_ptr<FunctionData> DuckDBTablesBind(ClientContext &context, TableFu
108746
108656
  return nullptr;
108747
108657
  }
108748
108658
 
108749
- unique_ptr<FunctionOperatorData> DuckDBTablesInit(ClientContext &context, const FunctionData *bind_data,
108750
- const vector<column_t> &column_ids, TableFilterCollection *filters) {
108659
+ unique_ptr<GlobalTableFunctionState> DuckDBTablesInit(ClientContext &context, TableFunctionInitInput &input) {
108751
108660
  auto result = make_unique<DuckDBTablesData>();
108752
108661
 
108753
108662
  // scan all the schemas for tables and collect themand collect them
@@ -108784,9 +108693,8 @@ static idx_t CheckConstraintCount(TableCatalogEntry &table) {
108784
108693
  return check_count;
108785
108694
  }
108786
108695
 
108787
- void DuckDBTablesFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state,
108788
- DataChunk &output) {
108789
- auto &data = (DuckDBTablesData &)*operator_state;
108696
+ void DuckDBTablesFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
108697
+ auto &data = (DuckDBTablesData &)*data_p.global_state;
108790
108698
  if (data.offset >= data.entries.size()) {
108791
108699
  // finished returning values
108792
108700
  return;
@@ -108848,7 +108756,7 @@ void DuckDBTablesFun::RegisterFunction(BuiltinFunctions &set) {
108848
108756
 
108849
108757
  namespace duckdb {
108850
108758
 
108851
- struct DuckDBTypesData : public FunctionOperatorData {
108759
+ struct DuckDBTypesData : public GlobalTableFunctionState {
108852
108760
  DuckDBTypesData() : offset(0) {
108853
108761
  }
108854
108762
 
@@ -108887,8 +108795,7 @@ static unique_ptr<FunctionData> DuckDBTypesBind(ClientContext &context, TableFun
108887
108795
  return nullptr;
108888
108796
  }
108889
108797
 
108890
- unique_ptr<FunctionOperatorData> DuckDBTypesInit(ClientContext &context, const FunctionData *bind_data,
108891
- const vector<column_t> &column_ids, TableFilterCollection *filters) {
108798
+ unique_ptr<GlobalTableFunctionState> DuckDBTypesInit(ClientContext &context, TableFunctionInitInput &input) {
108892
108799
  auto result = make_unique<DuckDBTypesData>();
108893
108800
  auto schemas = Catalog::GetCatalog(context).schemas->GetEntries<SchemaCatalogEntry>(context);
108894
108801
  for (auto &schema : schemas) {
@@ -108903,9 +108810,8 @@ unique_ptr<FunctionOperatorData> DuckDBTypesInit(ClientContext &context, const F
108903
108810
  return move(result);
108904
108811
  }
108905
108812
 
108906
- void DuckDBTypesFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state,
108907
- DataChunk &output) {
108908
- auto &data = (DuckDBTypesData &)*operator_state;
108813
+ void DuckDBTypesFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
108814
+ auto &data = (DuckDBTypesData &)*data_p.global_state;
108909
108815
  if (data.offset >= data.entries.size()) {
108910
108816
  // finished returning values
108911
108817
  return;
@@ -109013,7 +108919,7 @@ void DuckDBTypesFun::RegisterFunction(BuiltinFunctions &set) {
109013
108919
 
109014
108920
  namespace duckdb {
109015
108921
 
109016
- struct DuckDBViewsData : public FunctionOperatorData {
108922
+ struct DuckDBViewsData : public GlobalTableFunctionState {
109017
108923
  DuckDBViewsData() : offset(0) {
109018
108924
  }
109019
108925
 
@@ -109050,8 +108956,7 @@ static unique_ptr<FunctionData> DuckDBViewsBind(ClientContext &context, TableFun
109050
108956
  return nullptr;
109051
108957
  }
109052
108958
 
109053
- unique_ptr<FunctionOperatorData> DuckDBViewsInit(ClientContext &context, const FunctionData *bind_data,
109054
- const vector<column_t> &column_ids, TableFilterCollection *filters) {
108959
+ unique_ptr<GlobalTableFunctionState> DuckDBViewsInit(ClientContext &context, TableFunctionInitInput &input) {
109055
108960
  auto result = make_unique<DuckDBViewsData>();
109056
108961
 
109057
108962
  // scan all the schemas for tables and collect them
@@ -109066,9 +108971,8 @@ unique_ptr<FunctionOperatorData> DuckDBViewsInit(ClientContext &context, const F
109066
108971
  return move(result);
109067
108972
  }
109068
108973
 
109069
- void DuckDBViewsFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state,
109070
- DataChunk &output) {
109071
- auto &data = (DuckDBViewsData &)*operator_state;
108974
+ void DuckDBViewsFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
108975
+ auto &data = (DuckDBViewsData &)*data_p.global_state;
109072
108976
  if (data.offset >= data.entries.size()) {
109073
108977
  // finished returning values
109074
108978
  return;
@@ -109121,7 +109025,7 @@ void DuckDBViewsFun::RegisterFunction(BuiltinFunctions &set) {
109121
109025
 
109122
109026
  namespace duckdb {
109123
109027
 
109124
- struct PragmaCollateData : public FunctionOperatorData {
109028
+ struct PragmaCollateData : public GlobalTableFunctionState {
109125
109029
  PragmaCollateData() : offset(0) {
109126
109030
  }
109127
109031
 
@@ -109137,8 +109041,7 @@ static unique_ptr<FunctionData> PragmaCollateBind(ClientContext &context, TableF
109137
109041
  return nullptr;
109138
109042
  }
109139
109043
 
109140
- unique_ptr<FunctionOperatorData> PragmaCollateInit(ClientContext &context, const FunctionData *bind_data,
109141
- const vector<column_t> &column_ids, TableFilterCollection *filters) {
109044
+ unique_ptr<GlobalTableFunctionState> PragmaCollateInit(ClientContext &context, TableFunctionInitInput &input) {
109142
109045
  auto result = make_unique<PragmaCollateData>();
109143
109046
 
109144
109047
  Catalog::GetCatalog(context).schemas->Scan(context, [&](CatalogEntry *entry) {
@@ -109150,9 +109053,8 @@ unique_ptr<FunctionOperatorData> PragmaCollateInit(ClientContext &context, const
109150
109053
  return move(result);
109151
109054
  }
109152
109055
 
109153
- static void PragmaCollateFunction(ClientContext &context, const FunctionData *bind_data,
109154
- FunctionOperatorData *operator_state, DataChunk &output) {
109155
- auto &data = (PragmaCollateData &)*operator_state;
109056
+ static void PragmaCollateFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
109057
+ auto &data = (PragmaCollateData &)*data_p.global_state;
109156
109058
  if (data.offset >= data.entries.size()) {
109157
109059
  // finished returning values
109158
109060
  return;
@@ -109179,7 +109081,7 @@ void PragmaCollations::RegisterFunction(BuiltinFunctions &set) {
109179
109081
 
109180
109082
  namespace duckdb {
109181
109083
 
109182
- struct PragmaDatabaseListData : public FunctionOperatorData {
109084
+ struct PragmaDatabaseListData : public GlobalTableFunctionState {
109183
109085
  PragmaDatabaseListData() : finished(false) {
109184
109086
  }
109185
109087
 
@@ -109200,15 +109102,12 @@ static unique_ptr<FunctionData> PragmaDatabaseListBind(ClientContext &context, T
109200
109102
  return nullptr;
109201
109103
  }
109202
109104
 
109203
- unique_ptr<FunctionOperatorData> PragmaDatabaseListInit(ClientContext &context, const FunctionData *bind_data,
109204
- const vector<column_t> &column_ids,
109205
- TableFilterCollection *filters) {
109105
+ unique_ptr<GlobalTableFunctionState> PragmaDatabaseListInit(ClientContext &context, TableFunctionInitInput &input) {
109206
109106
  return make_unique<PragmaDatabaseListData>();
109207
109107
  }
109208
109108
 
109209
- void PragmaDatabaseListFunction(ClientContext &context, const FunctionData *bind_data,
109210
- FunctionOperatorData *operator_state, DataChunk &output) {
109211
- auto &data = (PragmaDatabaseListData &)*operator_state;
109109
+ void PragmaDatabaseListFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
109110
+ auto &data = (PragmaDatabaseListData &)*data_p.global_state;
109212
109111
  if (data.finished) {
109213
109112
  return;
109214
109113
  }
@@ -109237,7 +109136,7 @@ void PragmaDatabaseList::RegisterFunction(BuiltinFunctions &set) {
109237
109136
 
109238
109137
  namespace duckdb {
109239
109138
 
109240
- struct PragmaDatabaseSizeData : public FunctionOperatorData {
109139
+ struct PragmaDatabaseSizeData : public GlobalTableFunctionState {
109241
109140
  PragmaDatabaseSizeData() : finished(false) {
109242
109141
  }
109243
109142
 
@@ -109273,15 +109172,12 @@ static unique_ptr<FunctionData> PragmaDatabaseSizeBind(ClientContext &context, T
109273
109172
  return nullptr;
109274
109173
  }
109275
109174
 
109276
- unique_ptr<FunctionOperatorData> PragmaDatabaseSizeInit(ClientContext &context, const FunctionData *bind_data,
109277
- const vector<column_t> &column_ids,
109278
- TableFilterCollection *filters) {
109175
+ unique_ptr<GlobalTableFunctionState> PragmaDatabaseSizeInit(ClientContext &context, TableFunctionInitInput &input) {
109279
109176
  return make_unique<PragmaDatabaseSizeData>();
109280
109177
  }
109281
109178
 
109282
- void PragmaDatabaseSizeFunction(ClientContext &context, const FunctionData *bind_data,
109283
- FunctionOperatorData *operator_state, DataChunk &output) {
109284
- auto &data = (PragmaDatabaseSizeData &)*operator_state;
109179
+ void PragmaDatabaseSizeFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
109180
+ auto &data = (PragmaDatabaseSizeData &)*data_p.global_state;
109285
109181
  if (data.finished) {
109286
109182
  return;
109287
109183
  }
@@ -109337,7 +109233,7 @@ void PragmaDatabaseSize::RegisterFunction(BuiltinFunctions &set) {
109337
109233
 
109338
109234
  namespace duckdb {
109339
109235
 
109340
- struct PragmaFunctionsData : public FunctionOperatorData {
109236
+ struct PragmaFunctionsData : public GlobalTableFunctionState {
109341
109237
  PragmaFunctionsData() : offset(0), offset_in_entry(0) {
109342
109238
  }
109343
109239
 
@@ -109369,9 +109265,7 @@ static unique_ptr<FunctionData> PragmaFunctionsBind(ClientContext &context, Tabl
109369
109265
  return nullptr;
109370
109266
  }
109371
109267
 
109372
- unique_ptr<FunctionOperatorData> PragmaFunctionsInit(ClientContext &context, const FunctionData *bind_data,
109373
- const vector<column_t> &column_ids,
109374
- TableFilterCollection *filters) {
109268
+ unique_ptr<GlobalTableFunctionState> PragmaFunctionsInit(ClientContext &context, TableFunctionInitInput &input) {
109375
109269
  auto result = make_unique<PragmaFunctionsData>();
109376
109270
 
109377
109271
  Catalog::GetCatalog(context).schemas->Scan(context, [&](CatalogEntry *entry) {
@@ -109402,9 +109296,8 @@ void AddFunction(BaseScalarFunction &f, idx_t &count, DataChunk &output, bool is
109402
109296
  count++;
109403
109297
  }
109404
109298
 
109405
- static void PragmaFunctionsFunction(ClientContext &context, const FunctionData *bind_data,
109406
- FunctionOperatorData *operator_state, DataChunk &output) {
109407
- auto &data = (PragmaFunctionsData &)*operator_state;
109299
+ static void PragmaFunctionsFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
109300
+ auto &data = (PragmaFunctionsData &)*data_p.global_state;
109408
109301
  if (data.offset >= data.entries.size()) {
109409
109302
  // finished returning values
109410
109303
  return;
@@ -109472,7 +109365,7 @@ struct PragmaStorageFunctionData : public TableFunctionData {
109472
109365
  vector<vector<Value>> storage_info;
109473
109366
  };
109474
109367
 
109475
- struct PragmaStorageOperatorData : public FunctionOperatorData {
109368
+ struct PragmaStorageOperatorData : public GlobalTableFunctionState {
109476
109369
  PragmaStorageOperatorData() : offset(0) {
109477
109370
  }
109478
109371
 
@@ -109538,16 +109431,13 @@ static unique_ptr<FunctionData> PragmaStorageInfoBind(ClientContext &context, Ta
109538
109431
  return move(result);
109539
109432
  }
109540
109433
 
109541
- unique_ptr<FunctionOperatorData> PragmaStorageInfoInit(ClientContext &context, const FunctionData *bind_data,
109542
- const vector<column_t> &column_ids,
109543
- TableFilterCollection *filters) {
109434
+ unique_ptr<GlobalTableFunctionState> PragmaStorageInfoInit(ClientContext &context, TableFunctionInitInput &input) {
109544
109435
  return make_unique<PragmaStorageOperatorData>();
109545
109436
  }
109546
109437
 
109547
- static void PragmaStorageInfoFunction(ClientContext &context, const FunctionData *bind_data_p,
109548
- FunctionOperatorData *operator_state, DataChunk &output) {
109549
- auto &bind_data = (PragmaStorageFunctionData &)*bind_data_p;
109550
- auto &data = (PragmaStorageOperatorData &)*operator_state;
109438
+ static void PragmaStorageInfoFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
109439
+ auto &bind_data = (PragmaStorageFunctionData &)*data_p.bind_data;
109440
+ auto &data = (PragmaStorageOperatorData &)*data_p.global_state;
109551
109441
  idx_t count = 0;
109552
109442
  while (data.offset < bind_data.storage_info.size() && count < STANDARD_VECTOR_SIZE) {
109553
109443
  auto &entry = bind_data.storage_info[data.offset++];
@@ -109597,7 +109487,7 @@ struct PragmaTableFunctionData : public TableFunctionData {
109597
109487
  CatalogEntry *entry;
109598
109488
  };
109599
109489
 
109600
- struct PragmaTableOperatorData : public FunctionOperatorData {
109490
+ struct PragmaTableOperatorData : public GlobalTableFunctionState {
109601
109491
  PragmaTableOperatorData() : offset(0) {
109602
109492
  }
109603
109493
  idx_t offset;
@@ -109631,9 +109521,7 @@ static unique_ptr<FunctionData> PragmaTableInfoBind(ClientContext &context, Tabl
109631
109521
  return make_unique<PragmaTableFunctionData>(entry);
109632
109522
  }
109633
109523
 
109634
- unique_ptr<FunctionOperatorData> PragmaTableInfoInit(ClientContext &context, const FunctionData *bind_data,
109635
- const vector<column_t> &column_ids,
109636
- TableFilterCollection *filters) {
109524
+ unique_ptr<GlobalTableFunctionState> PragmaTableInfoInit(ClientContext &context, TableFunctionInitInput &input) {
109637
109525
  return make_unique<PragmaTableOperatorData>();
109638
109526
  }
109639
109527
 
@@ -109731,10 +109619,9 @@ static void PragmaTableInfoView(PragmaTableOperatorData &data, ViewCatalogEntry
109731
109619
  data.offset = next;
109732
109620
  }
109733
109621
 
109734
- static void PragmaTableInfoFunction(ClientContext &context, const FunctionData *bind_data_p,
109735
- FunctionOperatorData *operator_state, DataChunk &output) {
109736
- auto &bind_data = (PragmaTableFunctionData &)*bind_data_p;
109737
- auto &state = (PragmaTableOperatorData &)*operator_state;
109622
+ static void PragmaTableInfoFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
109623
+ auto &bind_data = (PragmaTableFunctionData &)*data_p.bind_data;
109624
+ auto &state = (PragmaTableOperatorData &)*data_p.global_state;
109738
109625
  switch (bind_data.entry->type) {
109739
109626
  case CatalogType::TABLE_ENTRY:
109740
109627
  PragmaTableInfoTable(state, (TableCatalogEntry *)bind_data.entry, output);
@@ -109762,7 +109649,7 @@ void PragmaTableInfo::RegisterFunction(BuiltinFunctions &set) {
109762
109649
 
109763
109650
  namespace duckdb {
109764
109651
 
109765
- struct TestAllTypesData : public FunctionOperatorData {
109652
+ struct TestAllTypesData : public GlobalTableFunctionState {
109766
109653
  TestAllTypesData() : offset(0) {
109767
109654
  }
109768
109655
 
@@ -109964,8 +109851,7 @@ static unique_ptr<FunctionData> TestAllTypesBind(ClientContext &context, TableFu
109964
109851
  return nullptr;
109965
109852
  }
109966
109853
 
109967
- unique_ptr<FunctionOperatorData> TestAllTypesInit(ClientContext &context, const FunctionData *bind_data,
109968
- const vector<column_t> &column_ids, TableFilterCollection *filters) {
109854
+ unique_ptr<GlobalTableFunctionState> TestAllTypesInit(ClientContext &context, TableFunctionInitInput &input) {
109969
109855
  auto result = make_unique<TestAllTypesData>();
109970
109856
  auto test_types = GetTestTypes();
109971
109857
  // 3 rows: min, max and NULL
@@ -109979,9 +109865,8 @@ unique_ptr<FunctionOperatorData> TestAllTypesInit(ClientContext &context, const
109979
109865
  return move(result);
109980
109866
  }
109981
109867
 
109982
- void TestAllTypesFunction(ClientContext &context, const FunctionData *bind_data, FunctionOperatorData *operator_state,
109983
- DataChunk &output) {
109984
- auto &data = (TestAllTypesData &)*operator_state;
109868
+ void TestAllTypesFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
109869
+ auto &data = (TestAllTypesData &)*data_p.global_state;
109985
109870
  if (data.offset >= data.entries.size()) {
109986
109871
  // finished returning values
109987
109872
  return;
@@ -110139,31 +110024,50 @@ void BuiltinFunctions::RegisterSQLiteFunctions() {
110139
110024
 
110140
110025
 
110141
110026
 
110142
-
110143
110027
  namespace duckdb {
110144
110028
 
110145
110029
  //===--------------------------------------------------------------------===//
110146
110030
  // Table Scan
110147
110031
  //===--------------------------------------------------------------------===//
110148
- bool TableScanParallelStateNext(ClientContext &context, const FunctionData *bind_data,
110149
- FunctionOperatorData *operator_state, ParallelState *parallel_state_p);
110032
+ bool TableScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p,
110033
+ LocalTableFunctionState *local_state, GlobalTableFunctionState *gstate);
110150
110034
 
110151
- struct TableScanOperatorData : public FunctionOperatorData {
110035
+ struct TableScanLocalState : public LocalTableFunctionState {
110152
110036
  //! The current position in the scan
110153
110037
  TableScanState scan_state;
110154
110038
  vector<column_t> column_ids;
110155
110039
  };
110156
110040
 
110157
- static unique_ptr<FunctionOperatorData> TableScanInit(ClientContext &context, const FunctionData *bind_data_p,
110158
- const vector<column_t> &column_ids,
110159
- TableFilterCollection *filters) {
110160
- auto result = make_unique<TableScanOperatorData>();
110161
- auto &transaction = Transaction::GetTransaction(context);
110162
- auto &bind_data = (const TableScanBindData &)*bind_data_p;
110163
- result->column_ids = column_ids;
110164
- result->scan_state.table_filters = filters->table_filters;
110165
- bind_data.table->storage->InitializeScan(transaction, result->scan_state, result->column_ids,
110166
- filters->table_filters);
110041
+ struct TableScanGlobalState : public GlobalTableFunctionState {
110042
+ TableScanGlobalState(ClientContext &context, const FunctionData *bind_data_p) {
110043
+ D_ASSERT(bind_data_p);
110044
+ auto &bind_data = (const TableScanBindData &)*bind_data_p;
110045
+ max_threads = bind_data.table->storage->MaxThreads(context);
110046
+ }
110047
+
110048
+ ParallelTableScanState state;
110049
+ mutex lock;
110050
+ idx_t max_threads;
110051
+
110052
+ idx_t MaxThreads() const override {
110053
+ return max_threads;
110054
+ }
110055
+ };
110056
+
110057
+ static unique_ptr<LocalTableFunctionState> TableScanInitLocal(ClientContext &context, TableFunctionInitInput &input,
110058
+ GlobalTableFunctionState *gstate) {
110059
+ auto result = make_unique<TableScanLocalState>();
110060
+ result->column_ids = input.column_ids;
110061
+ result->scan_state.table_filters = input.filters;
110062
+ TableScanParallelStateNext(context, input.bind_data, result.get(), gstate);
110063
+ return move(result);
110064
+ }
110065
+
110066
+ unique_ptr<GlobalTableFunctionState> TableScanInitGlobal(ClientContext &context, TableFunctionInitInput &input) {
110067
+ D_ASSERT(input.bind_data);
110068
+ auto &bind_data = (const TableScanBindData &)*input.bind_data;
110069
+ auto result = make_unique<TableScanGlobalState>(context, input.bind_data);
110070
+ bind_data.table->storage->InitializeParallelScan(context, result->state);
110167
110071
  return move(result);
110168
110072
  }
110169
110073
 
@@ -110178,63 +110082,34 @@ static unique_ptr<BaseStatistics> TableScanStatistics(ClientContext &context, co
110178
110082
  return bind_data.table->storage->GetStatistics(context, column_id);
110179
110083
  }
110180
110084
 
110181
- static unique_ptr<FunctionOperatorData> TableScanParallelInit(ClientContext &context, const FunctionData *bind_data_p,
110182
- ParallelState *state, const vector<column_t> &column_ids,
110183
- TableFilterCollection *filters) {
110184
- auto result = make_unique<TableScanOperatorData>();
110185
- result->column_ids = column_ids;
110186
- result->scan_state.table_filters = filters->table_filters;
110187
- TableScanParallelStateNext(context, bind_data_p, result.get(), state);
110188
- return move(result);
110189
- }
110190
-
110191
- static void TableScanFunc(ClientContext &context, const FunctionData *bind_data_p, FunctionOperatorData *operator_state,
110192
- DataChunk &output) {
110193
- D_ASSERT(bind_data_p);
110194
- D_ASSERT(operator_state);
110195
- auto &bind_data = (TableScanBindData &)*bind_data_p;
110196
- auto &state = (TableScanOperatorData &)*operator_state;
110085
+ static void TableScanFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
110086
+ auto &bind_data = (TableScanBindData &)*data_p.bind_data;
110087
+ auto &state = (TableScanLocalState &)*data_p.local_state;
110197
110088
  auto &transaction = Transaction::GetTransaction(context);
110198
- bind_data.table->storage->Scan(transaction, output, state.scan_state, state.column_ids);
110199
- bind_data.chunk_count++;
110200
- }
110201
-
110202
- struct ParallelTableFunctionScanState : public ParallelState {
110203
- ParallelTableScanState state;
110204
- mutex lock;
110205
- };
110206
-
110207
- idx_t TableScanMaxThreads(ClientContext &context, const FunctionData *bind_data_p) {
110208
- D_ASSERT(bind_data_p);
110209
- auto &bind_data = (const TableScanBindData &)*bind_data_p;
110210
- return bind_data.table->storage->MaxThreads(context);
110211
- }
110212
-
110213
- unique_ptr<ParallelState> TableScanInitParallelState(ClientContext &context, const FunctionData *bind_data_p,
110214
- const vector<column_t> &column_ids,
110215
- TableFilterCollection *filters) {
110216
- D_ASSERT(bind_data_p);
110217
- auto &bind_data = (const TableScanBindData &)*bind_data_p;
110218
- auto result = make_unique<ParallelTableFunctionScanState>();
110219
- bind_data.table->storage->InitializeParallelScan(context, result->state);
110220
- return move(result);
110089
+ do {
110090
+ bind_data.table->storage->Scan(transaction, output, state.scan_state, state.column_ids);
110091
+ if (output.size() > 0) {
110092
+ return;
110093
+ }
110094
+ if (!TableScanParallelStateNext(context, data_p.bind_data, data_p.local_state, data_p.global_state)) {
110095
+ return;
110096
+ }
110097
+ } while (true);
110221
110098
  }
110222
110099
 
110223
110100
  bool TableScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p,
110224
- FunctionOperatorData *operator_state, ParallelState *parallel_state_p) {
110225
- D_ASSERT(bind_data_p);
110226
- D_ASSERT(parallel_state_p);
110227
- D_ASSERT(operator_state);
110101
+ LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state) {
110228
110102
  auto &bind_data = (const TableScanBindData &)*bind_data_p;
110229
- auto &parallel_state = (ParallelTableFunctionScanState &)*parallel_state_p;
110230
- auto &state = (TableScanOperatorData &)*operator_state;
110103
+ auto &parallel_state = (TableScanGlobalState &)*global_state;
110104
+ auto &state = (TableScanLocalState &)*local_state;
110231
110105
 
110232
110106
  lock_guard<mutex> parallel_lock(parallel_state.lock);
110233
110107
  return bind_data.table->storage->NextParallelScan(context, parallel_state.state, state.scan_state,
110234
110108
  state.column_ids);
110235
110109
  }
110236
110110
 
110237
- double TableScanProgress(ClientContext &context, const FunctionData *bind_data_p) {
110111
+ double TableScanProgress(ClientContext &context, const FunctionData *bind_data_p,
110112
+ const GlobalTableFunctionState *gstate) {
110238
110113
  auto &bind_data = (TableScanBindData &)*bind_data_p;
110239
110114
  idx_t total_rows = bind_data.table->storage->GetTotalRows();
110240
110115
  if (total_rows == 0 || total_rows < STANDARD_VECTOR_SIZE) {
@@ -110251,9 +110126,9 @@ double TableScanProgress(ClientContext &context, const FunctionData *bind_data_p
110251
110126
  }
110252
110127
 
110253
110128
  idx_t TableScanGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
110254
- FunctionOperatorData *operator_state, ParallelState *parallel_state_p) {
110129
+ LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state) {
110255
110130
  auto &bind_data = (const TableScanBindData &)*bind_data_p;
110256
- auto &state = (TableScanOperatorData &)*operator_state;
110131
+ auto &state = (TableScanLocalState &)*local_state;
110257
110132
  if (state.scan_state.row_group_scan_state.row_group) {
110258
110133
  return state.scan_state.row_group_scan_state.row_group->start;
110259
110134
  }
@@ -110279,8 +110154,8 @@ unique_ptr<NodeStatistics> TableScanCardinality(ClientContext &context, const Fu
110279
110154
  //===--------------------------------------------------------------------===//
110280
110155
  // Index Scan
110281
110156
  //===--------------------------------------------------------------------===//
110282
- struct IndexScanOperatorData : public FunctionOperatorData {
110283
- explicit IndexScanOperatorData(data_ptr_t row_id_data) : row_ids(LogicalType::ROW_TYPE, row_id_data) {
110157
+ struct IndexScanGlobalState : public GlobalTableFunctionState {
110158
+ explicit IndexScanGlobalState(data_ptr_t row_id_data) : row_ids(LogicalType::ROW_TYPE, row_id_data) {
110284
110159
  }
110285
110160
 
110286
110161
  Vector row_ids;
@@ -110290,28 +110165,24 @@ struct IndexScanOperatorData : public FunctionOperatorData {
110290
110165
  bool finished;
110291
110166
  };
110292
110167
 
110293
- static unique_ptr<FunctionOperatorData> IndexScanInit(ClientContext &context, const FunctionData *bind_data_p,
110294
- const vector<column_t> &column_ids,
110295
- TableFilterCollection *filters) {
110296
- auto &bind_data = (const TableScanBindData &)*bind_data_p;
110168
+ static unique_ptr<GlobalTableFunctionState> IndexScanInitGlobal(ClientContext &context, TableFunctionInitInput &input) {
110169
+ auto &bind_data = (const TableScanBindData &)*input.bind_data;
110297
110170
  data_ptr_t row_id_data = nullptr;
110298
110171
  if (!bind_data.result_ids.empty()) {
110299
110172
  row_id_data = (data_ptr_t)&bind_data.result_ids[0];
110300
110173
  }
110301
- auto result = make_unique<IndexScanOperatorData>(row_id_data);
110174
+ auto result = make_unique<IndexScanGlobalState>(row_id_data);
110302
110175
  auto &transaction = Transaction::GetTransaction(context);
110303
- result->column_ids = column_ids;
110304
- transaction.storage.InitializeScan(bind_data.table->storage.get(), result->local_storage_state,
110305
- filters->table_filters);
110176
+ result->column_ids = input.column_ids;
110177
+ transaction.storage.InitializeScan(bind_data.table->storage.get(), result->local_storage_state, input.filters);
110306
110178
 
110307
110179
  result->finished = false;
110308
110180
  return move(result);
110309
110181
  }
110310
110182
 
110311
- static void IndexScanFunction(ClientContext &context, const FunctionData *bind_data_p,
110312
- FunctionOperatorData *operator_state, DataChunk &output) {
110313
- auto &bind_data = (const TableScanBindData &)*bind_data_p;
110314
- auto &state = (IndexScanOperatorData &)*operator_state;
110183
+ static void IndexScanFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
110184
+ auto &bind_data = (const TableScanBindData &)*data_p.bind_data;
110185
+ auto &state = (IndexScanGlobalState &)*data_p.global_state;
110315
110186
  auto &transaction = Transaction::GetTransaction(context);
110316
110187
  if (!state.finished) {
110317
110188
  bind_data.table->storage->Fetch(transaction, output, state.column_ids, state.row_ids,
@@ -110461,15 +110332,12 @@ void TableScanPushdownComplexFilter(ClientContext &context, LogicalGet &get, Fun
110461
110332
  if (index.Scan(transaction, storage, *index_state, STANDARD_VECTOR_SIZE, bind_data.result_ids)) {
110462
110333
  // use an index scan!
110463
110334
  bind_data.is_index_scan = true;
110464
- get.function.init = IndexScanInit;
110335
+ get.function.init_local = nullptr;
110336
+ get.function.init_global = IndexScanInitGlobal;
110465
110337
  get.function.function = IndexScanFunction;
110466
- get.function.max_threads = nullptr;
110467
- get.function.init_parallel_state = nullptr;
110468
- get.function.parallel_state_next = nullptr;
110469
110338
  get.function.table_scan_progress = nullptr;
110470
110339
  get.function.get_batch_index = nullptr;
110471
110340
  get.function.filter_pushdown = false;
110472
- get.function.supports_batch_index = false;
110473
110341
  } else {
110474
110342
  bind_data.result_ids.clear();
110475
110343
  }
@@ -110487,21 +110355,17 @@ string TableScanToString(const FunctionData *bind_data_p) {
110487
110355
 
110488
110356
  TableFunction TableScanFunction::GetFunction() {
110489
110357
  TableFunction scan_function("seq_scan", {}, TableScanFunc);
110490
- scan_function.init = TableScanInit;
110358
+ scan_function.init_local = TableScanInitLocal;
110359
+ scan_function.init_global = TableScanInitGlobal;
110491
110360
  scan_function.statistics = TableScanStatistics;
110492
110361
  scan_function.dependency = TableScanDependency;
110493
110362
  scan_function.cardinality = TableScanCardinality;
110494
110363
  scan_function.pushdown_complex_filter = TableScanPushdownComplexFilter;
110495
110364
  scan_function.to_string = TableScanToString;
110496
- scan_function.max_threads = TableScanMaxThreads;
110497
- scan_function.init_parallel_state = TableScanInitParallelState;
110498
- scan_function.parallel_init = TableScanParallelInit;
110499
- scan_function.parallel_state_next = TableScanParallelStateNext;
110500
110365
  scan_function.table_scan_progress = TableScanProgress;
110501
110366
  scan_function.get_batch_index = TableScanGetBatchIndex;
110502
110367
  scan_function.projection_pushdown = true;
110503
110368
  scan_function.filter_pushdown = true;
110504
- scan_function.supports_batch_index = true;
110505
110369
  return scan_function;
110506
110370
  }
110507
110371
 
@@ -110539,12 +110403,16 @@ public:
110539
110403
  }
110540
110404
  };
110541
110405
 
110542
- struct UnnestOperatorData : public FunctionOperatorData {
110406
+ struct UnnestOperatorData : public GlobalTableFunctionState {
110543
110407
  UnnestOperatorData() {
110544
110408
  }
110545
110409
 
110546
110410
  unique_ptr<OperatorState> operator_state;
110547
110411
  vector<unique_ptr<Expression>> select_list;
110412
+
110413
+ idx_t MaxThreads() const override {
110414
+ return GlobalTableFunctionState::MAX_THREADS;
110415
+ }
110548
110416
  };
110549
110417
 
110550
110418
  static unique_ptr<FunctionData> UnnestBind(ClientContext &context, TableFunctionBindInput &input,
@@ -110557,9 +110425,8 @@ static unique_ptr<FunctionData> UnnestBind(ClientContext &context, TableFunction
110557
110425
  return make_unique<UnnestBindData>(input.input_table_types[0]);
110558
110426
  }
110559
110427
 
110560
- static unique_ptr<FunctionOperatorData> UnnestInit(ClientContext &context, const FunctionData *bind_data_p,
110561
- const vector<column_t> &column_ids, TableFilterCollection *filters) {
110562
- auto &bind_data = (UnnestBindData &)*bind_data_p;
110428
+ static unique_ptr<GlobalTableFunctionState> UnnestInit(ClientContext &context, TableFunctionInitInput &input) {
110429
+ auto &bind_data = (UnnestBindData &)*input.bind_data;
110563
110430
  auto result = make_unique<UnnestOperatorData>();
110564
110431
  result->operator_state = PhysicalUnnest::GetState(context);
110565
110432
  auto ref = make_unique<BoundReferenceExpression>(bind_data.input_type, 0);
@@ -110569,9 +110436,9 @@ static unique_ptr<FunctionOperatorData> UnnestInit(ClientContext &context, const
110569
110436
  return move(result);
110570
110437
  }
110571
110438
 
110572
- static OperatorResultType UnnestFunction(ClientContext &context, const FunctionData *bind_data_p,
110573
- FunctionOperatorData *state_p, DataChunk &input, DataChunk &output) {
110574
- auto &state = (UnnestOperatorData &)*state_p;
110439
+ static OperatorResultType UnnestFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &input,
110440
+ DataChunk &output) {
110441
+ auto &state = (UnnestOperatorData &)*data_p.global_state;
110575
110442
  return PhysicalUnnest::ExecuteInternal(context, input, output, *state.operator_state, state.select_list, false);
110576
110443
  }
110577
110444
 
@@ -110589,9 +110456,10 @@ void UnnestTableFunction::RegisterFunction(BuiltinFunctions &set) {
110589
110456
 
110590
110457
  namespace duckdb {
110591
110458
 
110592
- struct PragmaVersionData : public FunctionOperatorData {
110459
+ struct PragmaVersionData : public GlobalTableFunctionState {
110593
110460
  PragmaVersionData() : finished(false) {
110594
110461
  }
110462
+
110595
110463
  bool finished;
110596
110464
  };
110597
110465
 
@@ -110604,15 +110472,12 @@ static unique_ptr<FunctionData> PragmaVersionBind(ClientContext &context, TableF
110604
110472
  return nullptr;
110605
110473
  }
110606
110474
 
110607
- static unique_ptr<FunctionOperatorData> PragmaVersionInit(ClientContext &context, const FunctionData *bind_data,
110608
- const vector<column_t> &column_ids,
110609
- TableFilterCollection *filters) {
110475
+ static unique_ptr<GlobalTableFunctionState> PragmaVersionInit(ClientContext &context, TableFunctionInitInput &input) {
110610
110476
  return make_unique<PragmaVersionData>();
110611
110477
  }
110612
110478
 
110613
- static void PragmaVersionFunction(ClientContext &context, const FunctionData *bind_data,
110614
- FunctionOperatorData *operator_state, DataChunk &output) {
110615
- auto &data = (PragmaVersionData &)*operator_state;
110479
+ static void PragmaVersionFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
110480
+ auto &data = (PragmaVersionData &)*data_p.global_state;
110616
110481
  if (data.finished) {
110617
110482
  // finished returning values
110618
110483
  return;
@@ -110624,7 +110489,10 @@ static void PragmaVersionFunction(ClientContext &context, const FunctionData *bi
110624
110489
  }
110625
110490
 
110626
110491
  void PragmaVersion::RegisterFunction(BuiltinFunctions &set) {
110627
- set.AddFunction(TableFunction("pragma_version", {}, PragmaVersionFunction, PragmaVersionBind, PragmaVersionInit));
110492
+ TableFunction pragma_version("pragma_version", {}, PragmaVersionFunction);
110493
+ pragma_version.bind = PragmaVersionBind;
110494
+ pragma_version.init_global = PragmaVersionInit;
110495
+ set.AddFunction(pragma_version);
110628
110496
  }
110629
110497
 
110630
110498
  const char *DuckDB::SourceID() {
@@ -110669,49 +110537,28 @@ string DuckDB::Platform() {
110669
110537
 
110670
110538
  namespace duckdb {
110671
110539
 
110672
- FunctionOperatorData::~FunctionOperatorData() {
110540
+ GlobalTableFunctionState::~GlobalTableFunctionState() {
110673
110541
  }
110674
110542
 
110675
- TableFunctionInfo::~TableFunctionInfo() {
110543
+ LocalTableFunctionState::~LocalTableFunctionState() {
110676
110544
  }
110677
110545
 
110678
- TableFilterCollection::TableFilterCollection(TableFilterSet *table_filters) : table_filters(table_filters) {
110546
+ TableFunctionInfo::~TableFunctionInfo() {
110679
110547
  }
110680
110548
 
110681
110549
  TableFunction::TableFunction(string name, vector<LogicalType> arguments, table_function_t function,
110682
- table_function_bind_t bind, table_function_init_t init, table_statistics_t statistics,
110683
- table_function_cleanup_t cleanup, table_function_dependency_t dependency,
110684
- table_function_cardinality_t cardinality,
110685
- table_function_pushdown_complex_filter_t pushdown_complex_filter,
110686
- table_function_to_string_t to_string, table_function_max_threads_t max_threads,
110687
- table_function_init_parallel_state_t init_parallel_state,
110688
- table_function_parallel_t parallel_function, table_function_init_parallel_t parallel_init,
110689
- table_function_parallel_state_next_t parallel_state_next, bool projection_pushdown,
110690
- bool filter_pushdown, table_function_progress_t query_progress,
110691
- table_in_out_function_t in_out_function)
110692
- : SimpleNamedParameterFunction(move(name), move(arguments)), bind(bind), init(init), function(function),
110693
- in_out_function(in_out_function), statistics(statistics), cleanup(cleanup), dependency(dependency),
110694
- cardinality(cardinality), pushdown_complex_filter(pushdown_complex_filter), to_string(to_string),
110695
- max_threads(max_threads), init_parallel_state(init_parallel_state), parallel_function(parallel_function),
110696
- parallel_init(parallel_init), parallel_state_next(parallel_state_next), table_scan_progress(query_progress),
110697
- projection_pushdown(projection_pushdown), filter_pushdown(filter_pushdown), supports_batch_index(false) {
110550
+ table_function_bind_t bind, table_function_init_global_t init_global,
110551
+ table_function_init_local_t init_local)
110552
+ : SimpleNamedParameterFunction(move(name), move(arguments)), bind(bind), init_global(init_global),
110553
+ init_local(init_local), function(function), in_out_function(nullptr), statistics(nullptr), dependency(nullptr),
110554
+ cardinality(nullptr), pushdown_complex_filter(nullptr), to_string(nullptr), table_scan_progress(nullptr),
110555
+ get_batch_index(nullptr), projection_pushdown(false), filter_pushdown(false) {
110698
110556
  }
110699
110557
 
110700
110558
  TableFunction::TableFunction(const vector<LogicalType> &arguments, table_function_t function,
110701
- table_function_bind_t bind, table_function_init_t init, table_statistics_t statistics,
110702
- table_function_cleanup_t cleanup, table_function_dependency_t dependency,
110703
- table_function_cardinality_t cardinality,
110704
- table_function_pushdown_complex_filter_t pushdown_complex_filter,
110705
- table_function_to_string_t to_string, table_function_max_threads_t max_threads,
110706
- table_function_init_parallel_state_t init_parallel_state,
110707
- table_function_parallel_t parallel_function, table_function_init_parallel_t parallel_init,
110708
- table_function_parallel_state_next_t parallel_state_next, bool projection_pushdown,
110709
- bool filter_pushdown, table_function_progress_t query_progress,
110710
- table_in_out_function_t in_out_function)
110711
- : TableFunction(string(), arguments, function, bind, init, statistics, cleanup, dependency, cardinality,
110712
- pushdown_complex_filter, to_string, max_threads, init_parallel_state, parallel_function,
110713
- parallel_init, parallel_state_next, projection_pushdown, filter_pushdown, query_progress,
110714
- in_out_function) {
110559
+ table_function_bind_t bind, table_function_init_global_t init_global,
110560
+ table_function_init_local_t init_local)
110561
+ : TableFunction(string(), arguments, function, bind, init_global, init_local) {
110715
110562
  }
110716
110563
  TableFunction::TableFunction() : SimpleNamedParameterFunction("", {}) {
110717
110564
  }
@@ -113099,6 +112946,7 @@ struct CTableFunctionInfo : public TableFunctionInfo {
113099
112946
 
113100
112947
  duckdb_table_function_bind_t bind = nullptr;
113101
112948
  duckdb_table_function_init_t init = nullptr;
112949
+ duckdb_table_function_init_t local_init = nullptr;
113102
112950
  duckdb_table_function_t function = nullptr;
113103
112951
  void *extra_info = nullptr;
113104
112952
  duckdb_delete_callback_t delete_callback = nullptr;
@@ -113135,7 +112983,7 @@ struct CTableInternalBindInfo {
113135
112983
  string error;
113136
112984
  };
113137
112985
 
113138
- struct CTableInitData : public FunctionOperatorData {
112986
+ struct CTableInitData {
113139
112987
  ~CTableInitData() {
113140
112988
  if (init_data && delete_callback) {
113141
112989
  delete_callback(init_data);
@@ -113146,29 +112994,43 @@ struct CTableInitData : public FunctionOperatorData {
113146
112994
 
113147
112995
  void *init_data = nullptr;
113148
112996
  duckdb_delete_callback_t delete_callback = nullptr;
112997
+ idx_t max_threads = 1;
112998
+ };
112999
+
113000
+ struct CTableGlobalInitData : public GlobalTableFunctionState {
113001
+ CTableInitData init_data;
113002
+
113003
+ idx_t MaxThreads() const override {
113004
+ return init_data.max_threads;
113005
+ }
113006
+ };
113007
+
113008
+ struct CTableLocalInitData : public LocalTableFunctionState {
113009
+ CTableInitData init_data;
113149
113010
  };
113150
113011
 
113151
113012
  struct CTableInternalInitInfo {
113152
113013
  CTableInternalInitInfo(CTableBindData &bind_data, CTableInitData &init_data, const vector<column_t> &column_ids,
113153
- TableFilterCollection *filters)
113014
+ TableFilterSet *filters)
113154
113015
  : bind_data(bind_data), init_data(init_data), column_ids(column_ids), filters(filters), success(true) {
113155
113016
  }
113156
113017
 
113157
113018
  CTableBindData &bind_data;
113158
113019
  CTableInitData &init_data;
113159
113020
  const vector<column_t> &column_ids;
113160
- TableFilterCollection *filters;
113021
+ TableFilterSet *filters;
113161
113022
  bool success;
113162
113023
  string error;
113163
113024
  };
113164
113025
 
113165
113026
  struct CTableInternalFunctionInfo {
113166
- CTableInternalFunctionInfo(CTableBindData &bind_data, CTableInitData &init_data)
113167
- : bind_data(bind_data), init_data(init_data), success(true) {
113027
+ CTableInternalFunctionInfo(CTableBindData &bind_data, CTableInitData &init_data, CTableInitData &local_data)
113028
+ : bind_data(bind_data), init_data(init_data), local_data(local_data), success(true) {
113168
113029
  }
113169
113030
 
113170
113031
  CTableBindData &bind_data;
113171
113032
  CTableInitData &init_data;
113033
+ CTableInitData &local_data;
113172
113034
  bool success;
113173
113035
  string error;
113174
113036
  };
@@ -113188,13 +113050,11 @@ unique_ptr<FunctionData> CTableFunctionBind(ClientContext &context, TableFunctio
113188
113050
  return move(result);
113189
113051
  }
113190
113052
 
113191
- unique_ptr<FunctionOperatorData> CTableFunctionInit(ClientContext &context, const FunctionData *bind_data_p,
113192
- const vector<column_t> &column_ids,
113193
- TableFilterCollection *filters) {
113194
- auto &bind_data = (CTableBindData &)*bind_data_p;
113195
- auto result = make_unique<CTableInitData>();
113053
+ unique_ptr<GlobalTableFunctionState> CTableFunctionInit(ClientContext &context, TableFunctionInitInput &data_p) {
113054
+ auto &bind_data = (CTableBindData &)*data_p.bind_data;
113055
+ auto result = make_unique<CTableGlobalInitData>();
113196
113056
 
113197
- CTableInternalInitInfo init_info(bind_data, *result, column_ids, filters);
113057
+ CTableInternalInitInfo init_info(bind_data, result->init_data, data_p.column_ids, data_p.filters);
113198
113058
  bind_data.info->init(&init_info);
113199
113059
  if (!init_info.success) {
113200
113060
  throw Exception(init_info.error);
@@ -113202,11 +113062,27 @@ unique_ptr<FunctionOperatorData> CTableFunctionInit(ClientContext &context, cons
113202
113062
  return move(result);
113203
113063
  }
113204
113064
 
113205
- void CTableFunction(ClientContext &context, const FunctionData *bind_data_p, FunctionOperatorData *operator_state,
113206
- DataChunk &output) {
113207
- auto &bind_data = (CTableBindData &)*bind_data_p;
113208
- auto &init_data = (CTableInitData &)*operator_state;
113209
- CTableInternalFunctionInfo function_info(bind_data, init_data);
113065
+ unique_ptr<LocalTableFunctionState> CTableFunctionLocalInit(ClientContext &context, TableFunctionInitInput &data_p,
113066
+ GlobalTableFunctionState *gstate) {
113067
+ auto &bind_data = (CTableBindData &)*data_p.bind_data;
113068
+ auto result = make_unique<CTableLocalInitData>();
113069
+ if (!bind_data.info->local_init) {
113070
+ return move(result);
113071
+ }
113072
+
113073
+ CTableInternalInitInfo init_info(bind_data, result->init_data, data_p.column_ids, data_p.filters);
113074
+ bind_data.info->local_init(&init_info);
113075
+ if (!init_info.success) {
113076
+ throw Exception(init_info.error);
113077
+ }
113078
+ return move(result);
113079
+ }
113080
+
113081
+ void CTableFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
113082
+ auto &bind_data = (CTableBindData &)*data_p.bind_data;
113083
+ auto &global_data = (CTableGlobalInitData &)*data_p.global_state;
113084
+ auto &local_data = (CTableLocalInitData &)*data_p.local_state;
113085
+ CTableInternalFunctionInfo function_info(bind_data, global_data.init_data, local_data.init_data);
113210
113086
  bind_data.info->function(&function_info, &output);
113211
113087
  if (!function_info.success) {
113212
113088
  throw Exception(function_info.error);
@@ -113220,7 +113096,7 @@ void CTableFunction(ClientContext &context, const FunctionData *bind_data_p, Fun
113220
113096
  //===--------------------------------------------------------------------===//
113221
113097
  duckdb_table_function duckdb_create_table_function() {
113222
113098
  auto function = new duckdb::TableFunction("", {}, duckdb::CTableFunction, duckdb::CTableFunctionBind,
113223
- duckdb::CTableFunctionInit);
113099
+ duckdb::CTableFunctionInit, duckdb::CTableFunctionLocalInit);
113224
113100
  function->function_info = duckdb::make_shared<duckdb::CTableFunctionInfo>();
113225
113101
  return function;
113226
113102
  }
@@ -113279,6 +113155,15 @@ void duckdb_table_function_set_init(duckdb_table_function function, duckdb_table
113279
113155
  info->init = init;
113280
113156
  }
113281
113157
 
113158
+ void duckdb_table_function_set_local_init(duckdb_table_function function, duckdb_table_function_init_t init) {
113159
+ if (!function || !init) {
113160
+ return;
113161
+ }
113162
+ auto tf = (duckdb::TableFunction *)function;
113163
+ auto info = (duckdb::CTableFunctionInfo *)tf->function_info.get();
113164
+ info->local_init = init;
113165
+ }
113166
+
113282
113167
  void duckdb_table_function_set_function(duckdb_table_function table_function, duckdb_table_function_t function) {
113283
113168
  if (!table_function || !function) {
113284
113169
  return;
@@ -113426,6 +113311,14 @@ idx_t duckdb_init_get_column_index(duckdb_init_info info, idx_t column_index) {
113426
113311
  return function_info->column_ids[column_index];
113427
113312
  }
113428
113313
 
113314
+ void duckdb_init_set_max_threads(duckdb_init_info info, idx_t max_threads) {
113315
+ if (!info) {
113316
+ return;
113317
+ }
113318
+ auto function_info = (duckdb::CTableInternalInitInfo *)info;
113319
+ function_info->init_data.max_threads = max_threads;
113320
+ }
113321
+
113429
113322
  //===--------------------------------------------------------------------===//
113430
113323
  // Function Interface
113431
113324
  //===--------------------------------------------------------------------===//
@@ -113453,6 +113346,14 @@ void *duckdb_function_get_init_data(duckdb_function_info info) {
113453
113346
  return function_info->init_data.init_data;
113454
113347
  }
113455
113348
 
113349
+ void *duckdb_function_get_local_init_data(duckdb_function_info info) {
113350
+ if (!info) {
113351
+ return nullptr;
113352
+ }
113353
+ auto function_info = (duckdb::CTableInternalFunctionInfo *)info;
113354
+ return function_info->local_data.init_data;
113355
+ }
113356
+
113456
113357
  void duckdb_function_set_error(duckdb_function_info info, const char *error) {
113457
113358
  if (!info || !error) {
113458
113359
  return;
@@ -178610,6 +178511,7 @@ void DataTable::InitializeParallelScan(ClientContext &context, ParallelTableScan
178610
178511
  state.transaction_local_data = false;
178611
178512
  // figure out the max row we can scan for both the regular and the transaction-local storage
178612
178513
  state.max_row = total_rows;
178514
+ state.vector_index = 0;
178613
178515
  state.local_state.max_index = 0;
178614
178516
  auto &transaction = Transaction::GetTransaction(context);
178615
178517
  transaction.storage.InitializeScan(this, state.local_state, nullptr);
@@ -178625,13 +178527,19 @@ bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState
178625
178527
  max_row = state.current_row_group->start +
178626
178528
  MinValue<idx_t>(state.current_row_group->count,
178627
178529
  STANDARD_VECTOR_SIZE * state.vector_index + STANDARD_VECTOR_SIZE);
178530
+ D_ASSERT(vector_index * STANDARD_VECTOR_SIZE < state.current_row_group->count);
178628
178531
  } else {
178629
178532
  vector_index = 0;
178630
178533
  max_row = state.current_row_group->start + state.current_row_group->count;
178631
178534
  }
178632
178535
  max_row = MinValue<idx_t>(max_row, state.max_row);
178633
- bool need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
178634
- state.current_row_group, vector_index, max_row);
178536
+ bool need_to_scan;
178537
+ if (state.current_row_group->count == 0) {
178538
+ need_to_scan = false;
178539
+ } else {
178540
+ need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
178541
+ state.current_row_group, vector_index, max_row);
178542
+ }
178635
178543
  if (ClientConfig::GetConfig(context).verify_parallelism) {
178636
178544
  state.vector_index++;
178637
178545
  if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) {
@@ -179739,16 +179647,18 @@ LocalTableStorage::~LocalTableStorage() {
179739
179647
  }
179740
179648
 
179741
179649
  void LocalTableStorage::InitializeScan(LocalScanState &state, TableFilterSet *table_filters) {
179650
+ state.table_filters = table_filters;
179651
+ state.chunk_index = 0;
179742
179652
  if (collection.ChunkCount() == 0) {
179743
179653
  // nothing to scan
179654
+ state.max_index = 0;
179655
+ state.last_chunk_count = 0;
179744
179656
  return;
179745
179657
  }
179746
179658
  state.SetStorage(shared_from_this());
179747
179659
 
179748
- state.chunk_index = 0;
179749
179660
  state.max_index = collection.ChunkCount() - 1;
179750
179661
  state.last_chunk_count = collection.Chunks().back()->size();
179751
- state.table_filters = table_filters;
179752
179662
  }
179753
179663
 
179754
179664
  idx_t LocalTableStorage::EstimatedSize() {