duckdb 0.3.5-dev725.0 → 0.3.5-dev750.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "463e45ba9"
15
- #define DUCKDB_VERSION "v0.3.5-dev725"
14
+ #define DUCKDB_SOURCE_ID "4e5f39098"
15
+ #define DUCKDB_VERSION "v0.3.5-dev750"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -10479,21 +10479,27 @@ namespace duckdb {
10479
10479
 
10480
10480
  class BaseStatistics;
10481
10481
  class LogicalGet;
10482
- struct ParallelState;
10483
10482
  class TableFilterSet;
10484
10483
 
10485
- struct FunctionOperatorData {
10486
- DUCKDB_API virtual ~FunctionOperatorData();
10487
- };
10488
-
10489
10484
  struct TableFunctionInfo {
10490
10485
  DUCKDB_API virtual ~TableFunctionInfo();
10491
10486
  };
10492
10487
 
10493
- struct TableFilterCollection {
10494
- DUCKDB_API explicit TableFilterCollection(TableFilterSet *table_filters);
10488
+ struct GlobalTableFunctionState {
10489
+ public:
10490
+ // value returned from MaxThreads when as many threads as possible should be used
10491
+ constexpr static const int64_t MAX_THREADS = 999999999;
10492
+
10493
+ public:
10494
+ DUCKDB_API virtual ~GlobalTableFunctionState();
10495
+
10496
+ DUCKDB_API virtual idx_t MaxThreads() const {
10497
+ return 1;
10498
+ }
10499
+ };
10495
10500
 
10496
- TableFilterSet *table_filters;
10501
+ struct LocalTableFunctionState {
10502
+ DUCKDB_API virtual ~LocalTableFunctionState();
10497
10503
  };
10498
10504
 
10499
10505
  struct TableFunctionBindInput {
@@ -10511,40 +10517,46 @@ struct TableFunctionBindInput {
10511
10517
  TableFunctionInfo *info;
10512
10518
  };
10513
10519
 
10520
+ struct TableFunctionInitInput {
10521
+ TableFunctionInitInput(const FunctionData *bind_data_p, const vector<column_t> &column_ids_p,
10522
+ TableFilterSet *filters_p)
10523
+ : bind_data(bind_data_p), column_ids(column_ids_p), filters(filters_p) {
10524
+ }
10525
+
10526
+ const FunctionData *bind_data;
10527
+ const vector<column_t> &column_ids;
10528
+ TableFilterSet *filters;
10529
+ };
10530
+
10531
+ struct TableFunctionInput {
10532
+ TableFunctionInput(const FunctionData *bind_data_p, LocalTableFunctionState *local_state_p,
10533
+ GlobalTableFunctionState *global_state_p)
10534
+ : bind_data(bind_data_p), local_state(local_state_p), global_state(global_state_p) {
10535
+ }
10536
+
10537
+ const FunctionData *bind_data;
10538
+ LocalTableFunctionState *local_state;
10539
+ GlobalTableFunctionState *global_state;
10540
+ };
10541
+
10514
10542
  typedef unique_ptr<FunctionData> (*table_function_bind_t)(ClientContext &context, TableFunctionBindInput &input,
10515
10543
  vector<LogicalType> &return_types, vector<string> &names);
10516
- typedef unique_ptr<FunctionOperatorData> (*table_function_init_t)(ClientContext &context, const FunctionData *bind_data,
10517
- const vector<column_t> &column_ids,
10518
- TableFilterCollection *filters);
10544
+ typedef unique_ptr<GlobalTableFunctionState> (*table_function_init_global_t)(ClientContext &context,
10545
+ TableFunctionInitInput &input);
10546
+ typedef unique_ptr<LocalTableFunctionState> (*table_function_init_local_t)(ClientContext &context,
10547
+ TableFunctionInitInput &input,
10548
+ GlobalTableFunctionState *global_state);
10519
10549
  typedef unique_ptr<BaseStatistics> (*table_statistics_t)(ClientContext &context, const FunctionData *bind_data,
10520
10550
  column_t column_index);
10521
- typedef void (*table_function_t)(ClientContext &context, const FunctionData *bind_data,
10522
- FunctionOperatorData *operator_state, DataChunk &output);
10523
-
10524
- typedef OperatorResultType (*table_in_out_function_t)(ClientContext &context, const FunctionData *bind_data,
10525
- FunctionOperatorData *operator_state, DataChunk &input,
10526
- DataChunk &output);
10551
+ typedef void (*table_function_t)(ClientContext &context, TableFunctionInput &data, DataChunk &output);
10527
10552
 
10528
- typedef void (*table_function_parallel_t)(ClientContext &context, const FunctionData *bind_data,
10529
- FunctionOperatorData *operator_state, DataChunk &output,
10530
- ParallelState *parallel_state);
10553
+ typedef OperatorResultType (*table_in_out_function_t)(ClientContext &context, TableFunctionInput &data,
10554
+ DataChunk &input, DataChunk &output);
10531
10555
  typedef idx_t (*table_function_get_batch_index_t)(ClientContext &context, const FunctionData *bind_data,
10532
- FunctionOperatorData *operator_state, ParallelState *parallel_state);
10533
- typedef void (*table_function_cleanup_t)(ClientContext &context, const FunctionData *bind_data,
10534
- FunctionOperatorData *operator_state);
10535
- typedef idx_t (*table_function_max_threads_t)(ClientContext &context, const FunctionData *bind_data);
10536
- typedef unique_ptr<ParallelState> (*table_function_init_parallel_state_t)(ClientContext &context,
10537
- const FunctionData *bind_data,
10538
- const vector<column_t> &column_ids,
10539
- TableFilterCollection *filters);
10540
- typedef unique_ptr<FunctionOperatorData> (*table_function_init_parallel_t)(ClientContext &context,
10541
- const FunctionData *bind_data,
10542
- ParallelState *state,
10543
- const vector<column_t> &column_ids,
10544
- TableFilterCollection *filters);
10545
- typedef bool (*table_function_parallel_state_next_t)(ClientContext &context, const FunctionData *bind_data,
10546
- FunctionOperatorData *state, ParallelState *parallel_state);
10547
- typedef double (*table_function_progress_t)(ClientContext &context, const FunctionData *bind_data);
10556
+ LocalTableFunctionState *local_state,
10557
+ GlobalTableFunctionState *global_state);
10558
+ typedef double (*table_function_progress_t)(ClientContext &context, const FunctionData *bind_data,
10559
+ const GlobalTableFunctionState *global_state);
10548
10560
  typedef void (*table_function_dependency_t)(unordered_set<CatalogEntry *> &dependencies, const FunctionData *bind_data);
10549
10561
  typedef unique_ptr<NodeStatistics> (*table_function_cardinality_t)(ClientContext &context,
10550
10562
  const FunctionData *bind_data);
@@ -10557,40 +10569,26 @@ class TableFunction : public SimpleNamedParameterFunction {
10557
10569
  public:
10558
10570
  DUCKDB_API
10559
10571
  TableFunction(string name, vector<LogicalType> arguments, table_function_t function,
10560
- table_function_bind_t bind = nullptr, table_function_init_t init = nullptr,
10561
- table_statistics_t statistics = nullptr, table_function_cleanup_t cleanup = nullptr,
10562
- table_function_dependency_t dependency = nullptr, table_function_cardinality_t cardinality = nullptr,
10563
- table_function_pushdown_complex_filter_t pushdown_complex_filter = nullptr,
10564
- table_function_to_string_t to_string = nullptr, table_function_max_threads_t max_threads = nullptr,
10565
- table_function_init_parallel_state_t init_parallel_state = nullptr,
10566
- table_function_parallel_t parallel_function = nullptr,
10567
- table_function_init_parallel_t parallel_init = nullptr,
10568
- table_function_parallel_state_next_t parallel_state_next = nullptr, bool projection_pushdown = false,
10569
- bool filter_pushdown = false, table_function_progress_t query_progress = nullptr,
10570
- table_in_out_function_t in_out_function = nullptr);
10572
+ table_function_bind_t bind = nullptr, table_function_init_global_t init_global = nullptr,
10573
+ table_function_init_local_t init_local = nullptr);
10571
10574
  DUCKDB_API
10572
10575
  TableFunction(const vector<LogicalType> &arguments, table_function_t function, table_function_bind_t bind = nullptr,
10573
- table_function_init_t init = nullptr, table_statistics_t statistics = nullptr,
10574
- table_function_cleanup_t cleanup = nullptr, table_function_dependency_t dependency = nullptr,
10575
- table_function_cardinality_t cardinality = nullptr,
10576
- table_function_pushdown_complex_filter_t pushdown_complex_filter = nullptr,
10577
- table_function_to_string_t to_string = nullptr, table_function_max_threads_t max_threads = nullptr,
10578
- table_function_init_parallel_state_t init_parallel_state = nullptr,
10579
- table_function_parallel_t parallel_function = nullptr,
10580
- table_function_init_parallel_t parallel_init = nullptr,
10581
- table_function_parallel_state_next_t parallel_state_next = nullptr, bool projection_pushdown = false,
10582
- bool filter_pushdown = false, table_function_progress_t query_progress = nullptr,
10583
- table_in_out_function_t in_out_function = nullptr);
10576
+ table_function_init_global_t init_global = nullptr, table_function_init_local_t init_local = nullptr);
10584
10577
  DUCKDB_API TableFunction();
10585
10578
 
10586
10579
  //! Bind function
10587
10580
  //! This function is used for determining the return type of a table producing function and returning bind data
10588
10581
  //! The returned FunctionData object should be constant and should not be changed during execution.
10589
10582
  table_function_bind_t bind;
10590
- //! (Optional) init function
10591
- //! Initialize the operator state of the function. The operator state is used to keep track of the progress in the
10592
- //! table function.
10593
- table_function_init_t init;
10583
+ //! (Optional) global init function
10584
+ //! Initialize the global operator state of the function.
10585
+ //! The global operator state is used to keep track of the progress in the table function and is shared between
10586
+ //! all threads working on the table function.
10587
+ table_function_init_global_t init_global;
10588
+ //! (Optional) local init function
10589
+ //! Initialize the local operator state of the function.
10590
+ //! The local operator state is used to keep track of the progress in the table function and is thread-local.
10591
+ table_function_init_local_t init_local;
10594
10592
  //! The main function
10595
10593
  table_function_t function;
10596
10594
  //! The table in-out function (if this is an in-out function)
@@ -10598,9 +10596,6 @@ public:
10598
10596
  //! (Optional) statistics function
10599
10597
  //! Returns the statistics of a specified column
10600
10598
  table_statistics_t statistics;
10601
- //! (Optional) cleanup function
10602
- //! The final cleanup function, called after all data is exhausted from the main function
10603
- table_function_cleanup_t cleanup;
10604
10599
  //! (Optional) dependency function
10605
10600
  //! Sets up which catalog entries this table function depends on
10606
10601
  table_function_dependency_t dependency;
@@ -10612,17 +10607,6 @@ public:
10612
10607
  table_function_pushdown_complex_filter_t pushdown_complex_filter;
10613
10608
  //! (Optional) function for rendering the operator to a string in profiling output
10614
10609
  table_function_to_string_t to_string;
10615
- //! (Optional) function that returns the maximum amount of threads that can work on this task
10616
- table_function_max_threads_t max_threads;
10617
- //! (Optional) initialize the parallel scan state, called once in total.
10618
- table_function_init_parallel_state_t init_parallel_state;
10619
- //! (Optional) Parallel version of the main function
10620
- table_function_parallel_t parallel_function;
10621
- //! (Optional) initialize the parallel scan given the parallel state. Called once per task. Return nullptr if there
10622
- //! is nothing left to scan.
10623
- table_function_init_parallel_t parallel_init;
10624
- //! (Optional) return the next chunk to process in the parallel scan, or return nullptr if there is none
10625
- table_function_parallel_state_next_t parallel_state_next;
10626
10610
  //! (Optional) return how much of the table we have scanned up to this point (% of the data)
10627
10611
  table_function_progress_t table_scan_progress;
10628
10612
  //! (Optional) returns the current batch index of the current scan operator
@@ -10633,33 +10617,12 @@ public:
10633
10617
  //! Whether or not the table function supports filter pushdown. If not supported a filter will be added
10634
10618
  //! that applies the table filter directly.
10635
10619
  bool filter_pushdown;
10636
- //! Whether or not the table function supports fetching of a batch index
10637
- bool supports_batch_index;
10638
10620
  //! Additional function info, passed to the bind
10639
10621
  shared_ptr<TableFunctionInfo> function_info;
10640
10622
  };
10641
10623
 
10642
10624
  } // namespace duckdb
10643
10625
 
10644
- //===----------------------------------------------------------------------===//
10645
- // DuckDB
10646
- //
10647
- // duckdb/parallel/parallel_state.hpp
10648
- //
10649
- //
10650
- //===----------------------------------------------------------------------===//
10651
-
10652
-
10653
-
10654
- namespace duckdb {
10655
-
10656
- struct ParallelState {
10657
- virtual ~ParallelState() {
10658
- }
10659
- };
10660
-
10661
- } // namespace duckdb
10662
-
10663
10626
  //===----------------------------------------------------------------------===//
10664
10627
  // DuckDB
10665
10628
  //
@@ -14638,11 +14601,11 @@ typedef unordered_map<block_id_t, unique_ptr<BufferHandle>> buffer_handle_set_t;
14638
14601
 
14639
14602
  struct ColumnScanState {
14640
14603
  //! The column segment that is currently being scanned
14641
- ColumnSegment *current;
14604
+ ColumnSegment *current = nullptr;
14642
14605
  //! The current row index of the scan
14643
- idx_t row_index;
14606
+ idx_t row_index = 0;
14644
14607
  //! The internal row index (i.e. the position of the SegmentScanState)
14645
- idx_t internal_index;
14608
+ idx_t internal_index = 0;
14646
14609
  //! Segment scan state
14647
14610
  unique_ptr<SegmentScanState> scan_state;
14648
14611
  //! Child states of the vector
@@ -14676,10 +14639,10 @@ struct LocalScanState {
14676
14639
  return storage.get();
14677
14640
  }
14678
14641
 
14679
- idx_t chunk_index;
14680
- idx_t max_index;
14681
- idx_t last_chunk_count;
14682
- TableFilterSet *table_filters;
14642
+ idx_t chunk_index = 0;
14643
+ idx_t max_index = 0;
14644
+ idx_t last_chunk_count = 0;
14645
+ TableFilterSet *table_filters = nullptr;
14683
14646
 
14684
14647
  private:
14685
14648
  shared_ptr<LocalTableStorage> storage;
@@ -14693,17 +14656,13 @@ public:
14693
14656
  //! The parent scan state
14694
14657
  TableScanState &parent;
14695
14658
  //! The current row_group we are scanning
14696
- RowGroup *row_group;
14659
+ RowGroup *row_group = nullptr;
14697
14660
  //! The vector index within the row_group
14698
- idx_t vector_index;
14661
+ idx_t vector_index = 0;
14699
14662
  //! The maximum row index of this row_group scan
14700
- idx_t max_row;
14663
+ idx_t max_row = 0;
14701
14664
  //! Child column scans
14702
14665
  unique_ptr<ColumnScanState[]> column_scans;
14703
-
14704
- public:
14705
- //! Move to the next vector, skipping past the current one
14706
- void NextVector();
14707
14666
  };
14708
14667
 
14709
14668
  class TableScanState {
@@ -14713,7 +14672,7 @@ public:
14713
14672
  //! The row_group scan state
14714
14673
  RowGroupScanState row_group_scan_state;
14715
14674
  //! The total maximum row index
14716
- idx_t max_row;
14675
+ idx_t max_row = 0;
14717
14676
  //! The column identifiers of the scan
14718
14677
  vector<column_t> column_ids;
14719
14678
  //! The table filters (if any)
@@ -14722,10 +14681,6 @@ public:
14722
14681
  unique_ptr<AdaptiveFilter> adaptive_filter;
14723
14682
  //! Transaction-local scan state
14724
14683
  LocalScanState local_state;
14725
-
14726
- public:
14727
- //! Move to the next vector
14728
- void NextVector();
14729
14684
  };
14730
14685
 
14731
14686
  class CreateIndexScanState : public TableScanState {
@@ -16619,6 +16574,15 @@ Sets the init function of the table function
16619
16574
  */
16620
16575
  DUCKDB_API void duckdb_table_function_set_init(duckdb_table_function table_function, duckdb_table_function_init_t init);
16621
16576
 
16577
+ /*!
16578
+ Sets the thread-local init function of the table function
16579
+
16580
+ * table_function: The table function
16581
+ * init: The init function
16582
+ */
16583
+ DUCKDB_API void duckdb_table_function_set_local_init(duckdb_table_function table_function,
16584
+ duckdb_table_function_init_t init);
16585
+
16622
16586
  /*!
16623
16587
  Sets the main function of the table function
16624
16588
 
@@ -16762,6 +16726,14 @@ This function must be used if projection pushdown is enabled to figure out which
16762
16726
  */
16763
16727
  DUCKDB_API idx_t duckdb_init_get_column_index(duckdb_init_info info, idx_t column_index);
16764
16728
 
16729
+ /*!
16730
+ Sets how many threads can process this table function in parallel (default: 1)
16731
+
16732
+ * info: The info object
16733
+ * max_threads: The maximum number of threads that can process this table function
16734
+ */
16735
+ DUCKDB_API void duckdb_init_set_max_threads(duckdb_init_info info, idx_t max_threads);
16736
+
16765
16737
  /*!
16766
16738
  Report that an error has occurred while calling init.
16767
16739
 
@@ -16793,13 +16765,21 @@ For tracking state, use the init data instead.
16793
16765
  DUCKDB_API void *duckdb_function_get_bind_data(duckdb_function_info info);
16794
16766
 
16795
16767
  /*!
16796
- Gets the init data set by `duckdb_bind_set_init_data` during the bind.
16768
+ Gets the init data set by `duckdb_init_set_init_data` during the init.
16797
16769
 
16798
16770
  * info: The info object
16799
16771
  * returns: The init data object
16800
16772
  */
16801
16773
  DUCKDB_API void *duckdb_function_get_init_data(duckdb_function_info info);
16802
16774
 
16775
+ /*!
16776
+ Gets the thread-local init data set by `duckdb_init_set_init_data` during the local_init.
16777
+
16778
+ * info: The info object
16779
+ * returns: The init data object
16780
+ */
16781
+ DUCKDB_API void *duckdb_function_get_local_init_data(duckdb_function_info info);
16782
+
16803
16783
  /*!
16804
16784
  Report that an error has occurred while executing the function.
16805
16785