duckdb 0.3.5-dev725.0 → 0.3.5-dev750.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +708 -798
- package/src/duckdb.hpp +99 -119
- package/src/parquet-amalgamation.cpp +30206 -30263
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.3.5-dev725"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "4e5f39098"
|
|
15
|
+
#define DUCKDB_VERSION "v0.3.5-dev750"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -10479,21 +10479,27 @@ namespace duckdb {
|
|
|
10479
10479
|
|
|
10480
10480
|
class BaseStatistics;
|
|
10481
10481
|
class LogicalGet;
|
|
10482
|
-
struct ParallelState;
|
|
10483
10482
|
class TableFilterSet;
|
|
10484
10483
|
|
|
10485
|
-
struct FunctionOperatorData {
|
|
10486
|
-
DUCKDB_API virtual ~FunctionOperatorData();
|
|
10487
|
-
};
|
|
10488
|
-
|
|
10489
10484
|
struct TableFunctionInfo {
|
|
10490
10485
|
DUCKDB_API virtual ~TableFunctionInfo();
|
|
10491
10486
|
};
|
|
10492
10487
|
|
|
10493
|
-
struct
|
|
10494
|
-
|
|
10488
|
+
struct GlobalTableFunctionState {
|
|
10489
|
+
public:
|
|
10490
|
+
// value returned from MaxThreads when as many threads as possible should be used
|
|
10491
|
+
constexpr static const int64_t MAX_THREADS = 999999999;
|
|
10492
|
+
|
|
10493
|
+
public:
|
|
10494
|
+
DUCKDB_API virtual ~GlobalTableFunctionState();
|
|
10495
|
+
|
|
10496
|
+
DUCKDB_API virtual idx_t MaxThreads() const {
|
|
10497
|
+
return 1;
|
|
10498
|
+
}
|
|
10499
|
+
};
|
|
10495
10500
|
|
|
10496
|
-
|
|
10501
|
+
struct LocalTableFunctionState {
|
|
10502
|
+
DUCKDB_API virtual ~LocalTableFunctionState();
|
|
10497
10503
|
};
|
|
10498
10504
|
|
|
10499
10505
|
struct TableFunctionBindInput {
|
|
@@ -10511,40 +10517,46 @@ struct TableFunctionBindInput {
|
|
|
10511
10517
|
TableFunctionInfo *info;
|
|
10512
10518
|
};
|
|
10513
10519
|
|
|
10520
|
+
struct TableFunctionInitInput {
|
|
10521
|
+
TableFunctionInitInput(const FunctionData *bind_data_p, const vector<column_t> &column_ids_p,
|
|
10522
|
+
TableFilterSet *filters_p)
|
|
10523
|
+
: bind_data(bind_data_p), column_ids(column_ids_p), filters(filters_p) {
|
|
10524
|
+
}
|
|
10525
|
+
|
|
10526
|
+
const FunctionData *bind_data;
|
|
10527
|
+
const vector<column_t> &column_ids;
|
|
10528
|
+
TableFilterSet *filters;
|
|
10529
|
+
};
|
|
10530
|
+
|
|
10531
|
+
struct TableFunctionInput {
|
|
10532
|
+
TableFunctionInput(const FunctionData *bind_data_p, LocalTableFunctionState *local_state_p,
|
|
10533
|
+
GlobalTableFunctionState *global_state_p)
|
|
10534
|
+
: bind_data(bind_data_p), local_state(local_state_p), global_state(global_state_p) {
|
|
10535
|
+
}
|
|
10536
|
+
|
|
10537
|
+
const FunctionData *bind_data;
|
|
10538
|
+
LocalTableFunctionState *local_state;
|
|
10539
|
+
GlobalTableFunctionState *global_state;
|
|
10540
|
+
};
|
|
10541
|
+
|
|
10514
10542
|
typedef unique_ptr<FunctionData> (*table_function_bind_t)(ClientContext &context, TableFunctionBindInput &input,
|
|
10515
10543
|
vector<LogicalType> &return_types, vector<string> &names);
|
|
10516
|
-
typedef unique_ptr<
|
|
10517
|
-
|
|
10518
|
-
|
|
10544
|
+
typedef unique_ptr<GlobalTableFunctionState> (*table_function_init_global_t)(ClientContext &context,
|
|
10545
|
+
TableFunctionInitInput &input);
|
|
10546
|
+
typedef unique_ptr<LocalTableFunctionState> (*table_function_init_local_t)(ClientContext &context,
|
|
10547
|
+
TableFunctionInitInput &input,
|
|
10548
|
+
GlobalTableFunctionState *global_state);
|
|
10519
10549
|
typedef unique_ptr<BaseStatistics> (*table_statistics_t)(ClientContext &context, const FunctionData *bind_data,
|
|
10520
10550
|
column_t column_index);
|
|
10521
|
-
typedef void (*table_function_t)(ClientContext &context,
|
|
10522
|
-
FunctionOperatorData *operator_state, DataChunk &output);
|
|
10523
|
-
|
|
10524
|
-
typedef OperatorResultType (*table_in_out_function_t)(ClientContext &context, const FunctionData *bind_data,
|
|
10525
|
-
FunctionOperatorData *operator_state, DataChunk &input,
|
|
10526
|
-
DataChunk &output);
|
|
10551
|
+
typedef void (*table_function_t)(ClientContext &context, TableFunctionInput &data, DataChunk &output);
|
|
10527
10552
|
|
|
10528
|
-
typedef
|
|
10529
|
-
|
|
10530
|
-
ParallelState *parallel_state);
|
|
10553
|
+
typedef OperatorResultType (*table_in_out_function_t)(ClientContext &context, TableFunctionInput &data,
|
|
10554
|
+
DataChunk &input, DataChunk &output);
|
|
10531
10555
|
typedef idx_t (*table_function_get_batch_index_t)(ClientContext &context, const FunctionData *bind_data,
|
|
10532
|
-
|
|
10533
|
-
|
|
10534
|
-
|
|
10535
|
-
|
|
10536
|
-
typedef unique_ptr<ParallelState> (*table_function_init_parallel_state_t)(ClientContext &context,
|
|
10537
|
-
const FunctionData *bind_data,
|
|
10538
|
-
const vector<column_t> &column_ids,
|
|
10539
|
-
TableFilterCollection *filters);
|
|
10540
|
-
typedef unique_ptr<FunctionOperatorData> (*table_function_init_parallel_t)(ClientContext &context,
|
|
10541
|
-
const FunctionData *bind_data,
|
|
10542
|
-
ParallelState *state,
|
|
10543
|
-
const vector<column_t> &column_ids,
|
|
10544
|
-
TableFilterCollection *filters);
|
|
10545
|
-
typedef bool (*table_function_parallel_state_next_t)(ClientContext &context, const FunctionData *bind_data,
|
|
10546
|
-
FunctionOperatorData *state, ParallelState *parallel_state);
|
|
10547
|
-
typedef double (*table_function_progress_t)(ClientContext &context, const FunctionData *bind_data);
|
|
10556
|
+
LocalTableFunctionState *local_state,
|
|
10557
|
+
GlobalTableFunctionState *global_state);
|
|
10558
|
+
typedef double (*table_function_progress_t)(ClientContext &context, const FunctionData *bind_data,
|
|
10559
|
+
const GlobalTableFunctionState *global_state);
|
|
10548
10560
|
typedef void (*table_function_dependency_t)(unordered_set<CatalogEntry *> &dependencies, const FunctionData *bind_data);
|
|
10549
10561
|
typedef unique_ptr<NodeStatistics> (*table_function_cardinality_t)(ClientContext &context,
|
|
10550
10562
|
const FunctionData *bind_data);
|
|
@@ -10557,40 +10569,26 @@ class TableFunction : public SimpleNamedParameterFunction {
|
|
|
10557
10569
|
public:
|
|
10558
10570
|
DUCKDB_API
|
|
10559
10571
|
TableFunction(string name, vector<LogicalType> arguments, table_function_t function,
|
|
10560
|
-
table_function_bind_t bind = nullptr,
|
|
10561
|
-
|
|
10562
|
-
table_function_dependency_t dependency = nullptr, table_function_cardinality_t cardinality = nullptr,
|
|
10563
|
-
table_function_pushdown_complex_filter_t pushdown_complex_filter = nullptr,
|
|
10564
|
-
table_function_to_string_t to_string = nullptr, table_function_max_threads_t max_threads = nullptr,
|
|
10565
|
-
table_function_init_parallel_state_t init_parallel_state = nullptr,
|
|
10566
|
-
table_function_parallel_t parallel_function = nullptr,
|
|
10567
|
-
table_function_init_parallel_t parallel_init = nullptr,
|
|
10568
|
-
table_function_parallel_state_next_t parallel_state_next = nullptr, bool projection_pushdown = false,
|
|
10569
|
-
bool filter_pushdown = false, table_function_progress_t query_progress = nullptr,
|
|
10570
|
-
table_in_out_function_t in_out_function = nullptr);
|
|
10572
|
+
table_function_bind_t bind = nullptr, table_function_init_global_t init_global = nullptr,
|
|
10573
|
+
table_function_init_local_t init_local = nullptr);
|
|
10571
10574
|
DUCKDB_API
|
|
10572
10575
|
TableFunction(const vector<LogicalType> &arguments, table_function_t function, table_function_bind_t bind = nullptr,
|
|
10573
|
-
|
|
10574
|
-
table_function_cleanup_t cleanup = nullptr, table_function_dependency_t dependency = nullptr,
|
|
10575
|
-
table_function_cardinality_t cardinality = nullptr,
|
|
10576
|
-
table_function_pushdown_complex_filter_t pushdown_complex_filter = nullptr,
|
|
10577
|
-
table_function_to_string_t to_string = nullptr, table_function_max_threads_t max_threads = nullptr,
|
|
10578
|
-
table_function_init_parallel_state_t init_parallel_state = nullptr,
|
|
10579
|
-
table_function_parallel_t parallel_function = nullptr,
|
|
10580
|
-
table_function_init_parallel_t parallel_init = nullptr,
|
|
10581
|
-
table_function_parallel_state_next_t parallel_state_next = nullptr, bool projection_pushdown = false,
|
|
10582
|
-
bool filter_pushdown = false, table_function_progress_t query_progress = nullptr,
|
|
10583
|
-
table_in_out_function_t in_out_function = nullptr);
|
|
10576
|
+
table_function_init_global_t init_global = nullptr, table_function_init_local_t init_local = nullptr);
|
|
10584
10577
|
DUCKDB_API TableFunction();
|
|
10585
10578
|
|
|
10586
10579
|
//! Bind function
|
|
10587
10580
|
//! This function is used for determining the return type of a table producing function and returning bind data
|
|
10588
10581
|
//! The returned FunctionData object should be constant and should not be changed during execution.
|
|
10589
10582
|
table_function_bind_t bind;
|
|
10590
|
-
//! (Optional) init function
|
|
10591
|
-
//! Initialize the operator state of the function.
|
|
10592
|
-
//! table function
|
|
10593
|
-
|
|
10583
|
+
//! (Optional) global init function
|
|
10584
|
+
//! Initialize the global operator state of the function.
|
|
10585
|
+
//! The global operator state is used to keep track of the progress in the table function and is shared between
|
|
10586
|
+
//! all threads working on the table function.
|
|
10587
|
+
table_function_init_global_t init_global;
|
|
10588
|
+
//! (Optional) local init function
|
|
10589
|
+
//! Initialize the local operator state of the function.
|
|
10590
|
+
//! The local operator state is used to keep track of the progress in the table function and is thread-local.
|
|
10591
|
+
table_function_init_local_t init_local;
|
|
10594
10592
|
//! The main function
|
|
10595
10593
|
table_function_t function;
|
|
10596
10594
|
//! The table in-out function (if this is an in-out function)
|
|
@@ -10598,9 +10596,6 @@ public:
|
|
|
10598
10596
|
//! (Optional) statistics function
|
|
10599
10597
|
//! Returns the statistics of a specified column
|
|
10600
10598
|
table_statistics_t statistics;
|
|
10601
|
-
//! (Optional) cleanup function
|
|
10602
|
-
//! The final cleanup function, called after all data is exhausted from the main function
|
|
10603
|
-
table_function_cleanup_t cleanup;
|
|
10604
10599
|
//! (Optional) dependency function
|
|
10605
10600
|
//! Sets up which catalog entries this table function depends on
|
|
10606
10601
|
table_function_dependency_t dependency;
|
|
@@ -10612,17 +10607,6 @@ public:
|
|
|
10612
10607
|
table_function_pushdown_complex_filter_t pushdown_complex_filter;
|
|
10613
10608
|
//! (Optional) function for rendering the operator to a string in profiling output
|
|
10614
10609
|
table_function_to_string_t to_string;
|
|
10615
|
-
//! (Optional) function that returns the maximum amount of threads that can work on this task
|
|
10616
|
-
table_function_max_threads_t max_threads;
|
|
10617
|
-
//! (Optional) initialize the parallel scan state, called once in total.
|
|
10618
|
-
table_function_init_parallel_state_t init_parallel_state;
|
|
10619
|
-
//! (Optional) Parallel version of the main function
|
|
10620
|
-
table_function_parallel_t parallel_function;
|
|
10621
|
-
//! (Optional) initialize the parallel scan given the parallel state. Called once per task. Return nullptr if there
|
|
10622
|
-
//! is nothing left to scan.
|
|
10623
|
-
table_function_init_parallel_t parallel_init;
|
|
10624
|
-
//! (Optional) return the next chunk to process in the parallel scan, or return nullptr if there is none
|
|
10625
|
-
table_function_parallel_state_next_t parallel_state_next;
|
|
10626
10610
|
//! (Optional) return how much of the table we have scanned up to this point (% of the data)
|
|
10627
10611
|
table_function_progress_t table_scan_progress;
|
|
10628
10612
|
//! (Optional) returns the current batch index of the current scan operator
|
|
@@ -10633,33 +10617,12 @@ public:
|
|
|
10633
10617
|
//! Whether or not the table function supports filter pushdown. If not supported a filter will be added
|
|
10634
10618
|
//! that applies the table filter directly.
|
|
10635
10619
|
bool filter_pushdown;
|
|
10636
|
-
//! Whether or not the table function supports fetching of a batch index
|
|
10637
|
-
bool supports_batch_index;
|
|
10638
10620
|
//! Additional function info, passed to the bind
|
|
10639
10621
|
shared_ptr<TableFunctionInfo> function_info;
|
|
10640
10622
|
};
|
|
10641
10623
|
|
|
10642
10624
|
} // namespace duckdb
|
|
10643
10625
|
|
|
10644
|
-
//===----------------------------------------------------------------------===//
|
|
10645
|
-
// DuckDB
|
|
10646
|
-
//
|
|
10647
|
-
// duckdb/parallel/parallel_state.hpp
|
|
10648
|
-
//
|
|
10649
|
-
//
|
|
10650
|
-
//===----------------------------------------------------------------------===//
|
|
10651
|
-
|
|
10652
|
-
|
|
10653
|
-
|
|
10654
|
-
namespace duckdb {
|
|
10655
|
-
|
|
10656
|
-
struct ParallelState {
|
|
10657
|
-
virtual ~ParallelState() {
|
|
10658
|
-
}
|
|
10659
|
-
};
|
|
10660
|
-
|
|
10661
|
-
} // namespace duckdb
|
|
10662
|
-
|
|
10663
10626
|
//===----------------------------------------------------------------------===//
|
|
10664
10627
|
// DuckDB
|
|
10665
10628
|
//
|
|
@@ -14638,11 +14601,11 @@ typedef unordered_map<block_id_t, unique_ptr<BufferHandle>> buffer_handle_set_t;
|
|
|
14638
14601
|
|
|
14639
14602
|
struct ColumnScanState {
|
|
14640
14603
|
//! The column segment that is currently being scanned
|
|
14641
|
-
ColumnSegment *current;
|
|
14604
|
+
ColumnSegment *current = nullptr;
|
|
14642
14605
|
//! The current row index of the scan
|
|
14643
|
-
idx_t row_index;
|
|
14606
|
+
idx_t row_index = 0;
|
|
14644
14607
|
//! The internal row index (i.e. the position of the SegmentScanState)
|
|
14645
|
-
idx_t internal_index;
|
|
14608
|
+
idx_t internal_index = 0;
|
|
14646
14609
|
//! Segment scan state
|
|
14647
14610
|
unique_ptr<SegmentScanState> scan_state;
|
|
14648
14611
|
//! Child states of the vector
|
|
@@ -14676,10 +14639,10 @@ struct LocalScanState {
|
|
|
14676
14639
|
return storage.get();
|
|
14677
14640
|
}
|
|
14678
14641
|
|
|
14679
|
-
idx_t chunk_index;
|
|
14680
|
-
idx_t max_index;
|
|
14681
|
-
idx_t last_chunk_count;
|
|
14682
|
-
TableFilterSet *table_filters;
|
|
14642
|
+
idx_t chunk_index = 0;
|
|
14643
|
+
idx_t max_index = 0;
|
|
14644
|
+
idx_t last_chunk_count = 0;
|
|
14645
|
+
TableFilterSet *table_filters = nullptr;
|
|
14683
14646
|
|
|
14684
14647
|
private:
|
|
14685
14648
|
shared_ptr<LocalTableStorage> storage;
|
|
@@ -14693,17 +14656,13 @@ public:
|
|
|
14693
14656
|
//! The parent scan state
|
|
14694
14657
|
TableScanState &parent;
|
|
14695
14658
|
//! The current row_group we are scanning
|
|
14696
|
-
RowGroup *row_group;
|
|
14659
|
+
RowGroup *row_group = nullptr;
|
|
14697
14660
|
//! The vector index within the row_group
|
|
14698
|
-
idx_t vector_index;
|
|
14661
|
+
idx_t vector_index = 0;
|
|
14699
14662
|
//! The maximum row index of this row_group scan
|
|
14700
|
-
idx_t max_row;
|
|
14663
|
+
idx_t max_row = 0;
|
|
14701
14664
|
//! Child column scans
|
|
14702
14665
|
unique_ptr<ColumnScanState[]> column_scans;
|
|
14703
|
-
|
|
14704
|
-
public:
|
|
14705
|
-
//! Move to the next vector, skipping past the current one
|
|
14706
|
-
void NextVector();
|
|
14707
14666
|
};
|
|
14708
14667
|
|
|
14709
14668
|
class TableScanState {
|
|
@@ -14713,7 +14672,7 @@ public:
|
|
|
14713
14672
|
//! The row_group scan state
|
|
14714
14673
|
RowGroupScanState row_group_scan_state;
|
|
14715
14674
|
//! The total maximum row index
|
|
14716
|
-
idx_t max_row;
|
|
14675
|
+
idx_t max_row = 0;
|
|
14717
14676
|
//! The column identifiers of the scan
|
|
14718
14677
|
vector<column_t> column_ids;
|
|
14719
14678
|
//! The table filters (if any)
|
|
@@ -14722,10 +14681,6 @@ public:
|
|
|
14722
14681
|
unique_ptr<AdaptiveFilter> adaptive_filter;
|
|
14723
14682
|
//! Transaction-local scan state
|
|
14724
14683
|
LocalScanState local_state;
|
|
14725
|
-
|
|
14726
|
-
public:
|
|
14727
|
-
//! Move to the next vector
|
|
14728
|
-
void NextVector();
|
|
14729
14684
|
};
|
|
14730
14685
|
|
|
14731
14686
|
class CreateIndexScanState : public TableScanState {
|
|
@@ -16619,6 +16574,15 @@ Sets the init function of the table function
|
|
|
16619
16574
|
*/
|
|
16620
16575
|
DUCKDB_API void duckdb_table_function_set_init(duckdb_table_function table_function, duckdb_table_function_init_t init);
|
|
16621
16576
|
|
|
16577
|
+
/*!
|
|
16578
|
+
Sets the thread-local init function of the table function
|
|
16579
|
+
|
|
16580
|
+
* table_function: The table function
|
|
16581
|
+
* init: The init function
|
|
16582
|
+
*/
|
|
16583
|
+
DUCKDB_API void duckdb_table_function_set_local_init(duckdb_table_function table_function,
|
|
16584
|
+
duckdb_table_function_init_t init);
|
|
16585
|
+
|
|
16622
16586
|
/*!
|
|
16623
16587
|
Sets the main function of the table function
|
|
16624
16588
|
|
|
@@ -16762,6 +16726,14 @@ This function must be used if projection pushdown is enabled to figure out which
|
|
|
16762
16726
|
*/
|
|
16763
16727
|
DUCKDB_API idx_t duckdb_init_get_column_index(duckdb_init_info info, idx_t column_index);
|
|
16764
16728
|
|
|
16729
|
+
/*!
|
|
16730
|
+
Sets how many threads can process this table function in parallel (default: 1)
|
|
16731
|
+
|
|
16732
|
+
* info: The info object
|
|
16733
|
+
* max_threads: The maximum number of threads that can process this table function
|
|
16734
|
+
*/
|
|
16735
|
+
DUCKDB_API void duckdb_init_set_max_threads(duckdb_init_info info, idx_t max_threads);
|
|
16736
|
+
|
|
16765
16737
|
/*!
|
|
16766
16738
|
Report that an error has occurred while calling init.
|
|
16767
16739
|
|
|
@@ -16793,13 +16765,21 @@ For tracking state, use the init data instead.
|
|
|
16793
16765
|
DUCKDB_API void *duckdb_function_get_bind_data(duckdb_function_info info);
|
|
16794
16766
|
|
|
16795
16767
|
/*!
|
|
16796
|
-
Gets the init data set by `
|
|
16768
|
+
Gets the init data set by `duckdb_init_set_init_data` during the init.
|
|
16797
16769
|
|
|
16798
16770
|
* info: The info object
|
|
16799
16771
|
* returns: The init data object
|
|
16800
16772
|
*/
|
|
16801
16773
|
DUCKDB_API void *duckdb_function_get_init_data(duckdb_function_info info);
|
|
16802
16774
|
|
|
16775
|
+
/*!
|
|
16776
|
+
Gets the thread-local init data set by `duckdb_init_set_init_data` during the local_init.
|
|
16777
|
+
|
|
16778
|
+
* info: The info object
|
|
16779
|
+
* returns: The init data object
|
|
16780
|
+
*/
|
|
16781
|
+
DUCKDB_API void *duckdb_function_get_local_init_data(duckdb_function_info info);
|
|
16782
|
+
|
|
16803
16783
|
/*!
|
|
16804
16784
|
Report that an error has occurred while executing the function.
|
|
16805
16785
|
|