duckdb 0.3.5-dev242.0 → 0.3.5-dev256.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +87 -22
- package/src/duckdb.hpp +24 -5
- package/src/parquet-amalgamation.cpp +36759 -36759
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -58827,10 +58827,10 @@ static void SortCollectionForPartition(WindowOperatorState &state, BoundWindowEx
|
|
|
58827
58827
|
|
|
58828
58828
|
// fuse input and sort collection into one
|
|
58829
58829
|
// (sorting columns are not decoded, and we need them later)
|
|
58830
|
-
|
|
58831
|
-
|
|
58832
|
-
|
|
58833
|
-
|
|
58830
|
+
auto payload_types = input.Types();
|
|
58831
|
+
payload_types.insert(payload_types.end(), over.Types().begin(), over.Types().end());
|
|
58832
|
+
DataChunk payload_chunk;
|
|
58833
|
+
payload_chunk.InitializeEmpty(payload_types);
|
|
58834
58834
|
|
|
58835
58835
|
// initialise partitioning memory
|
|
58836
58836
|
// to minimise copying, we fill up a chunk and then sink it.
|
|
@@ -58856,8 +58856,15 @@ static void SortCollectionForPartition(WindowOperatorState &state, BoundWindowEx
|
|
|
58856
58856
|
// sink collection chunks into row format
|
|
58857
58857
|
const idx_t chunk_count = over.ChunkCount();
|
|
58858
58858
|
for (idx_t i = 0; i < chunk_count; i++) {
|
|
58859
|
+
auto &input_chunk = *input.Chunks()[i];
|
|
58860
|
+
for (idx_t col_idx = 0; col_idx < input_chunk.ColumnCount(); ++col_idx) {
|
|
58861
|
+
payload_chunk.data[col_idx].Reference(input_chunk.data[col_idx]);
|
|
58862
|
+
}
|
|
58859
58863
|
auto &over_chunk = *over.Chunks()[i];
|
|
58860
|
-
|
|
58864
|
+
for (idx_t col_idx = 0; col_idx < over_chunk.ColumnCount(); ++col_idx) {
|
|
58865
|
+
payload_chunk.data[input_chunk.ColumnCount() + col_idx].Reference(over_chunk.data[col_idx]);
|
|
58866
|
+
}
|
|
58867
|
+
payload_chunk.SetCardinality(input_chunk);
|
|
58861
58868
|
|
|
58862
58869
|
// Extract the hash partition, if any
|
|
58863
58870
|
if (hashes) {
|
|
@@ -83735,7 +83742,7 @@ static void HistogramFinalize(Vector &state_vector, FunctionData *, Vector &resu
|
|
|
83735
83742
|
list_struct_data = FlatVector::GetData<list_entry_t>(*count_list);
|
|
83736
83743
|
list_struct_data[rid].length = ListVector::GetListSize(*count_list) - old_len;
|
|
83737
83744
|
list_struct_data[rid].offset = old_len;
|
|
83738
|
-
old_len
|
|
83745
|
+
old_len += list_struct_data[rid].length;
|
|
83739
83746
|
}
|
|
83740
83747
|
}
|
|
83741
83748
|
|
|
@@ -110790,6 +110797,19 @@ void duckdb_function_set_error(duckdb_function_info info, const char *error) {
|
|
|
110790
110797
|
|
|
110791
110798
|
|
|
110792
110799
|
|
|
110800
|
+
using duckdb::DatabaseData;
|
|
110801
|
+
|
|
110802
|
+
void duckdb_execute_tasks(duckdb_database database, idx_t max_tasks) {
|
|
110803
|
+
if (!database) {
|
|
110804
|
+
return;
|
|
110805
|
+
}
|
|
110806
|
+
auto wrapper = (DatabaseData *)database;
|
|
110807
|
+
auto &scheduler = duckdb::TaskScheduler::GetScheduler(*wrapper->database->instance);
|
|
110808
|
+
scheduler.ExecuteTasks(max_tasks);
|
|
110809
|
+
}
|
|
110810
|
+
|
|
110811
|
+
|
|
110812
|
+
|
|
110793
110813
|
|
|
110794
110814
|
|
|
110795
110815
|
|
|
@@ -112850,6 +112870,14 @@ struct ExplainOutputSetting {
|
|
|
112850
112870
|
static Value GetSetting(ClientContext &context);
|
|
112851
112871
|
};
|
|
112852
112872
|
|
|
112873
|
+
struct ExternalThreadsSetting {
|
|
112874
|
+
static constexpr const char *Name = "external_threads";
|
|
112875
|
+
static constexpr const char *Description = "The number of external threads that work on DuckDB tasks.";
|
|
112876
|
+
static constexpr const LogicalTypeId InputType = LogicalTypeId::BIGINT;
|
|
112877
|
+
static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
|
|
112878
|
+
static Value GetSetting(ClientContext &context);
|
|
112879
|
+
};
|
|
112880
|
+
|
|
112853
112881
|
struct ForceCompressionSetting {
|
|
112854
112882
|
static constexpr const char *Name = "force_compression";
|
|
112855
112883
|
static constexpr const char *Description = "DEBUG SETTING: forces a specific compression method to be used";
|
|
@@ -112997,6 +113025,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
|
|
|
112997
113025
|
DUCKDB_LOCAL(EnableProfilingSetting),
|
|
112998
113026
|
DUCKDB_LOCAL(EnableProgressBarSetting),
|
|
112999
113027
|
DUCKDB_LOCAL(ExplainOutputSetting),
|
|
113028
|
+
DUCKDB_GLOBAL(ExternalThreadsSetting),
|
|
113000
113029
|
DUCKDB_GLOBAL(ForceCompressionSetting),
|
|
113001
113030
|
DUCKDB_LOCAL(LogQueryPathSetting),
|
|
113002
113031
|
DUCKDB_GLOBAL(MaximumMemorySetting),
|
|
@@ -113772,7 +113801,7 @@ void DatabaseInstance::Initialize(const char *path, DBConfig *new_config) {
|
|
|
113772
113801
|
make_unique<StorageManager>(*this, path ? string(path) : string(), config.access_mode == AccessMode::READ_ONLY);
|
|
113773
113802
|
catalog = make_unique<Catalog>(*this);
|
|
113774
113803
|
transaction_manager = make_unique<TransactionManager>(*this);
|
|
113775
|
-
scheduler = make_unique<TaskScheduler>();
|
|
113804
|
+
scheduler = make_unique<TaskScheduler>(*this);
|
|
113776
113805
|
object_cache = make_unique<ObjectCache>();
|
|
113777
113806
|
connection_manager = make_unique<ConnectionManager>();
|
|
113778
113807
|
|
|
@@ -113862,6 +113891,7 @@ void DatabaseInstance::Configure(DBConfig &new_config) {
|
|
|
113862
113891
|
} else {
|
|
113863
113892
|
config.maximum_threads = new_config.maximum_threads;
|
|
113864
113893
|
}
|
|
113894
|
+
config.external_threads = new_config.external_threads;
|
|
113865
113895
|
config.load_extensions = new_config.load_extensions;
|
|
113866
113896
|
config.force_compression = new_config.force_compression;
|
|
113867
113897
|
config.allocator = move(new_config.allocator);
|
|
@@ -126514,6 +126544,18 @@ Value ExplainOutputSetting::GetSetting(ClientContext &context) {
|
|
|
126514
126544
|
}
|
|
126515
126545
|
}
|
|
126516
126546
|
|
|
126547
|
+
//===--------------------------------------------------------------------===//
|
|
126548
|
+
// External Threads Setting
|
|
126549
|
+
//===--------------------------------------------------------------------===//
|
|
126550
|
+
void ExternalThreadsSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
|
|
126551
|
+
config.external_threads = input.GetValue<int64_t>();
|
|
126552
|
+
}
|
|
126553
|
+
|
|
126554
|
+
Value ExternalThreadsSetting::GetSetting(ClientContext &context) {
|
|
126555
|
+
auto &config = DBConfig::GetConfig(context);
|
|
126556
|
+
return Value::BIGINT(config.external_threads);
|
|
126557
|
+
}
|
|
126558
|
+
|
|
126517
126559
|
//===--------------------------------------------------------------------===//
|
|
126518
126560
|
// Force Compression
|
|
126519
126561
|
//===--------------------------------------------------------------------===//
|
|
@@ -141519,7 +141561,7 @@ ProducerToken::ProducerToken(TaskScheduler &scheduler, unique_ptr<QueueProducerT
|
|
|
141519
141561
|
ProducerToken::~ProducerToken() {
|
|
141520
141562
|
}
|
|
141521
141563
|
|
|
141522
|
-
TaskScheduler::TaskScheduler() : queue(make_unique<ConcurrentQueue>()) {
|
|
141564
|
+
TaskScheduler::TaskScheduler(DatabaseInstance &db) : db(db), queue(make_unique<ConcurrentQueue>()) {
|
|
141523
141565
|
}
|
|
141524
141566
|
|
|
141525
141567
|
TaskScheduler::~TaskScheduler() {
|
|
@@ -141556,7 +141598,7 @@ void TaskScheduler::ExecuteForever(atomic<bool> *marker) {
|
|
|
141556
141598
|
// loop until the marker is set to false
|
|
141557
141599
|
while (*marker) {
|
|
141558
141600
|
// wait for a signal with a timeout; the timeout allows us to periodically check
|
|
141559
|
-
queue->semaphore.wait(TASK_TIMEOUT_USECS);
|
|
141601
|
+
queue->semaphore.wait();
|
|
141560
141602
|
if (queue->q.try_dequeue(task)) {
|
|
141561
141603
|
task->Execute(TaskExecutionMode::PROCESS_ALL);
|
|
141562
141604
|
task.reset();
|
|
@@ -141567,6 +141609,26 @@ void TaskScheduler::ExecuteForever(atomic<bool> *marker) {
|
|
|
141567
141609
|
#endif
|
|
141568
141610
|
}
|
|
141569
141611
|
|
|
141612
|
+
void TaskScheduler::ExecuteTasks(idx_t max_tasks) {
|
|
141613
|
+
#ifndef DUCKDB_NO_THREADS
|
|
141614
|
+
unique_ptr<Task> task;
|
|
141615
|
+
for (idx_t i = 0; i < max_tasks; i++) {
|
|
141616
|
+
queue->semaphore.wait(TASK_TIMEOUT_USECS);
|
|
141617
|
+
if (!queue->q.try_dequeue(task)) {
|
|
141618
|
+
return;
|
|
141619
|
+
}
|
|
141620
|
+
try {
|
|
141621
|
+
task->Execute(TaskExecutionMode::PROCESS_ALL);
|
|
141622
|
+
task.reset();
|
|
141623
|
+
} catch (...) {
|
|
141624
|
+
return;
|
|
141625
|
+
}
|
|
141626
|
+
}
|
|
141627
|
+
#else
|
|
141628
|
+
throw NotImplementedException("DuckDB was compiled without threads! Background thread loop is not allowed.");
|
|
141629
|
+
#endif
|
|
141630
|
+
}
|
|
141631
|
+
|
|
141570
141632
|
#ifndef DUCKDB_NO_THREADS
|
|
141571
141633
|
static void ThreadExecuteTasks(TaskScheduler *scheduler, atomic<bool> *marker) {
|
|
141572
141634
|
scheduler->ExecuteForever(marker);
|
|
@@ -141574,7 +141636,8 @@ static void ThreadExecuteTasks(TaskScheduler *scheduler, atomic<bool> *marker) {
|
|
|
141574
141636
|
#endif
|
|
141575
141637
|
|
|
141576
141638
|
int32_t TaskScheduler::NumberOfThreads() {
|
|
141577
|
-
|
|
141639
|
+
auto &config = DBConfig::GetConfig(db);
|
|
141640
|
+
return threads.size() + config.external_threads + 1;
|
|
141578
141641
|
}
|
|
141579
141642
|
|
|
141580
141643
|
void TaskScheduler::SetThreads(int32_t n) {
|
|
@@ -141596,6 +141659,20 @@ void TaskScheduler::SetThreadsInternal(int32_t n) {
|
|
|
141596
141659
|
return;
|
|
141597
141660
|
}
|
|
141598
141661
|
idx_t new_thread_count = n - 1;
|
|
141662
|
+
if (threads.size() > new_thread_count) {
|
|
141663
|
+
// we are reducing the number of threads: clear all threads first
|
|
141664
|
+
for (idx_t i = 0; i < threads.size(); i++) {
|
|
141665
|
+
*markers[i] = false;
|
|
141666
|
+
}
|
|
141667
|
+
queue->semaphore.signal(threads.size());
|
|
141668
|
+
// now join the threads to ensure they are fully stopped before erasing them
|
|
141669
|
+
for (idx_t i = 0; i < threads.size(); i++) {
|
|
141670
|
+
threads[i]->internal_thread->join();
|
|
141671
|
+
}
|
|
141672
|
+
// erase the threads/markers
|
|
141673
|
+
threads.clear();
|
|
141674
|
+
markers.clear();
|
|
141675
|
+
}
|
|
141599
141676
|
if (threads.size() < new_thread_count) {
|
|
141600
141677
|
// we are increasing the number of threads: launch them and run tasks on them
|
|
141601
141678
|
idx_t create_new_threads = new_thread_count - threads.size();
|
|
@@ -141608,18 +141685,6 @@ void TaskScheduler::SetThreadsInternal(int32_t n) {
|
|
|
141608
141685
|
threads.push_back(move(thread_wrapper));
|
|
141609
141686
|
markers.push_back(move(marker));
|
|
141610
141687
|
}
|
|
141611
|
-
} else if (threads.size() > new_thread_count) {
|
|
141612
|
-
// we are reducing the number of threads: cancel any threads exceeding new_thread_count
|
|
141613
|
-
for (idx_t i = new_thread_count; i < threads.size(); i++) {
|
|
141614
|
-
*markers[i] = false;
|
|
141615
|
-
}
|
|
141616
|
-
// now join the threads to ensure they are fully stopped before erasing them
|
|
141617
|
-
for (idx_t i = new_thread_count; i < threads.size(); i++) {
|
|
141618
|
-
threads[i]->internal_thread->join();
|
|
141619
|
-
}
|
|
141620
|
-
// erase the threads/markers
|
|
141621
|
-
threads.resize(new_thread_count);
|
|
141622
|
-
markers.resize(new_thread_count);
|
|
141623
141688
|
}
|
|
141624
141689
|
#endif
|
|
141625
141690
|
}
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.3.5-dev242"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "6962a87a9"
|
|
15
|
+
#define DUCKDB_VERSION "v0.3.5-dev256"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -10593,11 +10593,11 @@ struct ProducerToken {
|
|
|
10593
10593
|
|
|
10594
10594
|
//! The TaskScheduler is responsible for managing tasks and threads
|
|
10595
10595
|
class TaskScheduler {
|
|
10596
|
-
// timeout for semaphore wait, default
|
|
10597
|
-
constexpr static int64_t TASK_TIMEOUT_USECS =
|
|
10596
|
+
// timeout for semaphore wait, default 5ms
|
|
10597
|
+
constexpr static int64_t TASK_TIMEOUT_USECS = 5000;
|
|
10598
10598
|
|
|
10599
10599
|
public:
|
|
10600
|
-
TaskScheduler();
|
|
10600
|
+
TaskScheduler(DatabaseInstance &db);
|
|
10601
10601
|
~TaskScheduler();
|
|
10602
10602
|
|
|
10603
10603
|
static TaskScheduler &GetScheduler(ClientContext &context);
|
|
@@ -10610,6 +10610,8 @@ public:
|
|
|
10610
10610
|
bool GetTaskFromProducer(ProducerToken &token, unique_ptr<Task> &task);
|
|
10611
10611
|
//! Run tasks forever until "marker" is set to false, "marker" must remain valid until the thread is joined
|
|
10612
10612
|
void ExecuteForever(atomic<bool> *marker);
|
|
10613
|
+
//! Run tasks until `max_tasks` have been completed, or until there are no more tasks available
|
|
10614
|
+
void ExecuteTasks(idx_t max_tasks);
|
|
10613
10615
|
|
|
10614
10616
|
//! Sets the amount of active threads executing tasks for the system; n-1 background threads will be launched.
|
|
10615
10617
|
//! The main thread will also be used for execution
|
|
@@ -10620,6 +10622,8 @@ public:
|
|
|
10620
10622
|
private:
|
|
10621
10623
|
void SetThreadsInternal(int32_t n);
|
|
10622
10624
|
|
|
10625
|
+
private:
|
|
10626
|
+
DatabaseInstance &db;
|
|
10623
10627
|
//! The task queue
|
|
10624
10628
|
unique_ptr<ConcurrentQueue> queue;
|
|
10625
10629
|
//! The active background threads of the task scheduler
|
|
@@ -16893,6 +16897,19 @@ Closes the result and de-allocates all memory allocated for the arrow result.
|
|
|
16893
16897
|
*/
|
|
16894
16898
|
DUCKDB_API void duckdb_destroy_arrow(duckdb_arrow *result);
|
|
16895
16899
|
|
|
16900
|
+
//===--------------------------------------------------------------------===//
|
|
16901
|
+
// Threading Information
|
|
16902
|
+
//===--------------------------------------------------------------------===//
|
|
16903
|
+
/*!
|
|
16904
|
+
Execute DuckDB tasks on this thread.
|
|
16905
|
+
|
|
16906
|
+
Will return after `max_tasks` have been executed, or if there are no more tasks present.
|
|
16907
|
+
|
|
16908
|
+
* database: The database object to execute tasks for
|
|
16909
|
+
* max_tasks: The maximum amount of tasks to execute
|
|
16910
|
+
*/
|
|
16911
|
+
DUCKDB_API void duckdb_execute_tasks(duckdb_database database, idx_t max_tasks);
|
|
16912
|
+
|
|
16896
16913
|
#ifdef __cplusplus
|
|
16897
16914
|
}
|
|
16898
16915
|
#endif
|
|
@@ -18141,6 +18158,8 @@ public:
|
|
|
18141
18158
|
idx_t maximum_memory = (idx_t)-1;
|
|
18142
18159
|
//! The maximum amount of CPU threads used by the database system. Default: all available.
|
|
18143
18160
|
idx_t maximum_threads = (idx_t)-1;
|
|
18161
|
+
//! The number of external threads that work on DuckDB tasks. Default: none.
|
|
18162
|
+
idx_t external_threads = 0;
|
|
18144
18163
|
//! Whether or not to create and use a temporary directory to store intermediates that do not fit in memory
|
|
18145
18164
|
bool use_temporary_directory = true;
|
|
18146
18165
|
//! Directory to store temporary structures that do not fit in memory
|