duckdb 0.3.5-dev242.0 → 0.3.5-dev250.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.3.5-dev242.0",
4
+ "version": "0.3.5-dev250.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -110790,6 +110790,19 @@ void duckdb_function_set_error(duckdb_function_info info, const char *error) {
110790
110790
 
110791
110791
 
110792
110792
 
110793
+ using duckdb::DatabaseData;
110794
+
110795
+ void duckdb_execute_tasks(duckdb_database database, idx_t max_tasks) {
110796
+ if (!database) {
110797
+ return;
110798
+ }
110799
+ auto wrapper = (DatabaseData *)database;
110800
+ auto &scheduler = duckdb::TaskScheduler::GetScheduler(*wrapper->database->instance);
110801
+ scheduler.ExecuteTasks(max_tasks);
110802
+ }
110803
+
110804
+
110805
+
110793
110806
 
110794
110807
 
110795
110808
 
@@ -112850,6 +112863,14 @@ struct ExplainOutputSetting {
112850
112863
  static Value GetSetting(ClientContext &context);
112851
112864
  };
112852
112865
 
112866
+ struct ExternalThreadsSetting {
112867
+ static constexpr const char *Name = "external_threads";
112868
+ static constexpr const char *Description = "The number of external threads that work on DuckDB tasks.";
112869
+ static constexpr const LogicalTypeId InputType = LogicalTypeId::BIGINT;
112870
+ static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
112871
+ static Value GetSetting(ClientContext &context);
112872
+ };
112873
+
112853
112874
  struct ForceCompressionSetting {
112854
112875
  static constexpr const char *Name = "force_compression";
112855
112876
  static constexpr const char *Description = "DEBUG SETTING: forces a specific compression method to be used";
@@ -112997,6 +113018,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
112997
113018
  DUCKDB_LOCAL(EnableProfilingSetting),
112998
113019
  DUCKDB_LOCAL(EnableProgressBarSetting),
112999
113020
  DUCKDB_LOCAL(ExplainOutputSetting),
113021
+ DUCKDB_GLOBAL(ExternalThreadsSetting),
113000
113022
  DUCKDB_GLOBAL(ForceCompressionSetting),
113001
113023
  DUCKDB_LOCAL(LogQueryPathSetting),
113002
113024
  DUCKDB_GLOBAL(MaximumMemorySetting),
@@ -113772,7 +113794,7 @@ void DatabaseInstance::Initialize(const char *path, DBConfig *new_config) {
113772
113794
  make_unique<StorageManager>(*this, path ? string(path) : string(), config.access_mode == AccessMode::READ_ONLY);
113773
113795
  catalog = make_unique<Catalog>(*this);
113774
113796
  transaction_manager = make_unique<TransactionManager>(*this);
113775
- scheduler = make_unique<TaskScheduler>();
113797
+ scheduler = make_unique<TaskScheduler>(*this);
113776
113798
  object_cache = make_unique<ObjectCache>();
113777
113799
  connection_manager = make_unique<ConnectionManager>();
113778
113800
 
@@ -113862,6 +113884,7 @@ void DatabaseInstance::Configure(DBConfig &new_config) {
113862
113884
  } else {
113863
113885
  config.maximum_threads = new_config.maximum_threads;
113864
113886
  }
113887
+ config.external_threads = new_config.external_threads;
113865
113888
  config.load_extensions = new_config.load_extensions;
113866
113889
  config.force_compression = new_config.force_compression;
113867
113890
  config.allocator = move(new_config.allocator);
@@ -126514,6 +126537,18 @@ Value ExplainOutputSetting::GetSetting(ClientContext &context) {
126514
126537
  }
126515
126538
  }
126516
126539
 
126540
+ //===--------------------------------------------------------------------===//
126541
+ // External Threads Setting
126542
+ //===--------------------------------------------------------------------===//
126543
+ void ExternalThreadsSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
126544
+ config.external_threads = input.GetValue<int64_t>();
126545
+ }
126546
+
126547
+ Value ExternalThreadsSetting::GetSetting(ClientContext &context) {
126548
+ auto &config = DBConfig::GetConfig(context);
126549
+ return Value::BIGINT(config.external_threads);
126550
+ }
126551
+
126517
126552
  //===--------------------------------------------------------------------===//
126518
126553
  // Force Compression
126519
126554
  //===--------------------------------------------------------------------===//
@@ -141519,7 +141554,7 @@ ProducerToken::ProducerToken(TaskScheduler &scheduler, unique_ptr<QueueProducerT
141519
141554
  ProducerToken::~ProducerToken() {
141520
141555
  }
141521
141556
 
141522
- TaskScheduler::TaskScheduler() : queue(make_unique<ConcurrentQueue>()) {
141557
+ TaskScheduler::TaskScheduler(DatabaseInstance &db) : db(db), queue(make_unique<ConcurrentQueue>()) {
141523
141558
  }
141524
141559
 
141525
141560
  TaskScheduler::~TaskScheduler() {
@@ -141556,7 +141591,7 @@ void TaskScheduler::ExecuteForever(atomic<bool> *marker) {
141556
141591
  // loop until the marker is set to false
141557
141592
  while (*marker) {
141558
141593
  // wait for a signal; the previous call passed a timeout so the marker was re-checked periodically, the new call passes none
141559
- queue->semaphore.wait(TASK_TIMEOUT_USECS);
141594
+ queue->semaphore.wait();
141560
141595
  if (queue->q.try_dequeue(task)) {
141561
141596
  task->Execute(TaskExecutionMode::PROCESS_ALL);
141562
141597
  task.reset();
@@ -141567,6 +141602,26 @@ void TaskScheduler::ExecuteForever(atomic<bool> *marker) {
141567
141602
  #endif
141568
141603
  }
141569
141604
 
141605
+ void TaskScheduler::ExecuteTasks(idx_t max_tasks) {
141606
+ #ifndef DUCKDB_NO_THREADS
141607
+ unique_ptr<Task> task;
141608
+ for (idx_t i = 0; i < max_tasks; i++) {
141609
+ queue->semaphore.wait(TASK_TIMEOUT_USECS);
141610
+ if (!queue->q.try_dequeue(task)) {
141611
+ return;
141612
+ }
141613
+ try {
141614
+ task->Execute(TaskExecutionMode::PROCESS_ALL);
141615
+ task.reset();
141616
+ } catch (...) {
141617
+ return;
141618
+ }
141619
+ }
141620
+ #else
141621
+ throw NotImplementedException("DuckDB was compiled without threads! Background thread loop is not allowed.");
141622
+ #endif
141623
+ }
141624
+
141570
141625
  #ifndef DUCKDB_NO_THREADS
141571
141626
  static void ThreadExecuteTasks(TaskScheduler *scheduler, atomic<bool> *marker) {
141572
141627
  scheduler->ExecuteForever(marker);
@@ -141574,7 +141629,8 @@ static void ThreadExecuteTasks(TaskScheduler *scheduler, atomic<bool> *marker) {
141574
141629
  #endif
141575
141630
 
141576
141631
  int32_t TaskScheduler::NumberOfThreads() {
141577
- return threads.size() + 1;
141632
+ auto &config = DBConfig::GetConfig(db);
141633
+ return threads.size() + config.external_threads + 1;
141578
141634
  }
141579
141635
 
141580
141636
  void TaskScheduler::SetThreads(int32_t n) {
@@ -141596,6 +141652,20 @@ void TaskScheduler::SetThreadsInternal(int32_t n) {
141596
141652
  return;
141597
141653
  }
141598
141654
  idx_t new_thread_count = n - 1;
141655
+ if (threads.size() > new_thread_count) {
141656
+ // we are reducing the number of threads: clear all threads first
141657
+ for (idx_t i = 0; i < threads.size(); i++) {
141658
+ *markers[i] = false;
141659
+ }
141660
+ queue->semaphore.signal(threads.size());
141661
+ // now join the threads to ensure they are fully stopped before erasing them
141662
+ for (idx_t i = 0; i < threads.size(); i++) {
141663
+ threads[i]->internal_thread->join();
141664
+ }
141665
+ // erase the threads/markers
141666
+ threads.clear();
141667
+ markers.clear();
141668
+ }
141599
141669
  if (threads.size() < new_thread_count) {
141600
141670
  // we are increasing the number of threads: launch them and run tasks on them
141601
141671
  idx_t create_new_threads = new_thread_count - threads.size();
@@ -141608,18 +141678,6 @@ void TaskScheduler::SetThreadsInternal(int32_t n) {
141608
141678
  threads.push_back(move(thread_wrapper));
141609
141679
  markers.push_back(move(marker));
141610
141680
  }
141611
- } else if (threads.size() > new_thread_count) {
141612
- // we are reducing the number of threads: cancel any threads exceeding new_thread_count
141613
- for (idx_t i = new_thread_count; i < threads.size(); i++) {
141614
- *markers[i] = false;
141615
- }
141616
- // now join the threads to ensure they are fully stopped before erasing them
141617
- for (idx_t i = new_thread_count; i < threads.size(); i++) {
141618
- threads[i]->internal_thread->join();
141619
- }
141620
- // erase the threads/markers
141621
- threads.resize(new_thread_count);
141622
- markers.resize(new_thread_count);
141623
141681
  }
141624
141682
  #endif
141625
141683
  }
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "a90b3be6d"
15
- #define DUCKDB_VERSION "v0.3.5-dev242"
14
+ #define DUCKDB_SOURCE_ID "91d3928b6"
15
+ #define DUCKDB_VERSION "v0.3.5-dev250"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -10593,11 +10593,11 @@ struct ProducerToken {
10593
10593
 
10594
10594
  //! The TaskScheduler is responsible for managing tasks and threads
10595
10595
  class TaskScheduler {
10596
- // timeout for semaphore wait, default 50ms
10597
- constexpr static int64_t TASK_TIMEOUT_USECS = 50000;
10596
+ // timeout for semaphore wait, default 5ms
10597
+ constexpr static int64_t TASK_TIMEOUT_USECS = 5000;
10598
10598
 
10599
10599
  public:
10600
- TaskScheduler();
10600
+ TaskScheduler(DatabaseInstance &db);
10601
10601
  ~TaskScheduler();
10602
10602
 
10603
10603
  static TaskScheduler &GetScheduler(ClientContext &context);
@@ -10610,6 +10610,8 @@ public:
10610
10610
  bool GetTaskFromProducer(ProducerToken &token, unique_ptr<Task> &task);
10611
10611
  //! Run tasks forever until "marker" is set to false, "marker" must remain valid until the thread is joined
10612
10612
  void ExecuteForever(atomic<bool> *marker);
10613
+ //! Run tasks until `max_tasks` have been completed, or until there are no more tasks available
10614
+ void ExecuteTasks(idx_t max_tasks);
10613
10615
 
10614
10616
  //! Sets the amount of active threads executing tasks for the system; n-1 background threads will be launched.
10615
10617
  //! The main thread will also be used for execution
@@ -10620,6 +10622,8 @@ public:
10620
10622
  private:
10621
10623
  void SetThreadsInternal(int32_t n);
10622
10624
 
10625
+ private:
10626
+ DatabaseInstance &db;
10623
10627
  //! The task queue
10624
10628
  unique_ptr<ConcurrentQueue> queue;
10625
10629
  //! The active background threads of the task scheduler
@@ -16893,6 +16897,19 @@ Closes the result and de-allocates all memory allocated for the arrow result.
16893
16897
  */
16894
16898
  DUCKDB_API void duckdb_destroy_arrow(duckdb_arrow *result);
16895
16899
 
16900
+ //===--------------------------------------------------------------------===//
16901
+ // Threading Information
16902
+ //===--------------------------------------------------------------------===//
16903
+ /*!
16904
+ Execute DuckDB tasks on this thread.
16905
+
16906
+ Will return after `max_tasks` have been executed, or earlier if no task can be dequeued after a timed wait or if a task throws (the exception is not propagated).
16907
+
16908
+ * database: The database object to execute tasks for
16909
+ * max_tasks: The maximum amount of tasks to execute
16910
+ */
16911
+ DUCKDB_API void duckdb_execute_tasks(duckdb_database database, idx_t max_tasks);
16912
+
16896
16913
  #ifdef __cplusplus
16897
16914
  }
16898
16915
  #endif
@@ -18141,6 +18158,8 @@ public:
18141
18158
  idx_t maximum_memory = (idx_t)-1;
18142
18159
  //! The maximum amount of CPU threads used by the database system. Default: all available.
18143
18160
  idx_t maximum_threads = (idx_t)-1;
18161
+ //! The number of external threads that work on DuckDB tasks. Default: none.
18162
+ idx_t external_threads = 0;
18144
18163
  //! Whether or not to create and use a temporary directory to store intermediates that do not fit in memory
18145
18164
  bool use_temporary_directory = true;
18146
18165
  //! Directory to store temporary structures that do not fit in memory