duckdb 0.3.5-dev242.0 → 0.3.5-dev256.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.3.5-dev242.0",
4
+ "version": "0.3.5-dev256.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -58827,10 +58827,10 @@ static void SortCollectionForPartition(WindowOperatorState &state, BoundWindowEx
58827
58827
 
58828
58828
  // fuse input and sort collection into one
58829
58829
  // (sorting columns are not decoded, and we need them later)
58830
- ChunkCollection payload;
58831
- payload.Fuse(input);
58832
- payload.Fuse(over);
58833
- auto payload_types = payload.Types();
58830
+ auto payload_types = input.Types();
58831
+ payload_types.insert(payload_types.end(), over.Types().begin(), over.Types().end());
58832
+ DataChunk payload_chunk;
58833
+ payload_chunk.InitializeEmpty(payload_types);
58834
58834
 
58835
58835
  // initialise partitioning memory
58836
58836
  // to minimise copying, we fill up a chunk and then sink it.
@@ -58856,8 +58856,15 @@ static void SortCollectionForPartition(WindowOperatorState &state, BoundWindowEx
58856
58856
  // sink collection chunks into row format
58857
58857
  const idx_t chunk_count = over.ChunkCount();
58858
58858
  for (idx_t i = 0; i < chunk_count; i++) {
58859
+ auto &input_chunk = *input.Chunks()[i];
58860
+ for (idx_t col_idx = 0; col_idx < input_chunk.ColumnCount(); ++col_idx) {
58861
+ payload_chunk.data[col_idx].Reference(input_chunk.data[col_idx]);
58862
+ }
58859
58863
  auto &over_chunk = *over.Chunks()[i];
58860
- auto &payload_chunk = *payload.Chunks()[i];
58864
+ for (idx_t col_idx = 0; col_idx < over_chunk.ColumnCount(); ++col_idx) {
58865
+ payload_chunk.data[input_chunk.ColumnCount() + col_idx].Reference(over_chunk.data[col_idx]);
58866
+ }
58867
+ payload_chunk.SetCardinality(input_chunk);
58861
58868
 
58862
58869
  // Extract the hash partition, if any
58863
58870
  if (hashes) {
@@ -83735,7 +83742,7 @@ static void HistogramFinalize(Vector &state_vector, FunctionData *, Vector &resu
83735
83742
  list_struct_data = FlatVector::GetData<list_entry_t>(*count_list);
83736
83743
  list_struct_data[rid].length = ListVector::GetListSize(*count_list) - old_len;
83737
83744
  list_struct_data[rid].offset = old_len;
83738
- old_len = list_struct_data[rid].length;
83745
+ old_len += list_struct_data[rid].length;
83739
83746
  }
83740
83747
  }
83741
83748
 
@@ -110790,6 +110797,19 @@ void duckdb_function_set_error(duckdb_function_info info, const char *error) {
110790
110797
 
110791
110798
 
110792
110799
 
110800
+ using duckdb::DatabaseData;
110801
+
110802
+ void duckdb_execute_tasks(duckdb_database database, idx_t max_tasks) {
110803
+ if (!database) {
110804
+ return;
110805
+ }
110806
+ auto wrapper = (DatabaseData *)database;
110807
+ auto &scheduler = duckdb::TaskScheduler::GetScheduler(*wrapper->database->instance);
110808
+ scheduler.ExecuteTasks(max_tasks);
110809
+ }
110810
+
110811
+
110812
+
110793
110813
 
110794
110814
 
110795
110815
 
@@ -112850,6 +112870,14 @@ struct ExplainOutputSetting {
112850
112870
  static Value GetSetting(ClientContext &context);
112851
112871
  };
112852
112872
 
112873
+ struct ExternalThreadsSetting {
112874
+ static constexpr const char *Name = "external_threads";
112875
+ static constexpr const char *Description = "The number of external threads that work on DuckDB tasks.";
112876
+ static constexpr const LogicalTypeId InputType = LogicalTypeId::BIGINT;
112877
+ static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
112878
+ static Value GetSetting(ClientContext &context);
112879
+ };
112880
+
112853
112881
  struct ForceCompressionSetting {
112854
112882
  static constexpr const char *Name = "force_compression";
112855
112883
  static constexpr const char *Description = "DEBUG SETTING: forces a specific compression method to be used";
@@ -112997,6 +113025,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
112997
113025
  DUCKDB_LOCAL(EnableProfilingSetting),
112998
113026
  DUCKDB_LOCAL(EnableProgressBarSetting),
112999
113027
  DUCKDB_LOCAL(ExplainOutputSetting),
113028
+ DUCKDB_GLOBAL(ExternalThreadsSetting),
113000
113029
  DUCKDB_GLOBAL(ForceCompressionSetting),
113001
113030
  DUCKDB_LOCAL(LogQueryPathSetting),
113002
113031
  DUCKDB_GLOBAL(MaximumMemorySetting),
@@ -113772,7 +113801,7 @@ void DatabaseInstance::Initialize(const char *path, DBConfig *new_config) {
113772
113801
  make_unique<StorageManager>(*this, path ? string(path) : string(), config.access_mode == AccessMode::READ_ONLY);
113773
113802
  catalog = make_unique<Catalog>(*this);
113774
113803
  transaction_manager = make_unique<TransactionManager>(*this);
113775
- scheduler = make_unique<TaskScheduler>();
113804
+ scheduler = make_unique<TaskScheduler>(*this);
113776
113805
  object_cache = make_unique<ObjectCache>();
113777
113806
  connection_manager = make_unique<ConnectionManager>();
113778
113807
 
@@ -113862,6 +113891,7 @@ void DatabaseInstance::Configure(DBConfig &new_config) {
113862
113891
  } else {
113863
113892
  config.maximum_threads = new_config.maximum_threads;
113864
113893
  }
113894
+ config.external_threads = new_config.external_threads;
113865
113895
  config.load_extensions = new_config.load_extensions;
113866
113896
  config.force_compression = new_config.force_compression;
113867
113897
  config.allocator = move(new_config.allocator);
@@ -126514,6 +126544,18 @@ Value ExplainOutputSetting::GetSetting(ClientContext &context) {
126514
126544
  }
126515
126545
  }
126516
126546
 
126547
+ //===--------------------------------------------------------------------===//
126548
+ // External Threads Setting
126549
+ //===--------------------------------------------------------------------===//
126550
+ void ExternalThreadsSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
126551
+ config.external_threads = input.GetValue<int64_t>();
126552
+ }
126553
+
126554
+ Value ExternalThreadsSetting::GetSetting(ClientContext &context) {
126555
+ auto &config = DBConfig::GetConfig(context);
126556
+ return Value::BIGINT(config.external_threads);
126557
+ }
126558
+
126517
126559
  //===--------------------------------------------------------------------===//
126518
126560
  // Force Compression
126519
126561
  //===--------------------------------------------------------------------===//
@@ -141519,7 +141561,7 @@ ProducerToken::ProducerToken(TaskScheduler &scheduler, unique_ptr<QueueProducerT
141519
141561
  ProducerToken::~ProducerToken() {
141520
141562
  }
141521
141563
 
141522
- TaskScheduler::TaskScheduler() : queue(make_unique<ConcurrentQueue>()) {
141564
+ TaskScheduler::TaskScheduler(DatabaseInstance &db) : db(db), queue(make_unique<ConcurrentQueue>()) {
141523
141565
  }
141524
141566
 
141525
141567
  TaskScheduler::~TaskScheduler() {
@@ -141556,7 +141598,7 @@ void TaskScheduler::ExecuteForever(atomic<bool> *marker) {
141556
141598
  // loop until the marker is set to false
141557
141599
  while (*marker) {
141558
141600
  // wait for a signal with a timeout; the timeout allows us to periodically check
141559
- queue->semaphore.wait(TASK_TIMEOUT_USECS);
141601
+ queue->semaphore.wait();
141560
141602
  if (queue->q.try_dequeue(task)) {
141561
141603
  task->Execute(TaskExecutionMode::PROCESS_ALL);
141562
141604
  task.reset();
@@ -141567,6 +141609,26 @@ void TaskScheduler::ExecuteForever(atomic<bool> *marker) {
141567
141609
  #endif
141568
141610
  }
141569
141611
 
141612
+ void TaskScheduler::ExecuteTasks(idx_t max_tasks) {
141613
+ #ifndef DUCKDB_NO_THREADS
141614
+ unique_ptr<Task> task;
141615
+ for (idx_t i = 0; i < max_tasks; i++) {
141616
+ queue->semaphore.wait(TASK_TIMEOUT_USECS);
141617
+ if (!queue->q.try_dequeue(task)) {
141618
+ return;
141619
+ }
141620
+ try {
141621
+ task->Execute(TaskExecutionMode::PROCESS_ALL);
141622
+ task.reset();
141623
+ } catch (...) {
141624
+ return;
141625
+ }
141626
+ }
141627
+ #else
141628
+ throw NotImplementedException("DuckDB was compiled without threads! Background thread loop is not allowed.");
141629
+ #endif
141630
+ }
141631
+
141570
141632
  #ifndef DUCKDB_NO_THREADS
141571
141633
  static void ThreadExecuteTasks(TaskScheduler *scheduler, atomic<bool> *marker) {
141572
141634
  scheduler->ExecuteForever(marker);
@@ -141574,7 +141636,8 @@ static void ThreadExecuteTasks(TaskScheduler *scheduler, atomic<bool> *marker) {
141574
141636
  #endif
141575
141637
 
141576
141638
  int32_t TaskScheduler::NumberOfThreads() {
141577
- return threads.size() + 1;
141639
+ auto &config = DBConfig::GetConfig(db);
141640
+ return threads.size() + config.external_threads + 1;
141578
141641
  }
141579
141642
 
141580
141643
  void TaskScheduler::SetThreads(int32_t n) {
@@ -141596,6 +141659,20 @@ void TaskScheduler::SetThreadsInternal(int32_t n) {
141596
141659
  return;
141597
141660
  }
141598
141661
  idx_t new_thread_count = n - 1;
141662
+ if (threads.size() > new_thread_count) {
141663
+ // we are reducing the number of threads: clear all threads first
141664
+ for (idx_t i = 0; i < threads.size(); i++) {
141665
+ *markers[i] = false;
141666
+ }
141667
+ queue->semaphore.signal(threads.size());
141668
+ // now join the threads to ensure they are fully stopped before erasing them
141669
+ for (idx_t i = 0; i < threads.size(); i++) {
141670
+ threads[i]->internal_thread->join();
141671
+ }
141672
+ // erase the threads/markers
141673
+ threads.clear();
141674
+ markers.clear();
141675
+ }
141599
141676
  if (threads.size() < new_thread_count) {
141600
141677
  // we are increasing the number of threads: launch them and run tasks on them
141601
141678
  idx_t create_new_threads = new_thread_count - threads.size();
@@ -141608,18 +141685,6 @@ void TaskScheduler::SetThreadsInternal(int32_t n) {
141608
141685
  threads.push_back(move(thread_wrapper));
141609
141686
  markers.push_back(move(marker));
141610
141687
  }
141611
- } else if (threads.size() > new_thread_count) {
141612
- // we are reducing the number of threads: cancel any threads exceeding new_thread_count
141613
- for (idx_t i = new_thread_count; i < threads.size(); i++) {
141614
- *markers[i] = false;
141615
- }
141616
- // now join the threads to ensure they are fully stopped before erasing them
141617
- for (idx_t i = new_thread_count; i < threads.size(); i++) {
141618
- threads[i]->internal_thread->join();
141619
- }
141620
- // erase the threads/markers
141621
- threads.resize(new_thread_count);
141622
- markers.resize(new_thread_count);
141623
141688
  }
141624
141689
  #endif
141625
141690
  }
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "a90b3be6d"
15
- #define DUCKDB_VERSION "v0.3.5-dev242"
14
+ #define DUCKDB_SOURCE_ID "6962a87a9"
15
+ #define DUCKDB_VERSION "v0.3.5-dev256"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -10593,11 +10593,11 @@ struct ProducerToken {
10593
10593
 
10594
10594
  //! The TaskScheduler is responsible for managing tasks and threads
10595
10595
  class TaskScheduler {
10596
- // timeout for semaphore wait, default 50ms
10597
- constexpr static int64_t TASK_TIMEOUT_USECS = 50000;
10596
+ // timeout for semaphore wait, default 5ms
10597
+ constexpr static int64_t TASK_TIMEOUT_USECS = 5000;
10598
10598
 
10599
10599
  public:
10600
- TaskScheduler();
10600
+ TaskScheduler(DatabaseInstance &db);
10601
10601
  ~TaskScheduler();
10602
10602
 
10603
10603
  static TaskScheduler &GetScheduler(ClientContext &context);
@@ -10610,6 +10610,8 @@ public:
10610
10610
  bool GetTaskFromProducer(ProducerToken &token, unique_ptr<Task> &task);
10611
10611
  //! Run tasks forever until "marker" is set to false, "marker" must remain valid until the thread is joined
10612
10612
  void ExecuteForever(atomic<bool> *marker);
10613
+ //! Run tasks until `max_tasks` have been completed, or until there are no more tasks available
10614
+ void ExecuteTasks(idx_t max_tasks);
10613
10615
 
10614
10616
  //! Sets the amount of active threads executing tasks for the system; n-1 background threads will be launched.
10615
10617
  //! The main thread will also be used for execution
@@ -10620,6 +10622,8 @@ public:
10620
10622
  private:
10621
10623
  void SetThreadsInternal(int32_t n);
10622
10624
 
10625
+ private:
10626
+ DatabaseInstance &db;
10623
10627
  //! The task queue
10624
10628
  unique_ptr<ConcurrentQueue> queue;
10625
10629
  //! The active background threads of the task scheduler
@@ -16893,6 +16897,19 @@ Closes the result and de-allocates all memory allocated for the arrow result.
16893
16897
  */
16894
16898
  DUCKDB_API void duckdb_destroy_arrow(duckdb_arrow *result);
16895
16899
 
16900
+ //===--------------------------------------------------------------------===//
16901
+ // Threading Information
16902
+ //===--------------------------------------------------------------------===//
16903
+ /*!
16904
+ Execute DuckDB tasks on this thread.
16905
+
16906
+ Will return after `max_tasks` have been executed, or if there are no more tasks present.
16907
+
16908
+ * database: The database object to execute tasks for
16909
+ * max_tasks: The maximum amount of tasks to execute
16910
+ */
16911
+ DUCKDB_API void duckdb_execute_tasks(duckdb_database database, idx_t max_tasks);
16912
+
16896
16913
  #ifdef __cplusplus
16897
16914
  }
16898
16915
  #endif
@@ -18141,6 +18158,8 @@ public:
18141
18158
  idx_t maximum_memory = (idx_t)-1;
18142
18159
  //! The maximum amount of CPU threads used by the database system. Default: all available.
18143
18160
  idx_t maximum_threads = (idx_t)-1;
18161
+ //! The number of external threads that work on DuckDB tasks. Default: none.
18162
+ idx_t external_threads = 0;
18144
18163
  //! Whether or not to create and use a temporary directory to store intermediates that do not fit in memory
18145
18164
  bool use_temporary_directory = true;
18146
18165
  //! Directory to store temporary structures that do not fit in memory