duckdb 0.5.2-dev1104.0 → 0.5.2-dev1118.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.5.2-dev1104.0",
5
+ "version": "0.5.2-dev1118.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -66378,34 +66378,230 @@ unique_ptr<GlobalSinkState> PhysicalWindow::GetGlobalSinkState(ClientContext &co
66378
66378
  return make_unique<WindowGlobalSinkState>(*this, context);
66379
66379
  }
66380
66380
 
66381
- class WindowMergeTask : public ExecutorTask {
66381
+ enum class WindowSortStage : uint8_t { INIT, PREPARE, MERGE, SORTED };
66382
+
66383
+ class WindowGlobalMergeState;
66384
+
66385
+ class WindowLocalMergeState {
66382
66386
  public:
66383
- WindowMergeTask(shared_ptr<Event> event_p, ClientContext &context_p, WindowGlobalHashGroup &hash_group_p)
66384
- : ExecutorTask(context_p), event(move(event_p)), hash_group(hash_group_p) {
66387
+ WindowLocalMergeState() : merge_state(nullptr), stage(WindowSortStage::INIT) {
66388
+ finished = true;
66385
66389
  }
66386
66390
 
66387
- TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
66388
- // Initialize merge sorted and iterate until done
66389
- auto &global_sort = *hash_group.global_sort;
66391
+ bool TaskFinished() {
66392
+ return finished;
66393
+ }
66394
+ void ExecuteTask();
66395
+
66396
+ WindowGlobalMergeState *merge_state;
66397
+ WindowSortStage stage;
66398
+ atomic<bool> finished;
66399
+ };
66400
+
66401
+ class WindowGlobalMergeState {
66402
+ public:
66403
+ explicit WindowGlobalMergeState(GlobalSortState &sort_state)
66404
+ : sort_state(sort_state), stage(WindowSortStage::INIT), total_tasks(0), tasks_assigned(0), tasks_completed(0) {
66405
+ }
66406
+
66407
+ bool IsSorted() const {
66408
+ lock_guard<mutex> guard(lock);
66409
+ return stage == WindowSortStage::SORTED;
66410
+ }
66411
+
66412
+ bool AssignTask(WindowLocalMergeState &local_state);
66413
+ bool TryPrepareNextStage();
66414
+ void CompleteTask();
66415
+
66416
+ GlobalSortState &sort_state;
66417
+
66418
+ private:
66419
+ mutable mutex lock;
66420
+ WindowSortStage stage;
66421
+ idx_t total_tasks;
66422
+ idx_t tasks_assigned;
66423
+ idx_t tasks_completed;
66424
+ };
66425
+
66426
+ void WindowLocalMergeState::ExecuteTask() {
66427
+ auto &global_sort = merge_state->sort_state;
66428
+ switch (stage) {
66429
+ case WindowSortStage::PREPARE:
66430
+ global_sort.PrepareMergePhase();
66431
+ break;
66432
+ case WindowSortStage::MERGE: {
66390
66433
  MergeSorter merge_sorter(global_sort, global_sort.buffer_manager);
66391
66434
  merge_sorter.PerformInMergeRound();
66392
- event->FinishTask();
66393
- return TaskExecutionResult::TASK_FINISHED;
66435
+ break;
66436
+ }
66437
+ default:
66438
+ throw InternalException("Unexpected WindowGlobalMergeState in ExecuteTask!");
66439
+ }
66440
+
66441
+ merge_state->CompleteTask();
66442
+ finished = true;
66443
+ }
66444
+
66445
+ bool WindowGlobalMergeState::AssignTask(WindowLocalMergeState &local_state) {
66446
+ lock_guard<mutex> guard(lock);
66447
+
66448
+ if (tasks_assigned >= total_tasks) {
66449
+ return false;
66450
+ }
66451
+
66452
+ local_state.merge_state = this;
66453
+ local_state.stage = stage;
66454
+ local_state.finished = false;
66455
+ tasks_assigned++;
66456
+
66457
+ return true;
66458
+ }
66459
+
66460
+ void WindowGlobalMergeState::CompleteTask() {
66461
+ lock_guard<mutex> guard(lock);
66462
+
66463
+ ++tasks_completed;
66464
+ }
66465
+
66466
+ bool WindowGlobalMergeState::TryPrepareNextStage() {
66467
+ lock_guard<mutex> guard(lock);
66468
+
66469
+ if (tasks_completed < total_tasks) {
66470
+ return false;
66471
+ }
66472
+
66473
+ tasks_assigned = tasks_completed = 0;
66474
+
66475
+ switch (stage) {
66476
+ case WindowSortStage::INIT:
66477
+ total_tasks = 1;
66478
+ stage = WindowSortStage::PREPARE;
66479
+ return true;
66480
+
66481
+ case WindowSortStage::PREPARE:
66482
+ total_tasks = sort_state.sorted_blocks.size() / 2;
66483
+ if (!total_tasks) {
66484
+ break;
66485
+ }
66486
+ stage = WindowSortStage::MERGE;
66487
+ sort_state.InitializeMergeRound();
66488
+ return true;
66489
+
66490
+ case WindowSortStage::MERGE:
66491
+ sort_state.CompleteMergeRound(true);
66492
+ total_tasks = sort_state.sorted_blocks.size() / 2;
66493
+ if (!total_tasks) {
66494
+ break;
66495
+ }
66496
+ sort_state.InitializeMergeRound();
66497
+ return true;
66498
+
66499
+ case WindowSortStage::SORTED:
66500
+ break;
66501
+ }
66502
+
66503
+ stage = WindowSortStage::SORTED;
66504
+
66505
+ return false;
66506
+ }
66507
+
66508
+ class WindowGlobalMergeStates {
66509
+ public:
66510
+ using WindowGlobalMergeStatePtr = unique_ptr<WindowGlobalMergeState>;
66511
+
66512
+ WindowGlobalMergeStates(WindowGlobalSinkState &sink, idx_t group) {
66513
+ // Schedule all the sorts for maximum thread utilisation
66514
+ for (; group < sink.hash_groups.size(); group = sink.GetNextSortGroup()) {
66515
+ auto &hash_group = *sink.hash_groups[group];
66516
+
66517
+ // Prepare for merge sort phase
66518
+ auto state = make_unique<WindowGlobalMergeState>(*hash_group.global_sort);
66519
+ states.emplace_back(move(state));
66520
+ }
66521
+ }
66522
+
66523
+ vector<WindowGlobalMergeStatePtr> states;
66524
+ };
66525
+
66526
+ class WindowMergeTask : public ExecutorTask {
66527
+ public:
66528
+ WindowMergeTask(shared_ptr<Event> event_p, ClientContext &context_p, WindowGlobalMergeStates &hash_groups_p)
66529
+ : ExecutorTask(context_p), event(move(event_p)), hash_groups(hash_groups_p) {
66394
66530
  }
66395
66531
 
66532
+ TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;
66533
+
66396
66534
  private:
66397
66535
  shared_ptr<Event> event;
66398
- WindowGlobalHashGroup &hash_group;
66536
+ WindowLocalMergeState local_state;
66537
+ WindowGlobalMergeStates &hash_groups;
66399
66538
  };
66400
66539
 
66540
+ TaskExecutionResult WindowMergeTask::ExecuteTask(TaskExecutionMode mode) {
66541
+ // Loop until all hash groups are done
66542
+ size_t sorted = 0;
66543
+ while (sorted < hash_groups.states.size()) {
66544
+ // First check if there is an unfinished task for this thread
66545
+ if (!local_state.TaskFinished()) {
66546
+ local_state.ExecuteTask();
66547
+ continue;
66548
+ }
66549
+
66550
+ // Thread is done with its assigned task, try to fetch new work
66551
+ for (auto group = sorted; group < hash_groups.states.size(); ++group) {
66552
+ auto &global_state = hash_groups.states[group];
66553
+ if (global_state->IsSorted()) {
66554
+ // This hash group is done
66555
+ // Update the high water mark of densely completed groups
66556
+ if (sorted == group) {
66557
+ ++sorted;
66558
+ }
66559
+ continue;
66560
+ }
66561
+
66562
+ // Try to assign work for this hash group to this thread
66563
+ if (global_state->AssignTask(local_state)) {
66564
+ // We assigned a task to this thread!
66565
+ // Break out of this loop to re-enter the top-level loop and execute the task
66566
+ break;
66567
+ }
66568
+
66569
+ // Hash group global state couldn't assign a task to this thread
66570
+ // Try to prepare the next stage
66571
+ if (!global_state->TryPrepareNextStage()) {
66572
+ // This current hash group is not yet done
66573
+ // But we were not able to assign a task for it to this thread
66574
+ // See if the next hash group is better
66575
+ continue;
66576
+ }
66577
+
66578
+ // We were able to prepare the next stage for this hash group!
66579
+ // Try to assign a task once more
66580
+ if (global_state->AssignTask(local_state)) {
66581
+ // We assigned a task to this thread!
66582
+ // Break out of this loop to re-enter the top-level loop and execute the task
66583
+ break;
66584
+ }
66585
+
66586
+ // We were able to prepare the next merge round,
66587
+ // but we were not able to assign a task for it to this thread
66588
+ // The tasks were assigned to other threads while this thread waited for the lock
66589
+ // Go to the next iteration to see if another hash group has a task
66590
+ }
66591
+ }
66592
+
66593
+ event->FinishTask();
66594
+ return TaskExecutionResult::TASK_FINISHED;
66595
+ }
66596
+
66401
66597
  class WindowMergeEvent : public BasePipelineEvent {
66402
66598
  public:
66403
- WindowMergeEvent(WindowGlobalSinkState &gstate_p, Pipeline &pipeline_p, WindowGlobalHashGroup &hash_group_p)
66404
- : BasePipelineEvent(pipeline_p), gstate(gstate_p), hash_group(hash_group_p) {
66599
+ WindowMergeEvent(WindowGlobalSinkState &gstate_p, Pipeline &pipeline_p, idx_t group)
66600
+ : BasePipelineEvent(pipeline_p), gstate(gstate_p), merge_states(gstate_p, group) {
66405
66601
  }
66406
66602
 
66407
66603
  WindowGlobalSinkState &gstate;
66408
- WindowGlobalHashGroup &hash_group;
66604
+ WindowGlobalMergeStates merge_states;
66409
66605
 
66410
66606
  public:
66411
66607
  void Schedule() override {
@@ -66417,26 +66613,10 @@ public:
66417
66613
 
66418
66614
  vector<unique_ptr<Task>> merge_tasks;
66419
66615
  for (idx_t tnum = 0; tnum < num_threads; tnum++) {
66420
- merge_tasks.push_back(make_unique<WindowMergeTask>(shared_from_this(), context, hash_group));
66616
+ merge_tasks.push_back(make_unique<WindowMergeTask>(shared_from_this(), context, merge_states));
66421
66617
  }
66422
66618
  SetTasks(move(merge_tasks));
66423
66619
  }
66424
-
66425
- void FinishEvent() override {
66426
- hash_group.global_sort->CompleteMergeRound(true);
66427
- CreateMergeTasks(*pipeline, *this, gstate, hash_group);
66428
- }
66429
-
66430
- static void CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
66431
- WindowGlobalHashGroup &hash_group) {
66432
-
66433
- // Multiple blocks remaining in the group: Schedule the next round
66434
- if (hash_group.global_sort->sorted_blocks.size() > 1) {
66435
- hash_group.global_sort->InitializeMergeRound();
66436
- auto new_event = make_shared<WindowMergeEvent>(state, pipeline, hash_group);
66437
- event.InsertEvent(move(new_event));
66438
- }
66439
- }
66440
66620
  };
66441
66621
 
66442
66622
  SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
@@ -66459,13 +66639,8 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
66459
66639
  }
66460
66640
 
66461
66641
  // Schedule all the sorts for maximum thread utilisation
66462
- for (; group < state.hash_groups.size(); group = state.GetNextSortGroup()) {
66463
- auto &hash_group = *state.hash_groups[group];
66464
-
66465
- // Prepare for merge sort phase
66466
- hash_group.PrepareMergePhase();
66467
- WindowMergeEvent::CreateMergeTasks(pipeline, event, state, hash_group);
66468
- }
66642
+ auto new_event = make_shared<WindowMergeEvent>(state, pipeline, group);
66643
+ event.InsertEvent(move(new_event));
66469
66644
 
66470
66645
  return SinkFinalizeType::READY;
66471
66646
  }
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "b8ed45a8d"
15
- #define DUCKDB_VERSION "v0.5.2-dev1104"
14
+ #define DUCKDB_SOURCE_ID "5160f5742"
15
+ #define DUCKDB_VERSION "v0.5.2-dev1118"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -11629,10 +11629,10 @@ public:
11629
11629
  void ScanAtIndex(ColumnDataParallelScanState &state, ColumnDataLocalScanState &lstate, DataChunk &result,
11630
11630
  idx_t chunk_index, idx_t segment_index, idx_t row_index) const;
11631
11631
 
11632
- private:
11633
11632
  //! Initialize the column data collection
11634
11633
  void Initialize(vector<LogicalType> types);
11635
11634
 
11635
+ private:
11636
11636
  //! Creates a new segment within the ColumnDataCollection
11637
11637
  void CreateSegment();
11638
11638