duckdb 0.5.2-dev1096.0 → 0.5.2-dev1112.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.5.2-dev1096.0",
5
+ "version": "0.5.2-dev1112.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -58640,19 +58640,6 @@ void Construct(vector<Key> &keys, row_t *row_ids, Node *&node, KeySection &key_s
58640
58640
  }
58641
58641
  }
58642
58642
 
58643
- void FindFirstNotNullKey(vector<Key> &keys, bool &skipped_all_nulls, idx_t &start_idx) {
58644
-
58645
- if (!skipped_all_nulls) {
58646
- for (idx_t i = 0; i < keys.size(); i++) {
58647
- if (!keys[i].Empty()) {
58648
- start_idx = i;
58649
- skipped_all_nulls = true;
58650
- return;
58651
- }
58652
- }
58653
- }
58654
- }
58655
-
58656
58643
  void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator &allocator) {
58657
58644
 
58658
58645
  auto payload_types = logical_types;
@@ -58661,7 +58648,6 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
58661
58648
  ArenaAllocator arena_allocator(allocator);
58662
58649
  vector<Key> keys(STANDARD_VECTOR_SIZE);
58663
58650
 
58664
- auto skipped_all_nulls = false;
58665
58651
  auto temp_art = make_unique<ART>(this->column_ids, this->table_io_manager, this->unbound_expressions,
58666
58652
  this->constraint_type, this->db);
58667
58653
 
@@ -58686,22 +58672,6 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
58686
58672
  arena_allocator.Reset();
58687
58673
  GenerateKeys(arena_allocator, ordered_chunk, keys);
58688
58674
 
58689
- // we order NULLS FIRST, so we might have to skip nulls at the start of our sorted data
58690
- idx_t start_idx = 0;
58691
- FindFirstNotNullKey(keys, skipped_all_nulls, start_idx);
58692
-
58693
- if (start_idx != 0 && IsPrimary()) {
58694
- throw ConstraintException("NULLs in new data violate the primary key constraint of the index");
58695
- }
58696
-
58697
- if (!skipped_all_nulls) {
58698
- if (IsPrimary()) {
58699
- // chunk consists only of NULLs
58700
- throw ConstraintException("NULLs in new data violate the primary key constraint of the index");
58701
- }
58702
- continue;
58703
- }
58704
-
58705
58675
  // prepare the row_identifiers
58706
58676
  row_identifiers.Flatten(ordered_chunk.size());
58707
58677
  auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
@@ -58709,7 +58679,7 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
58709
58679
  // construct the ART of this chunk
58710
58680
  auto art = make_unique<ART>(this->column_ids, this->table_io_manager, this->unbound_expressions,
58711
58681
  this->constraint_type, this->db);
58712
- auto key_section = KeySection(start_idx, ordered_chunk.size() - 1, 0, 0);
58682
+ auto key_section = KeySection(0, ordered_chunk.size() - 1, 0, 0);
58713
58683
  auto has_constraint = IsUnique();
58714
58684
  Construct(keys, row_ids, art->tree, key_section, has_constraint);
58715
58685
 
@@ -66408,34 +66378,230 @@ unique_ptr<GlobalSinkState> PhysicalWindow::GetGlobalSinkState(ClientContext &co
66408
66378
  return make_unique<WindowGlobalSinkState>(*this, context);
66409
66379
  }
66410
66380
 
66411
- class WindowMergeTask : public ExecutorTask {
66381
+ enum class WindowSortStage : uint8_t { INIT, PREPARE, MERGE, SORTED };
66382
+
66383
+ class WindowGlobalMergeState;
66384
+
66385
+ class WindowLocalMergeState {
66412
66386
  public:
66413
- WindowMergeTask(shared_ptr<Event> event_p, ClientContext &context_p, WindowGlobalHashGroup &hash_group_p)
66414
- : ExecutorTask(context_p), event(move(event_p)), hash_group(hash_group_p) {
66387
+ WindowLocalMergeState() : merge_state(nullptr), stage(WindowSortStage::INIT) {
66388
+ finished = true;
66415
66389
  }
66416
66390
 
66417
- TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
66418
- // Initialize merge sorted and iterate until done
66419
- auto &global_sort = *hash_group.global_sort;
66391
+ bool TaskFinished() {
66392
+ return finished;
66393
+ }
66394
+ void ExecuteTask();
66395
+
66396
+ WindowGlobalMergeState *merge_state;
66397
+ WindowSortStage stage;
66398
+ atomic<bool> finished;
66399
+ };
66400
+
66401
+ class WindowGlobalMergeState {
66402
+ public:
66403
+ explicit WindowGlobalMergeState(GlobalSortState &sort_state)
66404
+ : sort_state(sort_state), stage(WindowSortStage::INIT), total_tasks(0), tasks_assigned(0), tasks_completed(0) {
66405
+ }
66406
+
66407
+ bool IsSorted() const {
66408
+ lock_guard<mutex> guard(lock);
66409
+ return stage == WindowSortStage::SORTED;
66410
+ }
66411
+
66412
+ bool AssignTask(WindowLocalMergeState &local_state);
66413
+ bool TryPrepareNextStage();
66414
+ void CompleteTask();
66415
+
66416
+ GlobalSortState &sort_state;
66417
+
66418
+ private:
66419
+ mutable mutex lock;
66420
+ WindowSortStage stage;
66421
+ idx_t total_tasks;
66422
+ idx_t tasks_assigned;
66423
+ idx_t tasks_completed;
66424
+ };
66425
+
66426
+ void WindowLocalMergeState::ExecuteTask() {
66427
+ auto &global_sort = merge_state->sort_state;
66428
+ switch (stage) {
66429
+ case WindowSortStage::PREPARE:
66430
+ global_sort.PrepareMergePhase();
66431
+ break;
66432
+ case WindowSortStage::MERGE: {
66420
66433
  MergeSorter merge_sorter(global_sort, global_sort.buffer_manager);
66421
66434
  merge_sorter.PerformInMergeRound();
66422
- event->FinishTask();
66423
- return TaskExecutionResult::TASK_FINISHED;
66435
+ break;
66436
+ }
66437
+ default:
66438
+ throw InternalException("Unexpected WindowGlobalMergeState in ExecuteTask!");
66439
+ }
66440
+
66441
+ merge_state->CompleteTask();
66442
+ finished = true;
66443
+ }
66444
+
66445
+ bool WindowGlobalMergeState::AssignTask(WindowLocalMergeState &local_state) {
66446
+ lock_guard<mutex> guard(lock);
66447
+
66448
+ if (tasks_assigned >= total_tasks) {
66449
+ return false;
66424
66450
  }
66425
66451
 
66452
+ local_state.merge_state = this;
66453
+ local_state.stage = stage;
66454
+ local_state.finished = false;
66455
+ tasks_assigned++;
66456
+
66457
+ return true;
66458
+ }
66459
+
66460
+ void WindowGlobalMergeState::CompleteTask() {
66461
+ lock_guard<mutex> guard(lock);
66462
+
66463
+ ++tasks_completed;
66464
+ }
66465
+
66466
+ bool WindowGlobalMergeState::TryPrepareNextStage() {
66467
+ lock_guard<mutex> guard(lock);
66468
+
66469
+ if (tasks_completed < total_tasks) {
66470
+ return false;
66471
+ }
66472
+
66473
+ tasks_assigned = tasks_completed = 0;
66474
+
66475
+ switch (stage) {
66476
+ case WindowSortStage::INIT:
66477
+ total_tasks = 1;
66478
+ stage = WindowSortStage::PREPARE;
66479
+ return true;
66480
+
66481
+ case WindowSortStage::PREPARE:
66482
+ total_tasks = sort_state.sorted_blocks.size() / 2;
66483
+ if (!total_tasks) {
66484
+ break;
66485
+ }
66486
+ stage = WindowSortStage::MERGE;
66487
+ sort_state.InitializeMergeRound();
66488
+ return true;
66489
+
66490
+ case WindowSortStage::MERGE:
66491
+ sort_state.CompleteMergeRound(true);
66492
+ total_tasks = sort_state.sorted_blocks.size() / 2;
66493
+ if (!total_tasks) {
66494
+ break;
66495
+ }
66496
+ sort_state.InitializeMergeRound();
66497
+ return true;
66498
+
66499
+ case WindowSortStage::SORTED:
66500
+ break;
66501
+ }
66502
+
66503
+ stage = WindowSortStage::SORTED;
66504
+
66505
+ return false;
66506
+ }
66507
+
66508
+ class WindowGlobalMergeStates {
66509
+ public:
66510
+ using WindowGlobalMergeStatePtr = unique_ptr<WindowGlobalMergeState>;
66511
+
66512
+ WindowGlobalMergeStates(WindowGlobalSinkState &sink, idx_t group) {
66513
+ // Schedule all the sorts for maximum thread utilisation
66514
+ for (; group < sink.hash_groups.size(); group = sink.GetNextSortGroup()) {
66515
+ auto &hash_group = *sink.hash_groups[group];
66516
+
66517
+ // Prepare for merge sort phase
66518
+ auto state = make_unique<WindowGlobalMergeState>(*hash_group.global_sort);
66519
+ states.emplace_back(move(state));
66520
+ }
66521
+ }
66522
+
66523
+ vector<WindowGlobalMergeStatePtr> states;
66524
+ };
66525
+
66526
+ class WindowMergeTask : public ExecutorTask {
66527
+ public:
66528
+ WindowMergeTask(shared_ptr<Event> event_p, ClientContext &context_p, WindowGlobalMergeStates &hash_groups_p)
66529
+ : ExecutorTask(context_p), event(move(event_p)), hash_groups(hash_groups_p) {
66530
+ }
66531
+
66532
+ TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override;
66533
+
66426
66534
  private:
66427
66535
  shared_ptr<Event> event;
66428
- WindowGlobalHashGroup &hash_group;
66536
+ WindowLocalMergeState local_state;
66537
+ WindowGlobalMergeStates &hash_groups;
66429
66538
  };
66430
66539
 
66540
+ TaskExecutionResult WindowMergeTask::ExecuteTask(TaskExecutionMode mode) {
66541
+ // Loop until all hash groups are done
66542
+ size_t sorted = 0;
66543
+ while (sorted < hash_groups.states.size()) {
66544
+ // First check if there is an unfinished task for this thread
66545
+ if (!local_state.TaskFinished()) {
66546
+ local_state.ExecuteTask();
66547
+ continue;
66548
+ }
66549
+
66550
+ // Thread is done with its assigned task, try to fetch new work
66551
+ for (auto group = sorted; group < hash_groups.states.size(); ++group) {
66552
+ auto &global_state = hash_groups.states[group];
66553
+ if (global_state->IsSorted()) {
66554
+ // This hash group is done
66555
+ // Update the high water mark of densely completed groups
66556
+ if (sorted == group) {
66557
+ ++sorted;
66558
+ }
66559
+ continue;
66560
+ }
66561
+
66562
+ // Try to assign work for this hash group to this thread
66563
+ if (global_state->AssignTask(local_state)) {
66564
+ // We assigned a task to this thread!
66565
+ // Break out of this loop to re-enter the top-level loop and execute the task
66566
+ break;
66567
+ }
66568
+
66569
+ // Hash group global state couldn't assign a task to this thread
66570
+ // Try to prepare the next stage
66571
+ if (!global_state->TryPrepareNextStage()) {
66572
+ // This current hash group is not yet done
66573
+ // But we were not able to assign a task for it to this thread
66574
+ // See if the next hash group is better
66575
+ continue;
66576
+ }
66577
+
66578
+ // We were able to prepare the next stage for this hash group!
66579
+ // Try to assign a task once more
66580
+ if (global_state->AssignTask(local_state)) {
66581
+ // We assigned a task to this thread!
66582
+ // Break out of this loop to re-enter the top-level loop and execute the task
66583
+ break;
66584
+ }
66585
+
66586
+ // We were able to prepare the next merge round,
66587
+ // but we were not able to assign a task for it to this thread
66588
+ // The tasks were assigned to other threads while this thread waited for the lock
66589
+ // Go to the next iteration to see if another hash group has a task
66590
+ }
66591
+ }
66592
+
66593
+ event->FinishTask();
66594
+ return TaskExecutionResult::TASK_FINISHED;
66595
+ }
66596
+
66431
66597
  class WindowMergeEvent : public BasePipelineEvent {
66432
66598
  public:
66433
- WindowMergeEvent(WindowGlobalSinkState &gstate_p, Pipeline &pipeline_p, WindowGlobalHashGroup &hash_group_p)
66434
- : BasePipelineEvent(pipeline_p), gstate(gstate_p), hash_group(hash_group_p) {
66599
+ WindowMergeEvent(WindowGlobalSinkState &gstate_p, Pipeline &pipeline_p, idx_t group)
66600
+ : BasePipelineEvent(pipeline_p), gstate(gstate_p), merge_states(gstate_p, group) {
66435
66601
  }
66436
66602
 
66437
66603
  WindowGlobalSinkState &gstate;
66438
- WindowGlobalHashGroup &hash_group;
66604
+ WindowGlobalMergeStates merge_states;
66439
66605
 
66440
66606
  public:
66441
66607
  void Schedule() override {
@@ -66447,26 +66613,10 @@ public:
66447
66613
 
66448
66614
  vector<unique_ptr<Task>> merge_tasks;
66449
66615
  for (idx_t tnum = 0; tnum < num_threads; tnum++) {
66450
- merge_tasks.push_back(make_unique<WindowMergeTask>(shared_from_this(), context, hash_group));
66616
+ merge_tasks.push_back(make_unique<WindowMergeTask>(shared_from_this(), context, merge_states));
66451
66617
  }
66452
66618
  SetTasks(move(merge_tasks));
66453
66619
  }
66454
-
66455
- void FinishEvent() override {
66456
- hash_group.global_sort->CompleteMergeRound(true);
66457
- CreateMergeTasks(*pipeline, *this, gstate, hash_group);
66458
- }
66459
-
66460
- static void CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
66461
- WindowGlobalHashGroup &hash_group) {
66462
-
66463
- // Multiple blocks remaining in the group: Schedule the next round
66464
- if (hash_group.global_sort->sorted_blocks.size() > 1) {
66465
- hash_group.global_sort->InitializeMergeRound();
66466
- auto new_event = make_shared<WindowMergeEvent>(state, pipeline, hash_group);
66467
- event.InsertEvent(move(new_event));
66468
- }
66469
- }
66470
66620
  };
66471
66621
 
66472
66622
  SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
@@ -66489,13 +66639,8 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
66489
66639
  }
66490
66640
 
66491
66641
  // Schedule all the sorts for maximum thread utilisation
66492
- for (; group < state.hash_groups.size(); group = state.GetNextSortGroup()) {
66493
- auto &hash_group = *state.hash_groups[group];
66494
-
66495
- // Prepare for merge sort phase
66496
- hash_group.PrepareMergePhase();
66497
- WindowMergeEvent::CreateMergeTasks(pipeline, event, state, hash_group);
66498
- }
66642
+ auto new_event = make_shared<WindowMergeEvent>(state, pipeline, group);
66643
+ event.InsertEvent(move(new_event));
66499
66644
 
66500
66645
  return SinkFinalizeType::READY;
66501
66646
  }
@@ -82706,17 +82851,22 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreate &op
82706
82851
 
82707
82852
 
82708
82853
 
82854
+
82855
+
82856
+
82709
82857
  namespace duckdb {
82710
82858
 
82711
82859
  unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateIndex &op) {
82712
82860
 
82713
82861
  D_ASSERT(op.children.empty());
82714
82862
 
82863
+ // table scan operator for index key columns and row IDs
82715
82864
  unique_ptr<TableFilterSet> table_filters;
82716
82865
  op.info->column_ids.emplace_back(COLUMN_IDENTIFIER_ROW_ID);
82717
82866
 
82718
82867
  auto &bind_data = (TableScanBindData &)*op.bind_data;
82719
82868
  bind_data.is_create_index = true;
82869
+
82720
82870
  auto table_scan =
82721
82871
  make_unique<PhysicalTableScan>(op.info->scan_types, op.function, move(op.bind_data), op.info->column_ids,
82722
82872
  op.info->names, move(table_filters), op.estimated_cardinality);
@@ -82724,10 +82874,32 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
82724
82874
  dependencies.insert(&op.table);
82725
82875
  op.info->column_ids.pop_back();
82726
82876
 
82877
+ D_ASSERT(op.info->scan_types.size() - 1 <= op.info->names.size());
82878
+ D_ASSERT(op.info->scan_types.size() - 1 <= op.info->column_ids.size());
82879
+
82880
+ // filter operator for IS_NOT_NULL on each key column
82881
+ vector<LogicalType> filter_types;
82882
+ vector<unique_ptr<Expression>> filter_select_list;
82883
+
82884
+ for (idx_t i = 0; i < op.info->scan_types.size() - 1; i++) {
82885
+ filter_types.push_back(op.info->scan_types[i]);
82886
+ auto is_not_null_expr =
82887
+ make_unique<BoundOperatorExpression>(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN);
82888
+ auto bound_ref =
82889
+ make_unique<BoundReferenceExpression>(op.info->names[op.info->column_ids[i]], op.info->scan_types[i], i);
82890
+ is_not_null_expr->children.push_back(move(bound_ref));
82891
+ filter_select_list.push_back(move(is_not_null_expr));
82892
+ }
82893
+
82894
+ auto null_filter = make_unique<PhysicalFilter>(move(filter_types), move(filter_select_list), STANDARD_VECTOR_SIZE);
82895
+ null_filter->types.emplace_back(LogicalType::ROW_TYPE);
82896
+ null_filter->children.push_back(move(table_scan));
82897
+
82898
+ // actual physical create index operator
82727
82899
  auto physical_create_index =
82728
82900
  make_unique<PhysicalCreateIndex>(op, op.table, op.info->column_ids, move(op.expressions), move(op.info),
82729
82901
  move(op.unbound_expressions), op.estimated_cardinality);
82730
- physical_create_index->children.push_back(move(table_scan));
82902
+ physical_create_index->children.push_back(move(null_filter));
82731
82903
  return move(physical_create_index);
82732
82904
  }
82733
82905
 
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "efa7dfa64"
15
- #define DUCKDB_VERSION "v0.5.2-dev1096"
14
+ #define DUCKDB_SOURCE_ID "d58f34a7e"
15
+ #define DUCKDB_VERSION "v0.5.2-dev1112"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //