duckdb 0.4.1-dev1530.0 → 0.4.1-dev1544.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.4.1-dev1530.0",
4
+ "version": "0.4.1-dev1544.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -68473,7 +68473,7 @@ public:
68473
68473
 
68474
68474
  class HashJoinGlobalState : public GlobalSinkState {
68475
68475
  public:
68476
- HashJoinGlobalState() {
68476
+ HashJoinGlobalState() : scanned_data(false) {
68477
68477
  }
68478
68478
 
68479
68479
  //! The HT used by the join
@@ -68482,6 +68482,8 @@ public:
68482
68482
  unique_ptr<PerfectHashJoinExecutor> perfect_join_executor;
68483
68483
  //! Whether or not the hash table has been finalized
68484
68484
  bool finalized = false;
68485
+ //! Whether or not we have started scanning data using GetData
68486
+ atomic<bool> scanned_data;
68485
68487
  };
68486
68488
 
68487
68489
  unique_ptr<GlobalSinkState> PhysicalHashJoin::GetGlobalSinkState(ClientContext &context) const {
@@ -68640,6 +68642,7 @@ OperatorResultType PhysicalHashJoin::Execute(ExecutionContext &context, DataChun
68640
68642
  auto &state = (PhysicalHashJoinState &)state_p;
68641
68643
  auto &sink = (HashJoinGlobalState &)*sink_state;
68642
68644
  D_ASSERT(sink.finalized);
68645
+ D_ASSERT(!sink.scanned_data);
68643
68646
 
68644
68647
  if (sink.hash_table->Count() == 0 && EmptyResultIfRHSIsEmpty()) {
68645
68648
  return OperatorResultType::FINISHED;
@@ -68702,6 +68705,7 @@ void PhysicalHashJoin::GetData(ExecutionContext &context, DataChunk &chunk, Glob
68702
68705
  // check if we need to scan any unmatched tuples from the RHS for the full/right outer join
68703
68706
  auto &sink = (HashJoinGlobalState &)*sink_state;
68704
68707
  auto &state = (HashJoinScanState &)gstate;
68708
+ sink.scanned_data = true;
68705
68709
  sink.hash_table->ScanFullOuter(chunk, state.ht_scan_state);
68706
68710
  }
68707
68711
 
@@ -147624,52 +147628,96 @@ struct PipelineEventStack {
147624
147628
  Event *pipeline_complete_event;
147625
147629
  };
147626
147630
 
147627
- Pipeline *Executor::ScheduleUnionPipeline(const shared_ptr<Pipeline> &pipeline, const Pipeline *parent,
147628
- event_map_t &event_map, vector<shared_ptr<Event>> &events) {
147629
- pipeline->Ready();
147631
+ using event_map_t = unordered_map<const Pipeline *, PipelineEventStack>;
147632
+
147633
+ struct ScheduleEventData {
147634
+ ScheduleEventData(const vector<shared_ptr<Pipeline>> &pipelines,
147635
+ unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &child_pipelines,
147636
+ unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &union_pipelines,
147637
+ unordered_map<Pipeline *, vector<Pipeline *>> &child_dependencies,
147638
+ vector<shared_ptr<Event>> &events, bool initial_schedule)
147639
+ : pipelines(pipelines), child_pipelines(child_pipelines), union_pipelines(union_pipelines),
147640
+ child_dependencies(child_dependencies), events(events), initial_schedule(initial_schedule) {
147641
+ }
147642
+
147643
+ const vector<shared_ptr<Pipeline>> &pipelines;
147644
+ unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &child_pipelines;
147645
+ unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &union_pipelines;
147646
+ unordered_map<Pipeline *, vector<Pipeline *>> &child_dependencies;
147647
+ unordered_map<Pipeline *, vector<Pipeline *>> scheduled_pipelines;
147648
+ vector<shared_ptr<Event>> &events;
147649
+ bool initial_schedule;
147650
+ event_map_t event_map;
147651
+ };
147630
147652
 
147653
+ void Executor::SchedulePipeline(const shared_ptr<Pipeline> &pipeline, ScheduleEventData &event_data,
147654
+ vector<Pipeline *> &scheduled_pipelines) {
147631
147655
  D_ASSERT(pipeline);
147632
- auto pipeline_event = make_shared<PipelineEvent>(pipeline);
147633
147656
 
147634
- auto parent_stack_entry = event_map.find(parent);
147635
- D_ASSERT(parent_stack_entry != event_map.end());
147657
+ auto &event_map = event_data.event_map;
147658
+ auto &events = event_data.events;
147659
+ auto &union_pipelines = event_data.union_pipelines;
147660
+ pipeline->Ready();
147636
147661
 
147637
- auto &parent_stack = parent_stack_entry->second;
147662
+ auto pipeline_event = make_shared<PipelineEvent>(pipeline);
147638
147663
 
147639
147664
  PipelineEventStack stack;
147640
147665
  stack.pipeline_event = pipeline_event.get();
147641
- stack.pipeline_finish_event = parent_stack.pipeline_finish_event;
147642
- stack.pipeline_complete_event = parent_stack.pipeline_complete_event;
147666
+ if (!scheduled_pipelines.empty()) {
147667
+ // this pipeline has a parent pipeline - i.e. it is scheduled as part of a `UNION`
147668
+ // set up the events
147669
+ auto parent = scheduled_pipelines.back();
147670
+ auto parent_stack_entry = event_map.find(parent);
147671
+ D_ASSERT(parent_stack_entry != event_map.end());
147672
+
147673
+ auto &parent_stack = parent_stack_entry->second;
147674
+ stack.pipeline_finish_event = parent_stack.pipeline_finish_event;
147675
+ stack.pipeline_complete_event = parent_stack.pipeline_complete_event;
147676
+
147677
+ stack.pipeline_event->AddDependency(*parent_stack.pipeline_event);
147678
+ parent_stack.pipeline_finish_event->AddDependency(*pipeline_event);
147679
+ } else {
147680
+ // stand-alone pipeline
147681
+ auto pipeline_finish_event = make_shared<PipelineFinishEvent>(pipeline);
147682
+ auto pipeline_complete_event =
147683
+ make_shared<PipelineCompleteEvent>(pipeline->executor, event_data.initial_schedule);
147684
+
147685
+ pipeline_finish_event->AddDependency(*pipeline_event);
147686
+ pipeline_complete_event->AddDependency(*pipeline_finish_event);
147643
147687
 
147644
- stack.pipeline_event->AddDependency(*parent_stack.pipeline_event);
147645
- parent_stack.pipeline_finish_event->AddDependency(*pipeline_event);
147688
+ stack.pipeline_finish_event = pipeline_finish_event.get();
147689
+ stack.pipeline_complete_event = pipeline_complete_event.get();
147690
+
147691
+ events.push_back(move(pipeline_finish_event));
147692
+ events.push_back(move(pipeline_complete_event));
147693
+ }
147646
147694
 
147647
147695
  events.push_back(move(pipeline_event));
147648
147696
  event_map.insert(make_pair(pipeline.get(), stack));
147649
147697
 
147650
- auto parent_pipeline = pipeline.get();
147651
-
147698
+ scheduled_pipelines.push_back(pipeline.get());
147652
147699
  auto union_entry = union_pipelines.find(pipeline.get());
147653
147700
  if (union_entry != union_pipelines.end()) {
147654
147701
  for (auto &entry : union_entry->second) {
147655
- parent_pipeline = ScheduleUnionPipeline(entry, parent_pipeline, event_map, events);
147702
+ SchedulePipeline(entry, event_data, scheduled_pipelines);
147656
147703
  }
147657
147704
  }
147658
-
147659
- return parent_pipeline;
147660
147705
  }
147661
147706
 
147662
- void Executor::ScheduleChildPipeline(Pipeline *parent, const shared_ptr<Pipeline> &pipeline, event_map_t &event_map,
147663
- vector<shared_ptr<Event>> &events) {
147707
+ void Executor::ScheduleChildPipeline(Pipeline *parent, const shared_ptr<Pipeline> &pipeline,
147708
+ ScheduleEventData &event_data) {
147709
+ auto &events = event_data.events;
147710
+ auto &child_dependencies = event_data.child_dependencies;
147664
147711
  pipeline->Ready();
147665
147712
 
147666
147713
  auto child_ptr = pipeline.get();
147667
147714
  auto dependencies = child_dependencies.find(child_ptr);
147668
- D_ASSERT(union_pipelines.find(child_ptr) == union_pipelines.end());
147715
+ D_ASSERT(event_data.union_pipelines.find(child_ptr) == event_data.union_pipelines.end());
147669
147716
  D_ASSERT(dependencies != child_dependencies.end());
147670
147717
  // create the pipeline event and the event stack
147671
147718
  auto pipeline_event = make_shared<PipelineEvent>(pipeline);
147672
147719
 
147720
+ auto &event_map = event_data.event_map;
147673
147721
  auto parent_entry = event_map.find(parent);
147674
147722
  PipelineEventStack stack;
147675
147723
  stack.pipeline_event = pipeline_event.get();
@@ -147678,17 +147726,26 @@ void Executor::ScheduleChildPipeline(Pipeline *parent, const shared_ptr<Pipeline
147678
147726
 
147679
147727
  // set up the dependencies for this child pipeline
147680
147728
  unordered_set<Event *> finish_events;
147681
- for (auto &dep : dependencies->second) {
147682
- auto dep_entry = event_map.find(dep);
147683
- D_ASSERT(dep_entry != event_map.end());
147684
- D_ASSERT(dep_entry->second.pipeline_event);
147685
- D_ASSERT(dep_entry->second.pipeline_finish_event);
147729
+ for (auto &main_dep : dependencies->second) {
147730
+ vector<Pipeline *> pipeline_dependencies;
147731
+ auto dep_scheduled = event_data.scheduled_pipelines.find(main_dep);
147732
+ if (dep_scheduled == event_data.scheduled_pipelines.end()) {
147733
+ pipeline_dependencies.push_back(main_dep);
147734
+ } else {
147735
+ pipeline_dependencies = dep_scheduled->second;
147736
+ }
147737
+ for (auto &dep : pipeline_dependencies) {
147738
+ auto dep_entry = event_map.find(dep);
147739
+ D_ASSERT(dep_entry != event_map.end());
147740
+ D_ASSERT(dep_entry->second.pipeline_event);
147741
+ D_ASSERT(dep_entry->second.pipeline_finish_event);
147686
147742
 
147687
- auto finish_event = dep_entry->second.pipeline_finish_event;
147688
- stack.pipeline_event->AddDependency(*dep_entry->second.pipeline_event);
147689
- if (finish_events.find(finish_event) == finish_events.end()) {
147690
- finish_event->AddDependency(*stack.pipeline_event);
147691
- finish_events.insert(finish_event);
147743
+ auto finish_event = dep_entry->second.pipeline_finish_event;
147744
+ stack.pipeline_event->AddDependency(*dep_entry->second.pipeline_event);
147745
+ if (finish_events.find(finish_event) == finish_events.end()) {
147746
+ finish_event->AddDependency(*stack.pipeline_event);
147747
+ finish_events.insert(finish_event);
147748
+ }
147692
147749
  }
147693
147750
  }
147694
147751
 
@@ -147696,56 +147753,25 @@ void Executor::ScheduleChildPipeline(Pipeline *parent, const shared_ptr<Pipeline
147696
147753
  event_map.insert(make_pair(child_ptr, stack));
147697
147754
  }
147698
147755
 
147699
- void Executor::SchedulePipeline(const shared_ptr<Pipeline> &pipeline, event_map_t &event_map,
147700
- vector<shared_ptr<Event>> &events, bool complete_pipeline) {
147701
- D_ASSERT(pipeline);
147702
-
147703
- pipeline->Ready();
147704
-
147705
- auto pipeline_event = make_shared<PipelineEvent>(pipeline);
147706
- auto pipeline_finish_event = make_shared<PipelineFinishEvent>(pipeline);
147707
- auto pipeline_complete_event = make_shared<PipelineCompleteEvent>(pipeline->executor, complete_pipeline);
147708
-
147709
- PipelineEventStack stack;
147710
- stack.pipeline_event = pipeline_event.get();
147711
- stack.pipeline_finish_event = pipeline_finish_event.get();
147712
- stack.pipeline_complete_event = pipeline_complete_event.get();
147713
-
147714
- pipeline_finish_event->AddDependency(*pipeline_event);
147715
- pipeline_complete_event->AddDependency(*pipeline_finish_event);
147716
-
147717
- events.push_back(move(pipeline_event));
147718
- events.push_back(move(pipeline_finish_event));
147719
- events.push_back(move(pipeline_complete_event));
147720
-
147721
- event_map.insert(make_pair(pipeline.get(), stack));
147722
-
147723
- auto union_entry = union_pipelines.find(pipeline.get());
147724
- if (union_entry != union_pipelines.end()) {
147725
- auto parent_pipeline = pipeline.get();
147726
- for (auto &entry : union_entry->second) {
147727
- parent_pipeline = ScheduleUnionPipeline(entry, parent_pipeline, event_map, events);
147728
- }
147729
- }
147730
- }
147731
-
147732
- void Executor::ScheduleEventsInternal(const vector<shared_ptr<Pipeline>> &pipelines,
147733
- unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &child_pipelines,
147734
- vector<shared_ptr<Event>> &events, bool main_schedule) {
147756
+ void Executor::ScheduleEventsInternal(ScheduleEventData &event_data) {
147757
+ auto &events = event_data.events;
147735
147758
  D_ASSERT(events.empty());
147736
147759
  // create all the required pipeline events
147737
- event_map_t event_map;
147738
- for (auto &pipeline : pipelines) {
147739
- SchedulePipeline(pipeline, event_map, events, main_schedule);
147760
+ auto &event_map = event_data.event_map;
147761
+ for (auto &pipeline : event_data.pipelines) {
147762
+ vector<Pipeline *> scheduled_pipelines;
147763
+ SchedulePipeline(pipeline, event_data, scheduled_pipelines);
147764
+
147765
+ event_data.scheduled_pipelines[pipeline.get()] = move(scheduled_pipelines);
147740
147766
  }
147741
147767
  // schedule child pipelines
147742
- for (auto &entry : child_pipelines) {
147768
+ for (auto &entry : event_data.child_pipelines) {
147743
147769
  // iterate in reverse order
147744
147770
  // since child entries are added from top to bottom
147745
147771
  // dependencies are in reverse order (bottom to top)
147746
147772
  for (idx_t i = entry.second.size(); i > 0; i--) {
147747
147773
  auto &child_entry = entry.second[i - 1];
147748
- ScheduleChildPipeline(entry.first, child_entry, event_map, events);
147774
+ ScheduleChildPipeline(entry.first, child_entry, event_data);
147749
147775
  }
147750
147776
  }
147751
147777
  // set up the dependencies between pipeline events
@@ -147770,12 +147796,16 @@ void Executor::ScheduleEventsInternal(const vector<shared_ptr<Pipeline>> &pipeli
147770
147796
  }
147771
147797
 
147772
147798
  void Executor::ScheduleEvents() {
147773
- ScheduleEventsInternal(pipelines, child_pipelines, events);
147799
+ ScheduleEventData event_data(pipelines, child_pipelines, union_pipelines, child_dependencies, events, true);
147800
+ ScheduleEventsInternal(event_data);
147774
147801
  }
147775
147802
 
147776
147803
  void Executor::ReschedulePipelines(const vector<shared_ptr<Pipeline>> &pipelines, vector<shared_ptr<Event>> &events) {
147777
147804
  unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> child_pipelines;
147778
- ScheduleEventsInternal(pipelines, child_pipelines, events, false);
147805
+ unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> union_pipelines;
147806
+ unordered_map<Pipeline *, vector<Pipeline *>> child_dependencies;
147807
+ ScheduleEventData event_data(pipelines, child_pipelines, union_pipelines, child_dependencies, events, false);
147808
+ ScheduleEventsInternal(event_data);
147779
147809
  }
147780
147810
 
147781
147811
  void Executor::ExtractPipelines(shared_ptr<Pipeline> &pipeline, vector<shared_ptr<Pipeline>> &result) {
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "cadbb22d0"
15
- #define DUCKDB_VERSION "v0.4.1-dev1530"
14
+ #define DUCKDB_SOURCE_ID "cc3979656"
15
+ #define DUCKDB_VERSION "v0.4.1-dev1544"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -11612,8 +11612,7 @@ class Task;
11612
11612
 
11613
11613
  struct PipelineEventStack;
11614
11614
  struct ProducerToken;
11615
-
11616
- using event_map_t = unordered_map<const Pipeline *, PipelineEventStack>;
11615
+ struct ScheduleEventData;
11617
11616
 
11618
11617
  class Executor {
11619
11618
  friend class Pipeline;
@@ -11677,16 +11676,12 @@ private:
11677
11676
  void InitializeInternal(PhysicalOperator *physical_plan);
11678
11677
 
11679
11678
  void ScheduleEvents();
11680
- void ScheduleEventsInternal(const vector<shared_ptr<Pipeline>> &pipelines,
11681
- unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &child_pipelines,
11682
- vector<shared_ptr<Event>> &events, bool main_schedule = true);
11683
-
11684
- void SchedulePipeline(const shared_ptr<Pipeline> &pipeline, event_map_t &event_map,
11685
- vector<shared_ptr<Event>> &events, bool complete_pipeline);
11686
- Pipeline *ScheduleUnionPipeline(const shared_ptr<Pipeline> &pipeline, const Pipeline *parent,
11687
- event_map_t &event_map, vector<shared_ptr<Event>> &events);
11688
- void ScheduleChildPipeline(Pipeline *parent, const shared_ptr<Pipeline> &pipeline, event_map_t &event_map,
11689
- vector<shared_ptr<Event>> &events);
11679
+ static void ScheduleEventsInternal(ScheduleEventData &event_data);
11680
+
11681
+ static void SchedulePipeline(const shared_ptr<Pipeline> &pipeline, ScheduleEventData &event_data,
11682
+ vector<Pipeline *> &scheduled_pipelines);
11683
+ static void ScheduleChildPipeline(Pipeline *parent, const shared_ptr<Pipeline> &pipeline,
11684
+ ScheduleEventData &event_data);
11690
11685
  void ExtractPipelines(shared_ptr<Pipeline> &pipeline, vector<shared_ptr<Pipeline>> &result);
11691
11686
  bool NextExecutor();
11692
11687