duckdb 0.4.1-dev1530.0 → 0.4.1-dev1544.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +103 -73
- package/src/duckdb.hpp +9 -14
- package/src/parquet-amalgamation.cpp +37765 -37765
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -68473,7 +68473,7 @@ public:
|
|
|
68473
68473
|
|
|
68474
68474
|
class HashJoinGlobalState : public GlobalSinkState {
|
|
68475
68475
|
public:
|
|
68476
|
-
HashJoinGlobalState() {
|
|
68476
|
+
HashJoinGlobalState() : scanned_data(false) {
|
|
68477
68477
|
}
|
|
68478
68478
|
|
|
68479
68479
|
//! The HT used by the join
|
|
@@ -68482,6 +68482,8 @@ public:
|
|
|
68482
68482
|
unique_ptr<PerfectHashJoinExecutor> perfect_join_executor;
|
|
68483
68483
|
//! Whether or not the hash table has been finalized
|
|
68484
68484
|
bool finalized = false;
|
|
68485
|
+
//! Whether or not we have started scanning data using GetData
|
|
68486
|
+
atomic<bool> scanned_data;
|
|
68485
68487
|
};
|
|
68486
68488
|
|
|
68487
68489
|
unique_ptr<GlobalSinkState> PhysicalHashJoin::GetGlobalSinkState(ClientContext &context) const {
|
|
@@ -68640,6 +68642,7 @@ OperatorResultType PhysicalHashJoin::Execute(ExecutionContext &context, DataChun
|
|
|
68640
68642
|
auto &state = (PhysicalHashJoinState &)state_p;
|
|
68641
68643
|
auto &sink = (HashJoinGlobalState &)*sink_state;
|
|
68642
68644
|
D_ASSERT(sink.finalized);
|
|
68645
|
+
D_ASSERT(!sink.scanned_data);
|
|
68643
68646
|
|
|
68644
68647
|
if (sink.hash_table->Count() == 0 && EmptyResultIfRHSIsEmpty()) {
|
|
68645
68648
|
return OperatorResultType::FINISHED;
|
|
@@ -68702,6 +68705,7 @@ void PhysicalHashJoin::GetData(ExecutionContext &context, DataChunk &chunk, Glob
|
|
|
68702
68705
|
// check if we need to scan any unmatched tuples from the RHS for the full/right outer join
|
|
68703
68706
|
auto &sink = (HashJoinGlobalState &)*sink_state;
|
|
68704
68707
|
auto &state = (HashJoinScanState &)gstate;
|
|
68708
|
+
sink.scanned_data = true;
|
|
68705
68709
|
sink.hash_table->ScanFullOuter(chunk, state.ht_scan_state);
|
|
68706
68710
|
}
|
|
68707
68711
|
|
|
@@ -147624,52 +147628,96 @@ struct PipelineEventStack {
|
|
|
147624
147628
|
Event *pipeline_complete_event;
|
|
147625
147629
|
};
|
|
147626
147630
|
|
|
147627
|
-
|
|
147628
|
-
|
|
147629
|
-
|
|
147631
|
+
using event_map_t = unordered_map<const Pipeline *, PipelineEventStack>;
|
|
147632
|
+
|
|
147633
|
+
struct ScheduleEventData {
|
|
147634
|
+
ScheduleEventData(const vector<shared_ptr<Pipeline>> &pipelines,
|
|
147635
|
+
unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &child_pipelines,
|
|
147636
|
+
unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &union_pipelines,
|
|
147637
|
+
unordered_map<Pipeline *, vector<Pipeline *>> &child_dependencies,
|
|
147638
|
+
vector<shared_ptr<Event>> &events, bool initial_schedule)
|
|
147639
|
+
: pipelines(pipelines), child_pipelines(child_pipelines), union_pipelines(union_pipelines),
|
|
147640
|
+
child_dependencies(child_dependencies), events(events), initial_schedule(initial_schedule) {
|
|
147641
|
+
}
|
|
147642
|
+
|
|
147643
|
+
const vector<shared_ptr<Pipeline>> &pipelines;
|
|
147644
|
+
unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &child_pipelines;
|
|
147645
|
+
unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &union_pipelines;
|
|
147646
|
+
unordered_map<Pipeline *, vector<Pipeline *>> &child_dependencies;
|
|
147647
|
+
unordered_map<Pipeline *, vector<Pipeline *>> scheduled_pipelines;
|
|
147648
|
+
vector<shared_ptr<Event>> &events;
|
|
147649
|
+
bool initial_schedule;
|
|
147650
|
+
event_map_t event_map;
|
|
147651
|
+
};
|
|
147630
147652
|
|
|
147653
|
+
void Executor::SchedulePipeline(const shared_ptr<Pipeline> &pipeline, ScheduleEventData &event_data,
|
|
147654
|
+
vector<Pipeline *> &scheduled_pipelines) {
|
|
147631
147655
|
D_ASSERT(pipeline);
|
|
147632
|
-
auto pipeline_event = make_shared<PipelineEvent>(pipeline);
|
|
147633
147656
|
|
|
147634
|
-
auto
|
|
147635
|
-
|
|
147657
|
+
auto &event_map = event_data.event_map;
|
|
147658
|
+
auto &events = event_data.events;
|
|
147659
|
+
auto &union_pipelines = event_data.union_pipelines;
|
|
147660
|
+
pipeline->Ready();
|
|
147636
147661
|
|
|
147637
|
-
auto
|
|
147662
|
+
auto pipeline_event = make_shared<PipelineEvent>(pipeline);
|
|
147638
147663
|
|
|
147639
147664
|
PipelineEventStack stack;
|
|
147640
147665
|
stack.pipeline_event = pipeline_event.get();
|
|
147641
|
-
|
|
147642
|
-
|
|
147666
|
+
if (!scheduled_pipelines.empty()) {
|
|
147667
|
+
// this pipeline has a parent pipeline - i.e. it is scheduled as part of a `UNION`
|
|
147668
|
+
// set up the events
|
|
147669
|
+
auto parent = scheduled_pipelines.back();
|
|
147670
|
+
auto parent_stack_entry = event_map.find(parent);
|
|
147671
|
+
D_ASSERT(parent_stack_entry != event_map.end());
|
|
147672
|
+
|
|
147673
|
+
auto &parent_stack = parent_stack_entry->second;
|
|
147674
|
+
stack.pipeline_finish_event = parent_stack.pipeline_finish_event;
|
|
147675
|
+
stack.pipeline_complete_event = parent_stack.pipeline_complete_event;
|
|
147676
|
+
|
|
147677
|
+
stack.pipeline_event->AddDependency(*parent_stack.pipeline_event);
|
|
147678
|
+
parent_stack.pipeline_finish_event->AddDependency(*pipeline_event);
|
|
147679
|
+
} else {
|
|
147680
|
+
// stand-alone pipeline
|
|
147681
|
+
auto pipeline_finish_event = make_shared<PipelineFinishEvent>(pipeline);
|
|
147682
|
+
auto pipeline_complete_event =
|
|
147683
|
+
make_shared<PipelineCompleteEvent>(pipeline->executor, event_data.initial_schedule);
|
|
147684
|
+
|
|
147685
|
+
pipeline_finish_event->AddDependency(*pipeline_event);
|
|
147686
|
+
pipeline_complete_event->AddDependency(*pipeline_finish_event);
|
|
147643
147687
|
|
|
147644
|
-
|
|
147645
|
-
|
|
147688
|
+
stack.pipeline_finish_event = pipeline_finish_event.get();
|
|
147689
|
+
stack.pipeline_complete_event = pipeline_complete_event.get();
|
|
147690
|
+
|
|
147691
|
+
events.push_back(move(pipeline_finish_event));
|
|
147692
|
+
events.push_back(move(pipeline_complete_event));
|
|
147693
|
+
}
|
|
147646
147694
|
|
|
147647
147695
|
events.push_back(move(pipeline_event));
|
|
147648
147696
|
event_map.insert(make_pair(pipeline.get(), stack));
|
|
147649
147697
|
|
|
147650
|
-
|
|
147651
|
-
|
|
147698
|
+
scheduled_pipelines.push_back(pipeline.get());
|
|
147652
147699
|
auto union_entry = union_pipelines.find(pipeline.get());
|
|
147653
147700
|
if (union_entry != union_pipelines.end()) {
|
|
147654
147701
|
for (auto &entry : union_entry->second) {
|
|
147655
|
-
|
|
147702
|
+
SchedulePipeline(entry, event_data, scheduled_pipelines);
|
|
147656
147703
|
}
|
|
147657
147704
|
}
|
|
147658
|
-
|
|
147659
|
-
return parent_pipeline;
|
|
147660
147705
|
}
|
|
147661
147706
|
|
|
147662
|
-
void Executor::ScheduleChildPipeline(Pipeline *parent, const shared_ptr<Pipeline> &pipeline,
|
|
147663
|
-
|
|
147707
|
+
void Executor::ScheduleChildPipeline(Pipeline *parent, const shared_ptr<Pipeline> &pipeline,
|
|
147708
|
+
ScheduleEventData &event_data) {
|
|
147709
|
+
auto &events = event_data.events;
|
|
147710
|
+
auto &child_dependencies = event_data.child_dependencies;
|
|
147664
147711
|
pipeline->Ready();
|
|
147665
147712
|
|
|
147666
147713
|
auto child_ptr = pipeline.get();
|
|
147667
147714
|
auto dependencies = child_dependencies.find(child_ptr);
|
|
147668
|
-
D_ASSERT(union_pipelines.find(child_ptr) == union_pipelines.end());
|
|
147715
|
+
D_ASSERT(event_data.union_pipelines.find(child_ptr) == event_data.union_pipelines.end());
|
|
147669
147716
|
D_ASSERT(dependencies != child_dependencies.end());
|
|
147670
147717
|
// create the pipeline event and the event stack
|
|
147671
147718
|
auto pipeline_event = make_shared<PipelineEvent>(pipeline);
|
|
147672
147719
|
|
|
147720
|
+
auto &event_map = event_data.event_map;
|
|
147673
147721
|
auto parent_entry = event_map.find(parent);
|
|
147674
147722
|
PipelineEventStack stack;
|
|
147675
147723
|
stack.pipeline_event = pipeline_event.get();
|
|
@@ -147678,17 +147726,26 @@ void Executor::ScheduleChildPipeline(Pipeline *parent, const shared_ptr<Pipeline
|
|
|
147678
147726
|
|
|
147679
147727
|
// set up the dependencies for this child pipeline
|
|
147680
147728
|
unordered_set<Event *> finish_events;
|
|
147681
|
-
for (auto &
|
|
147682
|
-
|
|
147683
|
-
|
|
147684
|
-
|
|
147685
|
-
|
|
147729
|
+
for (auto &main_dep : dependencies->second) {
|
|
147730
|
+
vector<Pipeline *> pipeline_dependencies;
|
|
147731
|
+
auto dep_scheduled = event_data.scheduled_pipelines.find(main_dep);
|
|
147732
|
+
if (dep_scheduled == event_data.scheduled_pipelines.end()) {
|
|
147733
|
+
pipeline_dependencies.push_back(main_dep);
|
|
147734
|
+
} else {
|
|
147735
|
+
pipeline_dependencies = dep_scheduled->second;
|
|
147736
|
+
}
|
|
147737
|
+
for (auto &dep : pipeline_dependencies) {
|
|
147738
|
+
auto dep_entry = event_map.find(dep);
|
|
147739
|
+
D_ASSERT(dep_entry != event_map.end());
|
|
147740
|
+
D_ASSERT(dep_entry->second.pipeline_event);
|
|
147741
|
+
D_ASSERT(dep_entry->second.pipeline_finish_event);
|
|
147686
147742
|
|
|
147687
|
-
|
|
147688
|
-
|
|
147689
|
-
|
|
147690
|
-
|
|
147691
|
-
|
|
147743
|
+
auto finish_event = dep_entry->second.pipeline_finish_event;
|
|
147744
|
+
stack.pipeline_event->AddDependency(*dep_entry->second.pipeline_event);
|
|
147745
|
+
if (finish_events.find(finish_event) == finish_events.end()) {
|
|
147746
|
+
finish_event->AddDependency(*stack.pipeline_event);
|
|
147747
|
+
finish_events.insert(finish_event);
|
|
147748
|
+
}
|
|
147692
147749
|
}
|
|
147693
147750
|
}
|
|
147694
147751
|
|
|
@@ -147696,56 +147753,25 @@ void Executor::ScheduleChildPipeline(Pipeline *parent, const shared_ptr<Pipeline
|
|
|
147696
147753
|
event_map.insert(make_pair(child_ptr, stack));
|
|
147697
147754
|
}
|
|
147698
147755
|
|
|
147699
|
-
void Executor::
|
|
147700
|
-
|
|
147701
|
-
D_ASSERT(pipeline);
|
|
147702
|
-
|
|
147703
|
-
pipeline->Ready();
|
|
147704
|
-
|
|
147705
|
-
auto pipeline_event = make_shared<PipelineEvent>(pipeline);
|
|
147706
|
-
auto pipeline_finish_event = make_shared<PipelineFinishEvent>(pipeline);
|
|
147707
|
-
auto pipeline_complete_event = make_shared<PipelineCompleteEvent>(pipeline->executor, complete_pipeline);
|
|
147708
|
-
|
|
147709
|
-
PipelineEventStack stack;
|
|
147710
|
-
stack.pipeline_event = pipeline_event.get();
|
|
147711
|
-
stack.pipeline_finish_event = pipeline_finish_event.get();
|
|
147712
|
-
stack.pipeline_complete_event = pipeline_complete_event.get();
|
|
147713
|
-
|
|
147714
|
-
pipeline_finish_event->AddDependency(*pipeline_event);
|
|
147715
|
-
pipeline_complete_event->AddDependency(*pipeline_finish_event);
|
|
147716
|
-
|
|
147717
|
-
events.push_back(move(pipeline_event));
|
|
147718
|
-
events.push_back(move(pipeline_finish_event));
|
|
147719
|
-
events.push_back(move(pipeline_complete_event));
|
|
147720
|
-
|
|
147721
|
-
event_map.insert(make_pair(pipeline.get(), stack));
|
|
147722
|
-
|
|
147723
|
-
auto union_entry = union_pipelines.find(pipeline.get());
|
|
147724
|
-
if (union_entry != union_pipelines.end()) {
|
|
147725
|
-
auto parent_pipeline = pipeline.get();
|
|
147726
|
-
for (auto &entry : union_entry->second) {
|
|
147727
|
-
parent_pipeline = ScheduleUnionPipeline(entry, parent_pipeline, event_map, events);
|
|
147728
|
-
}
|
|
147729
|
-
}
|
|
147730
|
-
}
|
|
147731
|
-
|
|
147732
|
-
void Executor::ScheduleEventsInternal(const vector<shared_ptr<Pipeline>> &pipelines,
|
|
147733
|
-
unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> &child_pipelines,
|
|
147734
|
-
vector<shared_ptr<Event>> &events, bool main_schedule) {
|
|
147756
|
+
void Executor::ScheduleEventsInternal(ScheduleEventData &event_data) {
|
|
147757
|
+
auto &events = event_data.events;
|
|
147735
147758
|
D_ASSERT(events.empty());
|
|
147736
147759
|
// create all the required pipeline events
|
|
147737
|
-
|
|
147738
|
-
for (auto &pipeline : pipelines) {
|
|
147739
|
-
|
|
147760
|
+
auto &event_map = event_data.event_map;
|
|
147761
|
+
for (auto &pipeline : event_data.pipelines) {
|
|
147762
|
+
vector<Pipeline *> scheduled_pipelines;
|
|
147763
|
+
SchedulePipeline(pipeline, event_data, scheduled_pipelines);
|
|
147764
|
+
|
|
147765
|
+
event_data.scheduled_pipelines[pipeline.get()] = move(scheduled_pipelines);
|
|
147740
147766
|
}
|
|
147741
147767
|
// schedule child pipelines
|
|
147742
|
-
for (auto &entry : child_pipelines) {
|
|
147768
|
+
for (auto &entry : event_data.child_pipelines) {
|
|
147743
147769
|
// iterate in reverse order
|
|
147744
147770
|
// since child entries are added from top to bottom
|
|
147745
147771
|
// dependencies are in reverse order (bottom to top)
|
|
147746
147772
|
for (idx_t i = entry.second.size(); i > 0; i--) {
|
|
147747
147773
|
auto &child_entry = entry.second[i - 1];
|
|
147748
|
-
ScheduleChildPipeline(entry.first, child_entry,
|
|
147774
|
+
ScheduleChildPipeline(entry.first, child_entry, event_data);
|
|
147749
147775
|
}
|
|
147750
147776
|
}
|
|
147751
147777
|
// set up the dependencies between pipeline events
|
|
@@ -147770,12 +147796,16 @@ void Executor::ScheduleEventsInternal(const vector<shared_ptr<Pipeline>> &pipeli
|
|
|
147770
147796
|
}
|
|
147771
147797
|
|
|
147772
147798
|
void Executor::ScheduleEvents() {
|
|
147773
|
-
|
|
147799
|
+
ScheduleEventData event_data(pipelines, child_pipelines, union_pipelines, child_dependencies, events, true);
|
|
147800
|
+
ScheduleEventsInternal(event_data);
|
|
147774
147801
|
}
|
|
147775
147802
|
|
|
147776
147803
|
void Executor::ReschedulePipelines(const vector<shared_ptr<Pipeline>> &pipelines, vector<shared_ptr<Event>> &events) {
|
|
147777
147804
|
unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> child_pipelines;
|
|
147778
|
-
|
|
147805
|
+
unordered_map<Pipeline *, vector<shared_ptr<Pipeline>>> union_pipelines;
|
|
147806
|
+
unordered_map<Pipeline *, vector<Pipeline *>> child_dependencies;
|
|
147807
|
+
ScheduleEventData event_data(pipelines, child_pipelines, union_pipelines, child_dependencies, events, false);
|
|
147808
|
+
ScheduleEventsInternal(event_data);
|
|
147779
147809
|
}
|
|
147780
147810
|
|
|
147781
147811
|
void Executor::ExtractPipelines(shared_ptr<Pipeline> &pipeline, vector<shared_ptr<Pipeline>> &result) {
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.4.1-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "cc3979656"
|
|
15
|
+
#define DUCKDB_VERSION "v0.4.1-dev1544"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -11612,8 +11612,7 @@ class Task;
|
|
|
11612
11612
|
|
|
11613
11613
|
struct PipelineEventStack;
|
|
11614
11614
|
struct ProducerToken;
|
|
11615
|
-
|
|
11616
|
-
using event_map_t = unordered_map<const Pipeline *, PipelineEventStack>;
|
|
11615
|
+
struct ScheduleEventData;
|
|
11617
11616
|
|
|
11618
11617
|
class Executor {
|
|
11619
11618
|
friend class Pipeline;
|
|
@@ -11677,16 +11676,12 @@ private:
|
|
|
11677
11676
|
void InitializeInternal(PhysicalOperator *physical_plan);
|
|
11678
11677
|
|
|
11679
11678
|
void ScheduleEvents();
|
|
11680
|
-
void ScheduleEventsInternal(
|
|
11681
|
-
|
|
11682
|
-
|
|
11683
|
-
|
|
11684
|
-
void
|
|
11685
|
-
|
|
11686
|
-
Pipeline *ScheduleUnionPipeline(const shared_ptr<Pipeline> &pipeline, const Pipeline *parent,
|
|
11687
|
-
event_map_t &event_map, vector<shared_ptr<Event>> &events);
|
|
11688
|
-
void ScheduleChildPipeline(Pipeline *parent, const shared_ptr<Pipeline> &pipeline, event_map_t &event_map,
|
|
11689
|
-
vector<shared_ptr<Event>> &events);
|
|
11679
|
+
static void ScheduleEventsInternal(ScheduleEventData &event_data);
|
|
11680
|
+
|
|
11681
|
+
static void SchedulePipeline(const shared_ptr<Pipeline> &pipeline, ScheduleEventData &event_data,
|
|
11682
|
+
vector<Pipeline *> &scheduled_pipelines);
|
|
11683
|
+
static void ScheduleChildPipeline(Pipeline *parent, const shared_ptr<Pipeline> &pipeline,
|
|
11684
|
+
ScheduleEventData &event_data);
|
|
11690
11685
|
void ExtractPipelines(shared_ptr<Pipeline> &pipeline, vector<shared_ptr<Pipeline>> &result);
|
|
11691
11686
|
bool NextExecutor();
|
|
11692
11687
|
|