duckdb 0.5.2-dev1473.0 → 0.5.2-dev1479.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +112 -19
- package/src/duckdb.hpp +2 -2
- package/src/parquet-amalgamation.cpp +37539 -37539
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -65011,10 +65011,9 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
|
|
|
65011
65011
|
|
|
65012
65012
|
//! REGULAR FINALIZE EVENT
|
|
65013
65013
|
|
|
65014
|
-
class
|
|
65014
|
+
class HashAggregateMergeEvent : public BasePipelineEvent {
|
|
65015
65015
|
public:
|
|
65016
|
-
|
|
65017
|
-
Pipeline *pipeline_p)
|
|
65016
|
+
HashAggregateMergeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p, Pipeline *pipeline_p)
|
|
65018
65017
|
: BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p) {
|
|
65019
65018
|
}
|
|
65020
65019
|
|
|
@@ -65036,14 +65035,61 @@ public:
|
|
|
65036
65035
|
}
|
|
65037
65036
|
};
|
|
65038
65037
|
|
|
65038
|
+
//! REGULAR FINALIZE FROM DISTINCT FINALIZE
|
|
65039
|
+
|
|
65040
|
+
class HashAggregateFinalizeTask : public ExecutorTask {
|
|
65041
|
+
public:
|
|
65042
|
+
HashAggregateFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p, HashAggregateGlobalState &state_p,
|
|
65043
|
+
ClientContext &context, const PhysicalHashAggregate &op)
|
|
65044
|
+
: ExecutorTask(pipeline.executor), pipeline(pipeline), event(move(event_p)), gstate(state_p), context(context),
|
|
65045
|
+
op(op) {
|
|
65046
|
+
}
|
|
65047
|
+
|
|
65048
|
+
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
|
|
65049
|
+
op.FinalizeInternal(pipeline, *event, context, gstate, false);
|
|
65050
|
+
D_ASSERT(!gstate.finished);
|
|
65051
|
+
gstate.finished = true;
|
|
65052
|
+
event->FinishTask();
|
|
65053
|
+
return TaskExecutionResult::TASK_FINISHED;
|
|
65054
|
+
}
|
|
65055
|
+
|
|
65056
|
+
private:
|
|
65057
|
+
Pipeline &pipeline;
|
|
65058
|
+
shared_ptr<Event> event;
|
|
65059
|
+
HashAggregateGlobalState &gstate;
|
|
65060
|
+
ClientContext &context;
|
|
65061
|
+
const PhysicalHashAggregate &op;
|
|
65062
|
+
};
|
|
65063
|
+
|
|
65064
|
+
class HashAggregateFinalizeEvent : public BasePipelineEvent {
|
|
65065
|
+
public:
|
|
65066
|
+
HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
|
|
65067
|
+
Pipeline *pipeline_p, ClientContext &context)
|
|
65068
|
+
: BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p), context(context) {
|
|
65069
|
+
}
|
|
65070
|
+
|
|
65071
|
+
const PhysicalHashAggregate &op;
|
|
65072
|
+
HashAggregateGlobalState &gstate;
|
|
65073
|
+
ClientContext &context;
|
|
65074
|
+
|
|
65075
|
+
public:
|
|
65076
|
+
void Schedule() override {
|
|
65077
|
+
vector<unique_ptr<Task>> tasks;
|
|
65078
|
+
tasks.push_back(make_unique<HashAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate, context, op));
|
|
65079
|
+
D_ASSERT(!tasks.empty());
|
|
65080
|
+
SetTasks(move(tasks));
|
|
65081
|
+
}
|
|
65082
|
+
};
|
|
65083
|
+
|
|
65039
65084
|
//! DISTINCT FINALIZE TASK
|
|
65040
65085
|
|
|
65041
65086
|
class HashDistinctAggregateFinalizeTask : public ExecutorTask {
|
|
65042
65087
|
public:
|
|
65043
65088
|
HashDistinctAggregateFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p, HashAggregateGlobalState &state_p,
|
|
65044
|
-
ClientContext &context, const PhysicalHashAggregate &op
|
|
65089
|
+
ClientContext &context, const PhysicalHashAggregate &op,
|
|
65090
|
+
vector<vector<unique_ptr<GlobalSourceState>>> &global_sources_p)
|
|
65045
65091
|
: ExecutorTask(pipeline.executor), pipeline(pipeline), event(move(event_p)), gstate(state_p), context(context),
|
|
65046
|
-
op(op) {
|
|
65092
|
+
op(op), global_sources(global_sources_p) {
|
|
65047
65093
|
}
|
|
65048
65094
|
|
|
65049
65095
|
void AggregateDistinctGrouping(DistinctAggregateCollectionInfo &info,
|
|
@@ -65052,6 +65098,7 @@ public:
|
|
|
65052
65098
|
auto &aggregates = info.aggregates;
|
|
65053
65099
|
auto &data = *grouping_data.distinct_data;
|
|
65054
65100
|
auto &state = *grouping_state.distinct_state;
|
|
65101
|
+
auto &table_state = *grouping_state.table_state;
|
|
65055
65102
|
|
|
65056
65103
|
ThreadContext temp_thread_context(context);
|
|
65057
65104
|
ExecutionContext temp_exec_context(context, temp_thread_context, &pipeline);
|
|
@@ -65089,9 +65136,12 @@ public:
|
|
|
65089
65136
|
D_ASSERT(data.info.table_map.count(i));
|
|
65090
65137
|
auto table_idx = data.info.table_map.at(i);
|
|
65091
65138
|
auto &radix_table_p = data.radix_tables[table_idx];
|
|
65092
|
-
auto &output_chunk = *state.distinct_output_chunks[table_idx];
|
|
65093
65139
|
|
|
65094
|
-
|
|
65140
|
+
// Create a duplicate of the output_chunk, because of multi-threading we can't alter the original
|
|
65141
|
+
DataChunk output_chunk;
|
|
65142
|
+
output_chunk.Initialize(context, state.distinct_output_chunks[table_idx]->GetTypes());
|
|
65143
|
+
|
|
65144
|
+
auto &global_source = global_sources[grouping_idx][i];
|
|
65095
65145
|
auto local_source = radix_table_p->GetLocalSourceState(temp_exec_context);
|
|
65096
65146
|
|
|
65097
65147
|
// Fetch all the data from the aggregate ht, and Sink it into the main ht
|
|
@@ -65123,11 +65173,11 @@ public:
|
|
|
65123
65173
|
aggregate_input_chunk.SetCardinality(output_chunk);
|
|
65124
65174
|
|
|
65125
65175
|
// Sink it into the main ht
|
|
65126
|
-
grouping_data.table_data.Sink(temp_exec_context,
|
|
65127
|
-
|
|
65176
|
+
grouping_data.table_data.Sink(temp_exec_context, table_state, *temp_local_state, group_chunk,
|
|
65177
|
+
aggregate_input_chunk, {i});
|
|
65128
65178
|
}
|
|
65129
65179
|
}
|
|
65130
|
-
grouping_data.table_data.Combine(temp_exec_context,
|
|
65180
|
+
grouping_data.table_data.Combine(temp_exec_context, table_state, *temp_local_state);
|
|
65131
65181
|
}
|
|
65132
65182
|
|
|
65133
65183
|
TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
|
|
@@ -65138,9 +65188,6 @@ public:
|
|
|
65138
65188
|
auto &grouping_state = gstate.grouping_states[i];
|
|
65139
65189
|
AggregateDistinctGrouping(info, grouping, grouping_state, i);
|
|
65140
65190
|
}
|
|
65141
|
-
op.FinalizeInternal(pipeline, *event, context, gstate, false);
|
|
65142
|
-
D_ASSERT(!gstate.finished);
|
|
65143
|
-
gstate.finished = true;
|
|
65144
65191
|
event->FinishTask();
|
|
65145
65192
|
return TaskExecutionResult::TASK_FINISHED;
|
|
65146
65193
|
}
|
|
@@ -65151,6 +65198,7 @@ private:
|
|
|
65151
65198
|
HashAggregateGlobalState &gstate;
|
|
65152
65199
|
ClientContext &context;
|
|
65153
65200
|
const PhysicalHashAggregate &op;
|
|
65201
|
+
vector<vector<unique_ptr<GlobalSourceState>>> &global_sources;
|
|
65154
65202
|
};
|
|
65155
65203
|
|
|
65156
65204
|
//! DISTINCT FINALIZE EVENT
|
|
@@ -65165,15 +65213,60 @@ public:
|
|
|
65165
65213
|
const PhysicalHashAggregate &op;
|
|
65166
65214
|
HashAggregateGlobalState &gstate;
|
|
65167
65215
|
ClientContext &context;
|
|
65216
|
+
//! The GlobalSourceStates for all the radix tables of the distinct aggregates
|
|
65217
|
+
vector<vector<unique_ptr<GlobalSourceState>>> global_sources;
|
|
65168
65218
|
|
|
65169
65219
|
public:
|
|
65170
65220
|
void Schedule() override {
|
|
65221
|
+
global_sources = CreateGlobalSources();
|
|
65222
|
+
|
|
65171
65223
|
vector<unique_ptr<Task>> tasks;
|
|
65172
|
-
|
|
65173
|
-
|
|
65224
|
+
auto &scheduler = TaskScheduler::GetScheduler(context);
|
|
65225
|
+
auto number_of_threads = scheduler.NumberOfThreads();
|
|
65226
|
+
tasks.reserve(number_of_threads);
|
|
65227
|
+
for (int32_t i = 0; i < number_of_threads; i++) {
|
|
65228
|
+
tasks.push_back(make_unique<HashDistinctAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate,
|
|
65229
|
+
context, op, global_sources));
|
|
65230
|
+
}
|
|
65174
65231
|
D_ASSERT(!tasks.empty());
|
|
65175
65232
|
SetTasks(move(tasks));
|
|
65176
65233
|
}
|
|
65234
|
+
|
|
65235
|
+
void FinishEvent() override {
|
|
65236
|
+
//! Now that everything is added to the main ht, we can actually finalize
|
|
65237
|
+
auto new_event = make_shared<HashAggregateFinalizeEvent>(op, gstate, pipeline.get(), context);
|
|
65238
|
+
this->InsertEvent(move(new_event));
|
|
65239
|
+
}
|
|
65240
|
+
|
|
65241
|
+
private:
|
|
65242
|
+
vector<vector<unique_ptr<GlobalSourceState>>> CreateGlobalSources() {
|
|
65243
|
+
vector<vector<unique_ptr<GlobalSourceState>>> grouping_sources;
|
|
65244
|
+
grouping_sources.reserve(op.groupings.size());
|
|
65245
|
+
for (idx_t grouping_idx = 0; grouping_idx < op.groupings.size(); grouping_idx++) {
|
|
65246
|
+
auto &grouping = op.groupings[grouping_idx];
|
|
65247
|
+
auto &data = *grouping.distinct_data;
|
|
65248
|
+
|
|
65249
|
+
vector<unique_ptr<GlobalSourceState>> aggregate_sources;
|
|
65250
|
+
aggregate_sources.reserve(op.grouped_aggregate_data.aggregates.size());
|
|
65251
|
+
|
|
65252
|
+
for (idx_t i = 0; i < op.grouped_aggregate_data.aggregates.size(); i++) {
|
|
65253
|
+
auto &aggregate = op.grouped_aggregate_data.aggregates[i];
|
|
65254
|
+
auto &aggr = (BoundAggregateExpression &)*aggregate;
|
|
65255
|
+
|
|
65256
|
+
if (!aggr.IsDistinct()) {
|
|
65257
|
+
aggregate_sources.push_back(nullptr);
|
|
65258
|
+
continue;
|
|
65259
|
+
}
|
|
65260
|
+
|
|
65261
|
+
D_ASSERT(data.info.table_map.count(i));
|
|
65262
|
+
auto table_idx = data.info.table_map.at(i);
|
|
65263
|
+
auto &radix_table_p = data.radix_tables[table_idx];
|
|
65264
|
+
aggregate_sources.push_back(radix_table_p->GetGlobalSourceState(context));
|
|
65265
|
+
}
|
|
65266
|
+
grouping_sources.push_back(move(aggregate_sources));
|
|
65267
|
+
}
|
|
65268
|
+
return grouping_sources;
|
|
65269
|
+
}
|
|
65177
65270
|
};
|
|
65178
65271
|
|
|
65179
65272
|
//! DISTINCT COMBINE EVENT
|
|
@@ -65205,12 +65298,12 @@ public:
|
|
|
65205
65298
|
}
|
|
65206
65299
|
}
|
|
65207
65300
|
|
|
65301
|
+
D_ASSERT(!tasks.empty());
|
|
65302
|
+
SetTasks(move(tasks));
|
|
65303
|
+
|
|
65208
65304
|
//! Now that all tables are combined, it's time to do the distinct aggregations
|
|
65209
65305
|
auto new_event = make_shared<HashDistinctAggregateFinalizeEvent>(op, gstate, *pipeline, client);
|
|
65210
65306
|
this->InsertEvent(move(new_event));
|
|
65211
|
-
|
|
65212
|
-
D_ASSERT(!tasks.empty());
|
|
65213
|
-
SetTasks(move(tasks));
|
|
65214
65307
|
}
|
|
65215
65308
|
};
|
|
65216
65309
|
|
|
@@ -65274,7 +65367,7 @@ SinkFinalizeType PhysicalHashAggregate::FinalizeInternal(Pipeline &pipeline, Eve
|
|
|
65274
65367
|
}
|
|
65275
65368
|
}
|
|
65276
65369
|
if (any_partitioned) {
|
|
65277
|
-
auto new_event = make_shared<
|
|
65370
|
+
auto new_event = make_shared<HashAggregateMergeEvent>(*this, gstate, &pipeline);
|
|
65278
65371
|
event.InsertEvent(move(new_event));
|
|
65279
65372
|
}
|
|
65280
65373
|
return SinkFinalizeType::READY;
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.2-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "d95b0d50d3"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.2-dev1479"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|