duckdb 0.5.2-dev1473.0 → 0.5.2-dev1479.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.5.2-dev1473.0",
5
+ "version": "0.5.2-dev1479.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -65011,10 +65011,9 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
65011
65011
 
65012
65012
  //! REGULAR FINALIZE EVENT
65013
65013
 
65014
- class HashAggregateFinalizeEvent : public BasePipelineEvent {
65014
+ class HashAggregateMergeEvent : public BasePipelineEvent {
65015
65015
  public:
65016
- HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
65017
- Pipeline *pipeline_p)
65016
+ HashAggregateMergeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p, Pipeline *pipeline_p)
65018
65017
  : BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p) {
65019
65018
  }
65020
65019
 
@@ -65036,14 +65035,61 @@ public:
65036
65035
  }
65037
65036
  };
65038
65037
 
65038
+ //! REGULAR FINALIZE FROM DISTINCT FINALIZE
65039
+
65040
+ class HashAggregateFinalizeTask : public ExecutorTask {
65041
+ public:
65042
+ HashAggregateFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p, HashAggregateGlobalState &state_p,
65043
+ ClientContext &context, const PhysicalHashAggregate &op)
65044
+ : ExecutorTask(pipeline.executor), pipeline(pipeline), event(move(event_p)), gstate(state_p), context(context),
65045
+ op(op) {
65046
+ }
65047
+
65048
+ TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
65049
+ op.FinalizeInternal(pipeline, *event, context, gstate, false);
65050
+ D_ASSERT(!gstate.finished);
65051
+ gstate.finished = true;
65052
+ event->FinishTask();
65053
+ return TaskExecutionResult::TASK_FINISHED;
65054
+ }
65055
+
65056
+ private:
65057
+ Pipeline &pipeline;
65058
+ shared_ptr<Event> event;
65059
+ HashAggregateGlobalState &gstate;
65060
+ ClientContext &context;
65061
+ const PhysicalHashAggregate &op;
65062
+ };
65063
+
65064
+ class HashAggregateFinalizeEvent : public BasePipelineEvent {
65065
+ public:
65066
+ HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
65067
+ Pipeline *pipeline_p, ClientContext &context)
65068
+ : BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p), context(context) {
65069
+ }
65070
+
65071
+ const PhysicalHashAggregate &op;
65072
+ HashAggregateGlobalState &gstate;
65073
+ ClientContext &context;
65074
+
65075
+ public:
65076
+ void Schedule() override {
65077
+ vector<unique_ptr<Task>> tasks;
65078
+ tasks.push_back(make_unique<HashAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate, context, op));
65079
+ D_ASSERT(!tasks.empty());
65080
+ SetTasks(move(tasks));
65081
+ }
65082
+ };
65083
+
65039
65084
  //! DISTINCT FINALIZE TASK
65040
65085
 
65041
65086
  class HashDistinctAggregateFinalizeTask : public ExecutorTask {
65042
65087
  public:
65043
65088
  HashDistinctAggregateFinalizeTask(Pipeline &pipeline, shared_ptr<Event> event_p, HashAggregateGlobalState &state_p,
65044
- ClientContext &context, const PhysicalHashAggregate &op)
65089
+ ClientContext &context, const PhysicalHashAggregate &op,
65090
+ vector<vector<unique_ptr<GlobalSourceState>>> &global_sources_p)
65045
65091
  : ExecutorTask(pipeline.executor), pipeline(pipeline), event(move(event_p)), gstate(state_p), context(context),
65046
- op(op) {
65092
+ op(op), global_sources(global_sources_p) {
65047
65093
  }
65048
65094
 
65049
65095
  void AggregateDistinctGrouping(DistinctAggregateCollectionInfo &info,
@@ -65052,6 +65098,7 @@ public:
65052
65098
  auto &aggregates = info.aggregates;
65053
65099
  auto &data = *grouping_data.distinct_data;
65054
65100
  auto &state = *grouping_state.distinct_state;
65101
+ auto &table_state = *grouping_state.table_state;
65055
65102
 
65056
65103
  ThreadContext temp_thread_context(context);
65057
65104
  ExecutionContext temp_exec_context(context, temp_thread_context, &pipeline);
@@ -65089,9 +65136,12 @@ public:
65089
65136
  D_ASSERT(data.info.table_map.count(i));
65090
65137
  auto table_idx = data.info.table_map.at(i);
65091
65138
  auto &radix_table_p = data.radix_tables[table_idx];
65092
- auto &output_chunk = *state.distinct_output_chunks[table_idx];
65093
65139
 
65094
- auto global_source = radix_table_p->GetGlobalSourceState(context);
65140
+ // Create a duplicate of the output_chunk, because of multi-threading we cant alter the original
65141
+ DataChunk output_chunk;
65142
+ output_chunk.Initialize(context, state.distinct_output_chunks[table_idx]->GetTypes());
65143
+
65144
+ auto &global_source = global_sources[grouping_idx][i];
65095
65145
  auto local_source = radix_table_p->GetLocalSourceState(temp_exec_context);
65096
65146
 
65097
65147
  // Fetch all the data from the aggregate ht, and Sink it into the main ht
@@ -65123,11 +65173,11 @@ public:
65123
65173
  aggregate_input_chunk.SetCardinality(output_chunk);
65124
65174
 
65125
65175
  // Sink it into the main ht
65126
- grouping_data.table_data.Sink(temp_exec_context, *grouping_state.table_state, *temp_local_state,
65127
- group_chunk, aggregate_input_chunk, {i});
65176
+ grouping_data.table_data.Sink(temp_exec_context, table_state, *temp_local_state, group_chunk,
65177
+ aggregate_input_chunk, {i});
65128
65178
  }
65129
65179
  }
65130
- grouping_data.table_data.Combine(temp_exec_context, *grouping_state.table_state, *temp_local_state);
65180
+ grouping_data.table_data.Combine(temp_exec_context, table_state, *temp_local_state);
65131
65181
  }
65132
65182
 
65133
65183
  TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
@@ -65138,9 +65188,6 @@ public:
65138
65188
  auto &grouping_state = gstate.grouping_states[i];
65139
65189
  AggregateDistinctGrouping(info, grouping, grouping_state, i);
65140
65190
  }
65141
- op.FinalizeInternal(pipeline, *event, context, gstate, false);
65142
- D_ASSERT(!gstate.finished);
65143
- gstate.finished = true;
65144
65191
  event->FinishTask();
65145
65192
  return TaskExecutionResult::TASK_FINISHED;
65146
65193
  }
@@ -65151,6 +65198,7 @@ private:
65151
65198
  HashAggregateGlobalState &gstate;
65152
65199
  ClientContext &context;
65153
65200
  const PhysicalHashAggregate &op;
65201
+ vector<vector<unique_ptr<GlobalSourceState>>> &global_sources;
65154
65202
  };
65155
65203
 
65156
65204
  //! DISTINCT FINALIZE EVENT
@@ -65165,15 +65213,60 @@ public:
65165
65213
  const PhysicalHashAggregate &op;
65166
65214
  HashAggregateGlobalState &gstate;
65167
65215
  ClientContext &context;
65216
+ //! The GlobalSourceStates for all the radix tables of the distinct aggregates
65217
+ vector<vector<unique_ptr<GlobalSourceState>>> global_sources;
65168
65218
 
65169
65219
  public:
65170
65220
  void Schedule() override {
65221
+ global_sources = CreateGlobalSources();
65222
+
65171
65223
  vector<unique_ptr<Task>> tasks;
65172
- tasks.push_back(
65173
- make_unique<HashDistinctAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate, context, op));
65224
+ auto &scheduler = TaskScheduler::GetScheduler(context);
65225
+ auto number_of_threads = scheduler.NumberOfThreads();
65226
+ tasks.reserve(number_of_threads);
65227
+ for (int32_t i = 0; i < number_of_threads; i++) {
65228
+ tasks.push_back(make_unique<HashDistinctAggregateFinalizeTask>(*pipeline, shared_from_this(), gstate,
65229
+ context, op, global_sources));
65230
+ }
65174
65231
  D_ASSERT(!tasks.empty());
65175
65232
  SetTasks(move(tasks));
65176
65233
  }
65234
+
65235
+ void FinishEvent() override {
65236
+ //! Now that everything is added to the main ht, we can actually finalize
65237
+ auto new_event = make_shared<HashAggregateFinalizeEvent>(op, gstate, pipeline.get(), context);
65238
+ this->InsertEvent(move(new_event));
65239
+ }
65240
+
65241
+ private:
65242
+ vector<vector<unique_ptr<GlobalSourceState>>> CreateGlobalSources() {
65243
+ vector<vector<unique_ptr<GlobalSourceState>>> grouping_sources;
65244
+ grouping_sources.reserve(op.groupings.size());
65245
+ for (idx_t grouping_idx = 0; grouping_idx < op.groupings.size(); grouping_idx++) {
65246
+ auto &grouping = op.groupings[grouping_idx];
65247
+ auto &data = *grouping.distinct_data;
65248
+
65249
+ vector<unique_ptr<GlobalSourceState>> aggregate_sources;
65250
+ aggregate_sources.reserve(op.grouped_aggregate_data.aggregates.size());
65251
+
65252
+ for (idx_t i = 0; i < op.grouped_aggregate_data.aggregates.size(); i++) {
65253
+ auto &aggregate = op.grouped_aggregate_data.aggregates[i];
65254
+ auto &aggr = (BoundAggregateExpression &)*aggregate;
65255
+
65256
+ if (!aggr.IsDistinct()) {
65257
+ aggregate_sources.push_back(nullptr);
65258
+ continue;
65259
+ }
65260
+
65261
+ D_ASSERT(data.info.table_map.count(i));
65262
+ auto table_idx = data.info.table_map.at(i);
65263
+ auto &radix_table_p = data.radix_tables[table_idx];
65264
+ aggregate_sources.push_back(radix_table_p->GetGlobalSourceState(context));
65265
+ }
65266
+ grouping_sources.push_back(move(aggregate_sources));
65267
+ }
65268
+ return grouping_sources;
65269
+ }
65177
65270
  };
65178
65271
 
65179
65272
  //! DISTINCT COMBINE EVENT
@@ -65205,12 +65298,12 @@ public:
65205
65298
  }
65206
65299
  }
65207
65300
 
65301
+ D_ASSERT(!tasks.empty());
65302
+ SetTasks(move(tasks));
65303
+
65208
65304
  //! Now that all tables are combined, it's time to do the distinct aggregations
65209
65305
  auto new_event = make_shared<HashDistinctAggregateFinalizeEvent>(op, gstate, *pipeline, client);
65210
65306
  this->InsertEvent(move(new_event));
65211
-
65212
- D_ASSERT(!tasks.empty());
65213
- SetTasks(move(tasks));
65214
65307
  }
65215
65308
  };
65216
65309
 
@@ -65274,7 +65367,7 @@ SinkFinalizeType PhysicalHashAggregate::FinalizeInternal(Pipeline &pipeline, Eve
65274
65367
  }
65275
65368
  }
65276
65369
  if (any_partitioned) {
65277
- auto new_event = make_shared<HashAggregateFinalizeEvent>(*this, gstate, &pipeline);
65370
+ auto new_event = make_shared<HashAggregateMergeEvent>(*this, gstate, &pipeline);
65278
65371
  event.InsertEvent(move(new_event));
65279
65372
  }
65280
65373
  return SinkFinalizeType::READY;
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "e8fb74f8e7"
15
- #define DUCKDB_VERSION "v0.5.2-dev1473"
14
+ #define DUCKDB_SOURCE_ID "d95b0d50d3"
15
+ #define DUCKDB_VERSION "v0.5.2-dev1479"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //