duckdb 0.5.2-dev1945.0 → 0.5.2-dev1957.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.5.2-dev1945.0",
5
+ "version": "0.5.2-dev1957.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -41325,6 +41325,9 @@ public:
41325
41325
  static void SetMultiScan(GlobalSinkState &state);
41326
41326
 
41327
41327
  private:
41328
+ //! When we only have distinct aggregates, we can delay adding groups to the main ht
41329
+ bool CanSkipRegularSink() const;
41330
+
41328
41331
  //! Finalize the distinct aggregates
41329
41332
  SinkFinalizeType FinalizeDistinct(Pipeline &pipeline, Event &event, ClientContext &context,
41330
41333
  GlobalSinkState &gstate) const;
@@ -41336,6 +41339,9 @@ private:
41336
41339
  //! Sink the distinct aggregates
41337
41340
  void SinkDistinct(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
41338
41341
  DataChunk &input) const;
41342
+ //! Create groups in the main ht for groups that would otherwise get filtered out completely
41343
+ SinkResultType SinkGroupsOnly(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
41344
+ DataChunk &input) const;
41339
41345
  };
41340
41346
 
41341
41347
  } // namespace duckdb
@@ -65630,6 +65636,21 @@ static vector<LogicalType> CreateGroupChunkTypes(vector<unique_ptr<Expression>>
65630
65636
  return types;
65631
65637
  }
65632
65638
 
65639
+ bool PhysicalHashAggregate::CanSkipRegularSink() const {
65640
+ if (!filter_indexes.empty()) {
65641
+ // If we have filters, we can't skip the regular sink, because we might lose groups otherwise.
65642
+ return false;
65643
+ }
65644
+ if (grouped_aggregate_data.aggregates.empty()) {
65645
+ // When there are no aggregates, we have to add to the main ht right away
65646
+ return false;
65647
+ }
65648
+ if (!non_distinct_filter.empty()) {
65649
+ return false;
65650
+ }
65651
+ return true;
65652
+ }
65653
+
65633
65654
  PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types,
65634
65655
  vector<unique_ptr<Expression>> expressions, idx_t estimated_cardinality)
65635
65656
  : PhysicalHashAggregate(context, move(types), move(expressions), {}, estimated_cardinality) {
@@ -65872,6 +65893,10 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, GlobalSink
65872
65893
  SinkDistinct(context, state, lstate, input);
65873
65894
  }
65874
65895
 
65896
+ if (CanSkipRegularSink()) {
65897
+ return SinkResultType::NEED_MORE_INPUT;
65898
+ }
65899
+
65875
65900
  DataChunk &aggregate_input_chunk = llstate.aggregate_input_chunk;
65876
65901
 
65877
65902
  auto &aggregates = grouped_aggregate_data.aggregates;
@@ -65950,6 +65975,9 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
65950
65975
 
65951
65976
  CombineDistinct(context, state, lstate);
65952
65977
 
65978
+ if (CanSkipRegularSink()) {
65979
+ return;
65980
+ }
65953
65981
  for (idx_t i = 0; i < groupings.size(); i++) {
65954
65982
  auto &grouping_gstate = gstate.grouping_states[i];
65955
65983
  auto &grouping_lstate = llstate.grouping_states[i];
@@ -89963,7 +89991,7 @@ public:
89963
89991
  }
89964
89992
 
89965
89993
  vector<unique_ptr<PartitionableHashTable>> intermediate_hts;
89966
- vector<unique_ptr<GroupedAggregateHashTable>> finalized_hts;
89994
+ vector<shared_ptr<GroupedAggregateHashTable>> finalized_hts;
89967
89995
 
89968
89996
  //! Whether or not any tuples were added to the HT
89969
89997
  bool is_empty;
@@ -90140,7 +90168,7 @@ bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState
90140
90168
  // schedule additional tasks to combine the partial HTs
90141
90169
  gstate.finalized_hts.resize(gstate.partition_info.n_partitions);
90142
90170
  for (idx_t r = 0; r < gstate.partition_info.n_partitions; r++) {
90143
- gstate.finalized_hts[r] = make_unique<GroupedAggregateHashTable>(
90171
+ gstate.finalized_hts[r] = make_shared<GroupedAggregateHashTable>(
90144
90172
  allocator, buffer_manager, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64);
90145
90173
  }
90146
90174
  gstate.is_partitioned = true;
@@ -90149,7 +90177,7 @@ bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState
90149
90177
  // TODO possible optimization, if total count < limit for 32 bit ht, use that one
90150
90178
  // create this ht here so finalize needs no lock on gstate
90151
90179
 
90152
- gstate.finalized_hts.push_back(make_unique<GroupedAggregateHashTable>(
90180
+ gstate.finalized_hts.push_back(make_shared<GroupedAggregateHashTable>(
90153
90181
  allocator, buffer_manager, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64));
90154
90182
  for (auto &pht : gstate.intermediate_hts) {
90155
90183
  auto unpartitioned = pht->GetUnpartitioned();
@@ -90229,7 +90257,7 @@ public:
90229
90257
  //! Heavy handed for now.
90230
90258
  mutex lock;
90231
90259
  //! The current position to scan the HT for output tuples
90232
- atomic<idx_t> ht_index;
90260
+ idx_t ht_index;
90233
90261
  //! The set of aggregate scan states
90234
90262
  unique_ptr<AggregateHTScanState[]> ht_scan_states;
90235
90263
  atomic<bool> initialized;
@@ -90249,6 +90277,8 @@ public:
90249
90277
 
90250
90278
  //! Materialized GROUP BY expressions & aggregates
90251
90279
  DataChunk scan_chunk;
90280
+ //! A reference to the current HT that we are scanning
90281
+ shared_ptr<GroupedAggregateHashTable> ht;
90252
90282
  };
90253
90283
 
90254
90284
  unique_ptr<GlobalSourceState> RadixPartitionedHashTable::GetGlobalSourceState(ClientContext &context) const {
@@ -90320,6 +90350,7 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
90320
90350
  idx_t elements_found = 0;
90321
90351
 
90322
90352
  lstate.scan_chunk.Reset();
90353
+ lstate.ht.reset();
90323
90354
  if (!state.initialized) {
90324
90355
  lock_guard<mutex> l(state.lock);
90325
90356
  if (!state.ht_scan_states) {
@@ -90331,17 +90362,23 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
90331
90362
  state.initialized = true;
90332
90363
  }
90333
90364
  while (true) {
90334
- idx_t ht_index = state.ht_index;
90335
- if (ht_index >= gstate.finalized_hts.size()) {
90336
- state.finished = true;
90337
- return;
90365
+ idx_t ht_index;
90366
+
90367
+ {
90368
+ lock_guard<mutex> l(state.lock);
90369
+ ht_index = state.ht_index;
90370
+ if (ht_index >= gstate.finalized_hts.size()) {
90371
+ state.finished = true;
90372
+ return;
90373
+ }
90374
+ D_ASSERT(ht_index < gstate.finalized_hts.size());
90375
+ lstate.ht = gstate.finalized_hts[ht_index];
90376
+ D_ASSERT(lstate.ht);
90338
90377
  }
90339
- D_ASSERT(ht_index < gstate.finalized_hts.size());
90340
90378
  D_ASSERT(state.ht_scan_states);
90341
- auto &ht = gstate.finalized_hts[ht_index];
90342
90379
  auto &scan_state = state.ht_scan_states[ht_index];
90343
- D_ASSERT(ht);
90344
- elements_found = ht->Scan(scan_state, lstate.scan_chunk);
90380
+ D_ASSERT(lstate.ht);
90381
+ elements_found = lstate.ht->Scan(scan_state, lstate.scan_chunk);
90345
90382
  if (elements_found > 0) {
90346
90383
  break;
90347
90384
  }
@@ -90351,6 +90388,9 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
90351
90388
  if (ht_index > state.ht_index) {
90352
90389
  // we have not yet worked on the table
90353
90390
  // move the global index forwards
90391
+ if (!gstate.multi_scan) {
90392
+ gstate.finalized_hts[state.ht_index].reset();
90393
+ }
90354
90394
  state.ht_index = ht_index;
90355
90395
  }
90356
90396
  }
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "be1bd1704f"
15
- #define DUCKDB_VERSION "v0.5.2-dev1945"
14
+ #define DUCKDB_SOURCE_ID "866efc2db6"
15
+ #define DUCKDB_VERSION "v0.5.2-dev1957"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //