duckdb 0.5.2-dev1945.0 → 0.5.2-dev1957.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +52 -12
- package/src/duckdb.hpp +2 -2
- package/src/parquet-amalgamation.cpp +34536 -34536
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -41325,6 +41325,9 @@ public:
|
|
|
41325
41325
|
static void SetMultiScan(GlobalSinkState &state);
|
|
41326
41326
|
|
|
41327
41327
|
private:
|
|
41328
|
+
//! When we only have distinct aggregates, we can delay adding groups to the main ht
|
|
41329
|
+
bool CanSkipRegularSink() const;
|
|
41330
|
+
|
|
41328
41331
|
//! Finalize the distinct aggregates
|
|
41329
41332
|
SinkFinalizeType FinalizeDistinct(Pipeline &pipeline, Event &event, ClientContext &context,
|
|
41330
41333
|
GlobalSinkState &gstate) const;
|
|
@@ -41336,6 +41339,9 @@ private:
|
|
|
41336
41339
|
//! Sink the distinct aggregates
|
|
41337
41340
|
void SinkDistinct(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
|
|
41338
41341
|
DataChunk &input) const;
|
|
41342
|
+
//! Create groups in the main ht for groups that would otherwise get filtered out completely
|
|
41343
|
+
SinkResultType SinkGroupsOnly(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
|
|
41344
|
+
DataChunk &input) const;
|
|
41339
41345
|
};
|
|
41340
41346
|
|
|
41341
41347
|
} // namespace duckdb
|
|
@@ -65630,6 +65636,21 @@ static vector<LogicalType> CreateGroupChunkTypes(vector<unique_ptr<Expression>>
|
|
|
65630
65636
|
return types;
|
|
65631
65637
|
}
|
|
65632
65638
|
|
|
65639
|
+
bool PhysicalHashAggregate::CanSkipRegularSink() const {
|
|
65640
|
+
if (!filter_indexes.empty()) {
|
|
65641
|
+
// If we have filters, we can't skip the regular sink, because we might lose groups otherwise.
|
|
65642
|
+
return false;
|
|
65643
|
+
}
|
|
65644
|
+
if (grouped_aggregate_data.aggregates.empty()) {
|
|
65645
|
+
// When there are no aggregates, we have to add to the main ht right away
|
|
65646
|
+
return false;
|
|
65647
|
+
}
|
|
65648
|
+
if (!non_distinct_filter.empty()) {
|
|
65649
|
+
return false;
|
|
65650
|
+
}
|
|
65651
|
+
return true;
|
|
65652
|
+
}
|
|
65653
|
+
|
|
65633
65654
|
PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types,
|
|
65634
65655
|
vector<unique_ptr<Expression>> expressions, idx_t estimated_cardinality)
|
|
65635
65656
|
: PhysicalHashAggregate(context, move(types), move(expressions), {}, estimated_cardinality) {
|
|
@@ -65872,6 +65893,10 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, GlobalSink
|
|
|
65872
65893
|
SinkDistinct(context, state, lstate, input);
|
|
65873
65894
|
}
|
|
65874
65895
|
|
|
65896
|
+
if (CanSkipRegularSink()) {
|
|
65897
|
+
return SinkResultType::NEED_MORE_INPUT;
|
|
65898
|
+
}
|
|
65899
|
+
|
|
65875
65900
|
DataChunk &aggregate_input_chunk = llstate.aggregate_input_chunk;
|
|
65876
65901
|
|
|
65877
65902
|
auto &aggregates = grouped_aggregate_data.aggregates;
|
|
@@ -65950,6 +65975,9 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
|
|
|
65950
65975
|
|
|
65951
65976
|
CombineDistinct(context, state, lstate);
|
|
65952
65977
|
|
|
65978
|
+
if (CanSkipRegularSink()) {
|
|
65979
|
+
return;
|
|
65980
|
+
}
|
|
65953
65981
|
for (idx_t i = 0; i < groupings.size(); i++) {
|
|
65954
65982
|
auto &grouping_gstate = gstate.grouping_states[i];
|
|
65955
65983
|
auto &grouping_lstate = llstate.grouping_states[i];
|
|
@@ -89963,7 +89991,7 @@ public:
|
|
|
89963
89991
|
}
|
|
89964
89992
|
|
|
89965
89993
|
vector<unique_ptr<PartitionableHashTable>> intermediate_hts;
|
|
89966
|
-
vector<unique_ptr<GroupedAggregateHashTable>> finalized_hts;
|
|
89994
|
+
vector<shared_ptr<GroupedAggregateHashTable>> finalized_hts;
|
|
89967
89995
|
|
|
89968
89996
|
//! Whether or not any tuples were added to the HT
|
|
89969
89997
|
bool is_empty;
|
|
@@ -90140,7 +90168,7 @@ bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState
|
|
|
90140
90168
|
// schedule additional tasks to combine the partial HTs
|
|
90141
90169
|
gstate.finalized_hts.resize(gstate.partition_info.n_partitions);
|
|
90142
90170
|
for (idx_t r = 0; r < gstate.partition_info.n_partitions; r++) {
|
|
90143
|
-
gstate.finalized_hts[r] = make_unique<GroupedAggregateHashTable>(
|
|
90171
|
+
gstate.finalized_hts[r] = make_shared<GroupedAggregateHashTable>(
|
|
90144
90172
|
allocator, buffer_manager, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64);
|
|
90145
90173
|
}
|
|
90146
90174
|
gstate.is_partitioned = true;
|
|
@@ -90149,7 +90177,7 @@ bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState
|
|
|
90149
90177
|
// TODO possible optimization, if total count < limit for 32 bit ht, use that one
|
|
90150
90178
|
// create this ht here so finalize needs no lock on gstate
|
|
90151
90179
|
|
|
90152
|
-
gstate.finalized_hts.push_back(make_unique<GroupedAggregateHashTable>(
|
|
90180
|
+
gstate.finalized_hts.push_back(make_shared<GroupedAggregateHashTable>(
|
|
90153
90181
|
allocator, buffer_manager, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64));
|
|
90154
90182
|
for (auto &pht : gstate.intermediate_hts) {
|
|
90155
90183
|
auto unpartitioned = pht->GetUnpartitioned();
|
|
@@ -90229,7 +90257,7 @@ public:
|
|
|
90229
90257
|
//! Heavy handed for now.
|
|
90230
90258
|
mutex lock;
|
|
90231
90259
|
//! The current position to scan the HT for output tuples
|
|
90232
|
-
|
|
90260
|
+
idx_t ht_index;
|
|
90233
90261
|
//! The set of aggregate scan states
|
|
90234
90262
|
unique_ptr<AggregateHTScanState[]> ht_scan_states;
|
|
90235
90263
|
atomic<bool> initialized;
|
|
@@ -90249,6 +90277,8 @@ public:
|
|
|
90249
90277
|
|
|
90250
90278
|
//! Materialized GROUP BY expressions & aggregates
|
|
90251
90279
|
DataChunk scan_chunk;
|
|
90280
|
+
//! A reference to the current HT that we are scanning
|
|
90281
|
+
shared_ptr<GroupedAggregateHashTable> ht;
|
|
90252
90282
|
};
|
|
90253
90283
|
|
|
90254
90284
|
unique_ptr<GlobalSourceState> RadixPartitionedHashTable::GetGlobalSourceState(ClientContext &context) const {
|
|
@@ -90320,6 +90350,7 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
|
|
|
90320
90350
|
idx_t elements_found = 0;
|
|
90321
90351
|
|
|
90322
90352
|
lstate.scan_chunk.Reset();
|
|
90353
|
+
lstate.ht.reset();
|
|
90323
90354
|
if (!state.initialized) {
|
|
90324
90355
|
lock_guard<mutex> l(state.lock);
|
|
90325
90356
|
if (!state.ht_scan_states) {
|
|
@@ -90331,17 +90362,23 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
|
|
|
90331
90362
|
state.initialized = true;
|
|
90332
90363
|
}
|
|
90333
90364
|
while (true) {
|
|
90334
|
-
idx_t ht_index
|
|
90335
|
-
|
|
90336
|
-
|
|
90337
|
-
|
|
90365
|
+
idx_t ht_index;
|
|
90366
|
+
|
|
90367
|
+
{
|
|
90368
|
+
lock_guard<mutex> l(state.lock);
|
|
90369
|
+
ht_index = state.ht_index;
|
|
90370
|
+
if (ht_index >= gstate.finalized_hts.size()) {
|
|
90371
|
+
state.finished = true;
|
|
90372
|
+
return;
|
|
90373
|
+
}
|
|
90374
|
+
D_ASSERT(ht_index < gstate.finalized_hts.size());
|
|
90375
|
+
lstate.ht = gstate.finalized_hts[ht_index];
|
|
90376
|
+
D_ASSERT(lstate.ht);
|
|
90338
90377
|
}
|
|
90339
|
-
D_ASSERT(ht_index < gstate.finalized_hts.size());
|
|
90340
90378
|
D_ASSERT(state.ht_scan_states);
|
|
90341
|
-
auto &ht = gstate.finalized_hts[ht_index];
|
|
90342
90379
|
auto &scan_state = state.ht_scan_states[ht_index];
|
|
90343
|
-
D_ASSERT(ht);
|
|
90344
|
-
elements_found = ht->Scan(scan_state, lstate.scan_chunk);
|
|
90380
|
+
D_ASSERT(lstate.ht);
|
|
90381
|
+
elements_found = lstate.ht->Scan(scan_state, lstate.scan_chunk);
|
|
90345
90382
|
if (elements_found > 0) {
|
|
90346
90383
|
break;
|
|
90347
90384
|
}
|
|
@@ -90351,6 +90388,9 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
|
|
|
90351
90388
|
if (ht_index > state.ht_index) {
|
|
90352
90389
|
// we have not yet worked on the table
|
|
90353
90390
|
// move the global index forwards
|
|
90391
|
+
if (!gstate.multi_scan) {
|
|
90392
|
+
gstate.finalized_hts[state.ht_index].reset();
|
|
90393
|
+
}
|
|
90354
90394
|
state.ht_index = ht_index;
|
|
90355
90395
|
}
|
|
90356
90396
|
}
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.2-dev1945"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "866efc2db6"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.2-dev1957"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|