npm - duckdb - Versions diffs - 0.5.2-dev1945.0 → 0.5.2-dev1957.0 - Mend

duckdb 0.5.2-dev1945.0 → 0.5.2-dev1957.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/package.json +1 -1
package/src/duckdb.cpp +52 -12
package/src/duckdb.hpp +2 -2
package/src/parquet-amalgamation.cpp +34536 -34536

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.5.2-dev1945.0",
+  "version": "0.5.2-dev1957.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb.cpp CHANGED Viewed

@@ -41325,6 +41325,9 @@ public:
 	static void SetMultiScan(GlobalSinkState &state);
 private:
+	//! When we only have distinct aggregates, we can delay adding groups to the main ht
+	bool CanSkipRegularSink() const;
 	//! Finalize the distinct aggregates
 	SinkFinalizeType FinalizeDistinct(Pipeline &pipeline, Event &event, ClientContext &context,
 	                                  GlobalSinkState &gstate) const;
@@ -41336,6 +41339,9 @@ private:
 	//! Sink the distinct aggregates
 	void SinkDistinct(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
 	                  DataChunk &input) const;
+	//! Create groups in the main ht for groups that would otherwise get filtered out completely
+	SinkResultType SinkGroupsOnly(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
+	                              DataChunk &input) const;
 };
 } // namespace duckdb
@@ -65630,6 +65636,21 @@ static vector<LogicalType> CreateGroupChunkTypes(vector<unique_ptr<Expression>>
 	return types;
 }
+bool PhysicalHashAggregate::CanSkipRegularSink() const {
+	if (!filter_indexes.empty()) {
+		// If we have filters, we can't skip the regular sink, because we might lose groups otherwise.
+		return false;
+	}
+	if (grouped_aggregate_data.aggregates.empty()) {
+		// When there are no aggregates, we have to add to the main ht right away
+		return false;
+	}
+	if (!non_distinct_filter.empty()) {
+		return false;
+	}
+	return true;
+}
 PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types,
                                              vector<unique_ptr<Expression>> expressions, idx_t estimated_cardinality)
     : PhysicalHashAggregate(context, move(types), move(expressions), {}, estimated_cardinality) {
@@ -65872,6 +65893,10 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, GlobalSink
 		SinkDistinct(context, state, lstate, input);
 	}
+	if (CanSkipRegularSink()) {
+		return SinkResultType::NEED_MORE_INPUT;
+	}
 	DataChunk &aggregate_input_chunk = llstate.aggregate_input_chunk;
 	auto &aggregates = grouped_aggregate_data.aggregates;
@@ -65950,6 +65975,9 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
 	CombineDistinct(context, state, lstate);
+	if (CanSkipRegularSink()) {
+		return;
+	}
 	for (idx_t i = 0; i < groupings.size(); i++) {
 		auto &grouping_gstate = gstate.grouping_states[i];
 		auto &grouping_lstate = llstate.grouping_states[i];
@@ -89963,7 +89991,7 @@ public:
 	}
 	vector<unique_ptr<PartitionableHashTable>> intermediate_hts;
-	vector<unique_ptr<GroupedAggregateHashTable>> finalized_hts;
+	vector<shared_ptr<GroupedAggregateHashTable>> finalized_hts;
 	//! Whether or not any tuples were added to the HT
 	bool is_empty;
@@ -90140,7 +90168,7 @@ bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState
 		// schedule additional tasks to combine the partial HTs
 		gstate.finalized_hts.resize(gstate.partition_info.n_partitions);
 		for (idx_t r = 0; r < gstate.partition_info.n_partitions; r++) {
-			gstate.finalized_hts[r] = make_unique<GroupedAggregateHashTable>(
+			gstate.finalized_hts[r] = make_shared<GroupedAggregateHashTable>(
 			    allocator, buffer_manager, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64);
 		}
 		gstate.is_partitioned = true;
@@ -90149,7 +90177,7 @@ bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState
 		     // TODO possible optimization, if total count < limit for 32 bit ht, use that one
 		     // create this ht here so finalize needs no lock on gstate
-		gstate.finalized_hts.push_back(make_unique<GroupedAggregateHashTable>(
+		gstate.finalized_hts.push_back(make_shared<GroupedAggregateHashTable>(
 		    allocator, buffer_manager, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64));
 		for (auto &pht : gstate.intermediate_hts) {
 			auto unpartitioned = pht->GetUnpartitioned();
@@ -90229,7 +90257,7 @@ public:
 	//! Heavy handed for now.
 	mutex lock;
 	//! The current position to scan the HT for output tuples
-	atomic<idx_t> ht_index;
+	idx_t ht_index;
 	//! The set of aggregate scan states
 	unique_ptr<AggregateHTScanState[]> ht_scan_states;
 	atomic<bool> initialized;
@@ -90249,6 +90277,8 @@ public:
 	//! Materialized GROUP BY expressions & aggregates
 	DataChunk scan_chunk;
+	//! A reference to the current HT that we are scanning
+	shared_ptr<GroupedAggregateHashTable> ht;
 };
 unique_ptr<GlobalSourceState> RadixPartitionedHashTable::GetGlobalSourceState(ClientContext &context) const {
@@ -90320,6 +90350,7 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
 	idx_t elements_found = 0;
 	lstate.scan_chunk.Reset();
+	lstate.ht.reset();
 	if (!state.initialized) {
 		lock_guard<mutex> l(state.lock);
 		if (!state.ht_scan_states) {
@@ -90331,17 +90362,23 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
 		state.initialized = true;
 	}
 	while (true) {
-		idx_t ht_index = state.ht_index;
-		if (ht_index >= gstate.finalized_hts.size()) {
-			state.finished = true;
-			return;
+		idx_t ht_index;
+		{
+			lock_guard<mutex> l(state.lock);
+			ht_index = state.ht_index;
+			if (ht_index >= gstate.finalized_hts.size()) {
+				state.finished = true;
+				return;
+			}
+			D_ASSERT(ht_index < gstate.finalized_hts.size());
+			lstate.ht = gstate.finalized_hts[ht_index];
+			D_ASSERT(lstate.ht);
 		}
-		D_ASSERT(ht_index < gstate.finalized_hts.size());
 		D_ASSERT(state.ht_scan_states);
-		auto &ht = gstate.finalized_hts[ht_index];
 		auto &scan_state = state.ht_scan_states[ht_index];
-		D_ASSERT(ht);
-		elements_found = ht->Scan(scan_state, lstate.scan_chunk);
+		D_ASSERT(lstate.ht);
+		elements_found = lstate.ht->Scan(scan_state, lstate.scan_chunk);
 		if (elements_found > 0) {
 			break;
 		}
@@ -90351,6 +90388,9 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
 		if (ht_index > state.ht_index) {
 			// we have not yet worked on the table
 			// move the global index forwards
+			if (!gstate.multi_scan) {
+				gstate.finalized_hts[state.ht_index].reset();
+			}
 			state.ht_index = ht_index;
 		}
 	}

package/src/duckdb.hpp CHANGED Viewed

@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 #pragma once
 #define DUCKDB_AMALGAMATION 1
 #define DUCKDB_AMALGAMATION_EXTENDED 1
-#define DUCKDB_SOURCE_ID "be1bd1704f"
-#define DUCKDB_VERSION "v0.5.2-dev1945"
+#define DUCKDB_SOURCE_ID "866efc2db6"
+#define DUCKDB_VERSION "v0.5.2-dev1957"
 //===----------------------------------------------------------------------===//
 //                         DuckDB
 //