npm - duckdb - Versions diffs - 0.7.2-dev1901.0 → 0.7.2-dev2233.0 - Mend

duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (198) hide show

package/src/duckdb/src/execution/aggregate_hashtable.cpp CHANGED Viewed

@@ -3,9 +3,10 @@
 #include "duckdb/catalog/catalog_entry/aggregate_function_catalog_entry.hpp"
 #include "duckdb/common/algorithm.hpp"
 #include "duckdb/common/exception.hpp"
+#include "duckdb/common/radix_partitioning.hpp"
 #include "duckdb/common/row_operations/row_operations.hpp"
 #include "duckdb/common/types/null_value.hpp"
-#include "duckdb/common/types/row_data_collection.hpp"
+#include "duckdb/common/types/row/tuple_data_iterator.hpp"
 #include "duckdb/common/vector_operations/unary_executor.hpp"
 #include "duckdb/common/vector_operations/vector_operations.hpp"
 #include "duckdb/execution/expression_executor.hpp"
@@ -16,7 +17,7 @@
 namespace duckdb {
-using ValidityBytes = RowLayout::ValidityBytes;
+using ValidityBytes = TupleDataLayout::ValidityBytes;
 GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, Allocator &allocator,
                                                      vector<LogicalType> group_types, vector<LogicalType> payload_types,
@@ -34,7 +35,8 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
 AggregateHTAppendState::AggregateHTAppendState()
     : ht_offsets(LogicalTypeId::BIGINT), hash_salts(LogicalTypeId::SMALLINT),
       group_compare_vector(STANDARD_VECTOR_SIZE), no_match_vector(STANDARD_VECTOR_SIZE),
-      empty_vector(STANDARD_VECTOR_SIZE), new_groups(STANDARD_VECTOR_SIZE), addresses(LogicalType::POINTER) {
+      empty_vector(STANDARD_VECTOR_SIZE), new_groups(STANDARD_VECTOR_SIZE), addresses(LogicalType::POINTER),
+      chunk_state_initialized(false) {
 }
 GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, Allocator &allocator,
@@ -43,19 +45,19 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
                                                      vector<AggregateObject> aggregate_objects_p,
                                                      HtEntryType entry_type, idx_t initial_capacity)
     : BaseAggregateHashTable(context, allocator, aggregate_objects_p, std::move(payload_types_p)),
-      entry_type(entry_type), capacity(0), entries(0), payload_page_offset(0), is_finalized(false),
-      aggregate_allocator(allocator) {
+      entry_type(entry_type), capacity(0), is_finalized(false),
+      aggregate_allocator(make_shared<ArenaAllocator>(allocator)) {
 	// Append hash column to the end and initialise the row layout
 	group_types_p.emplace_back(LogicalType::HASH);
 	layout.Initialize(std::move(group_types_p), std::move(aggregate_objects_p));
+	tuple_size = layout.GetRowWidth();
+	tuples_per_block = Storage::BLOCK_SIZE / tuple_size;
 	// HT layout
 	hash_offset = layout.GetOffsets()[layout.ColumnCount() - 1];
+	data_collection = make_uniq<TupleDataCollection>(buffer_manager, layout);
+	data_collection->InitializeAppend(td_pin_state, TupleDataPinProperties::KEEP_EVERYTHING_PINNED);
-	tuple_size = layout.GetRowWidth();
-	D_ASSERT(tuple_size <= Storage::BLOCK_SIZE);
-	tuples_per_block = Storage::BLOCK_SIZE / tuple_size;
 	hashes_hdl = buffer_manager.Allocate(Storage::BLOCK_SIZE);
 	hashes_hdl_ptr = hashes_hdl.Ptr();
@@ -75,44 +77,18 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
 	}
 	predicates.resize(layout.ColumnCount() - 1, ExpressionType::COMPARE_EQUAL);
-	string_heap = make_uniq<RowDataCollection>(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
 }
 GroupedAggregateHashTable::~GroupedAggregateHashTable() {
 	Destroy();
 }
-template <class FUNC>
-void GroupedAggregateHashTable::PayloadApply(FUNC fun) {
-	if (entries == 0) {
+void GroupedAggregateHashTable::Destroy() {
+	if (data_collection->Count() == 0) {
 		return;
 	}
-	idx_t apply_entries = entries;
-	idx_t page_nr = 0;
-	idx_t page_offset = 0;
-	for (auto &payload_chunk_ptr : payload_hds_ptrs) {
-		auto this_entries = MinValue(tuples_per_block, apply_entries);
-		page_offset = 0;
-		for (data_ptr_t ptr = payload_chunk_ptr, end = payload_chunk_ptr + this_entries * tuple_size; ptr < end;
-		     ptr += tuple_size) {
-			fun(page_nr, page_offset++, ptr);
-		}
-		apply_entries -= this_entries;
-		page_nr++;
-	}
-	D_ASSERT(apply_entries == 0);
-}
-void GroupedAggregateHashTable::NewBlock() {
-	auto pin = buffer_manager.Allocate(Storage::BLOCK_SIZE);
-	payload_hds.push_back(std::move(pin));
-	payload_hds_ptrs.push_back(payload_hds.back().Ptr());
-	payload_page_offset = 0;
-}
-void GroupedAggregateHashTable::Destroy() {
-	// check if there is a destructor
+	// Check if there is an aggregate with a destructor
 	bool has_destructor = false;
 	for (auto &aggr : layout.GetAggregates()) {
 		if (aggr.function.destructor) {
@@ -122,32 +98,25 @@ void GroupedAggregateHashTable::Destroy() {
 	if (!has_destructor) {
 		return;
 	}
-	// there are aggregates with destructors: loop over the hash table
-	// and call the destructor method for each of the aggregates
-	data_ptr_t data_pointers[STANDARD_VECTOR_SIZE];
-	Vector state_vector(LogicalType::POINTER, (data_ptr_t)data_pointers);
-	idx_t count = 0;
-	RowOperationsState state(aggregate_allocator.GetAllocator());
-	PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) {
-		data_pointers[count++] = ptr;
-		if (count == STANDARD_VECTOR_SIZE) {
-			RowOperations::DestroyStates(state, layout, state_vector, count);
-			count = 0;
-		}
-	});
-	RowOperations::DestroyStates(state, layout, state_vector, count);
+	// There are aggregates with destructors: Call the destructor for each of the aggregates
+	RowOperationsState state(aggregate_allocator->GetAllocator());
+	TupleDataChunkIterator iterator(*data_collection, TupleDataPinProperties::DESTROY_AFTER_DONE, false);
+	auto &row_locations = iterator.GetChunkState().row_locations;
+	do {
+		RowOperations::DestroyStates(state, layout, row_locations, iterator.GetCurrentChunkCount());
+	} while (iterator.Next());
+	data_collection->Reset();
 }
 template <class ENTRY>
 void GroupedAggregateHashTable::VerifyInternal() {
 	auto hashes_ptr = (ENTRY *)hashes_hdl_ptr;
-	D_ASSERT(payload_hds.size() == payload_hds_ptrs.size());
 	idx_t count = 0;
 	for (idx_t i = 0; i < capacity; i++) {
 		if (hashes_ptr[i].page_nr > 0) {
 			D_ASSERT(hashes_ptr[i].page_offset < tuples_per_block);
-			D_ASSERT(hashes_ptr[i].page_nr <= payload_hds.size());
+			D_ASSERT(hashes_ptr[i].page_nr <= payload_hds_ptrs.size());
 			auto ptr = payload_hds_ptrs[hashes_ptr[i].page_nr - 1] + ((hashes_ptr[i].page_offset) * tuple_size);
 			auto hash = Load<hash_t>(ptr + hash_offset);
 			D_ASSERT((hashes_ptr[i].salt) == (hash >> hash_prefix_shift));
@@ -156,7 +125,7 @@ void GroupedAggregateHashTable::VerifyInternal() {
 		}
 	}
 	(void)count;
-	D_ASSERT(count == entries);
+	D_ASSERT(count == Count());
 }
 idx_t GroupedAggregateHashTable::InitialCapacity() {
@@ -202,47 +171,65 @@ void GroupedAggregateHashTable::Verify() {
 template <class ENTRY>
 void GroupedAggregateHashTable::Resize(idx_t size) {
-	Verify();
 	D_ASSERT(!is_finalized);
+	D_ASSERT(size >= STANDARD_VECTOR_SIZE);
+	D_ASSERT(IsPowerOfTwo(size));
-	if (size <= capacity) {
+	if (size < capacity) {
 		throw InternalException("Cannot downsize a hash table!");
 	}
-	D_ASSERT(size >= STANDARD_VECTOR_SIZE);
-	// size needs to be a power of 2
-	D_ASSERT((size & (size - 1)) == 0);
-	bitmask = size - 1;
+	capacity = size;
-	auto byte_size = size * sizeof(ENTRY);
+	bitmask = capacity - 1;
+	const auto byte_size = capacity * sizeof(ENTRY);
 	if (byte_size > (idx_t)Storage::BLOCK_SIZE) {
 		hashes_hdl = buffer_manager.Allocate(byte_size);
 		hashes_hdl_ptr = hashes_hdl.Ptr();
 	}
 	memset(hashes_hdl_ptr, 0, byte_size);
-	capacity = size;
-	auto hashes_arr = (ENTRY *)hashes_hdl_ptr;
+	if (Count() != 0) {
+		D_ASSERT(!payload_hds_ptrs.empty());
+		auto hashes_arr = (ENTRY *)hashes_hdl_ptr;
+		idx_t block_id = 0;
+		auto block_pointer = payload_hds_ptrs[block_id];
+		auto block_end = block_pointer + tuples_per_block * tuple_size;
+		TupleDataChunkIterator iterator(*data_collection, TupleDataPinProperties::ALREADY_PINNED, false);
+		const auto row_locations = iterator.GetRowLocations();
+		do {
+			for (idx_t i = 0; i < iterator.GetCurrentChunkCount(); i++) {
+				const auto &row_location = row_locations[i];
+				if (row_location > block_end || row_location < block_pointer) {
+					block_id++;
+					D_ASSERT(block_id < payload_hds_ptrs.size());
+					block_pointer = payload_hds_ptrs[block_id];
+					block_end = block_pointer + tuples_per_block * tuple_size;
+				}
+				D_ASSERT(row_location >= block_pointer && row_location < block_end);
+				D_ASSERT((row_location - block_pointer) % tuple_size == 0);
+				const auto hash = Load<hash_t>(row_location + hash_offset);
+				D_ASSERT((hash & bitmask) == (hash % capacity));
+				D_ASSERT(hash >> hash_prefix_shift <= NumericLimits<uint16_t>::Maximum());
+				auto entry_idx = (idx_t)hash & bitmask;
+				while (hashes_arr[entry_idx].page_nr > 0) {
+					entry_idx++;
+					if (entry_idx >= capacity) {
+						entry_idx = 0;
+					}
+				}
-	PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) {
-		auto hash = Load<hash_t>(ptr + hash_offset);
-		D_ASSERT((hash & bitmask) == (hash % capacity));
-		auto entry_idx = (idx_t)hash & bitmask;
-		while (hashes_arr[entry_idx].page_nr > 0) {
-			entry_idx++;
-			if (entry_idx >= capacity) {
-				entry_idx = 0;
+				auto &ht_entry = hashes_arr[entry_idx];
+				D_ASSERT(!ht_entry.page_nr);
+				ht_entry.salt = hash >> hash_prefix_shift;
+				ht_entry.page_nr = block_id + 1;
+				ht_entry.page_offset = (row_location - block_pointer) / tuple_size;
 			}
-		}
-		D_ASSERT(!hashes_arr[entry_idx].page_nr);
-		D_ASSERT(hash >> hash_prefix_shift <= NumericLimits<uint16_t>::Maximum());
-		hashes_arr[entry_idx].salt = hash >> hash_prefix_shift;
-		hashes_arr[entry_idx].page_nr = page_nr + 1;
-		hashes_arr[entry_idx].page_offset = page_offset;
-	});
+		} while (iterator.Next());
+	}
 	Verify();
 }
@@ -272,26 +259,25 @@ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChu
 idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes,
                                           DataChunk &payload, const vector<idx_t> &filter) {
 	D_ASSERT(!is_finalized);
 	if (groups.size() == 0) {
 		return 0;
 	}
+#ifdef DEBUG
 	D_ASSERT(groups.ColumnCount() + 1 == layout.ColumnCount());
 	for (idx_t i = 0; i < groups.ColumnCount(); i++) {
 		D_ASSERT(groups.GetTypes()[i] == layout.GetTypes()[i]);
 	}
+#endif
 	auto new_group_count = FindOrCreateGroups(state, groups, group_hashes, state.addresses, state.new_groups);
 	VectorOperations::AddInPlace(state.addresses, layout.GetAggrOffset(), payload.size());
-	// now every cell has an entry
-	// update the aggregates
-	idx_t payload_idx = 0;
+	// Now every cell has an entry, update the aggregates
 	auto &aggregates = layout.GetAggregates();
 	idx_t filter_idx = 0;
-	RowOperationsState row_state(aggregate_allocator.GetAllocator());
+	idx_t payload_idx = 0;
+	RowOperationsState row_state(aggregate_allocator->GetAllocator());
 	for (idx_t i = 0; i < aggregates.size(); i++) {
 		auto &aggr = aggregates[i];
 		if (filter_idx >= filter.size() || i < filter[filter_idx]) {
@@ -309,7 +295,7 @@ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChu
 			RowOperations::UpdateStates(row_state, aggr, state.addresses, payload, payload_idx, payload.size());
 		}
-		// move to the next aggregate
+		// Move to the next aggregate
 		payload_idx += aggr.child_count;
 		VectorOperations::AddInPlace(state.addresses, aggr.payload_size, payload.size());
 		filter_idx++;
@@ -336,7 +322,7 @@ void GroupedAggregateHashTable::FetchAggregates(DataChunk &groups, DataChunk &re
 	Vector addresses(LogicalType::POINTER);
 	FindOrCreateGroups(append_state, groups, addresses);
 	// now fetch the aggregates
-	RowOperationsState row_state(aggregate_allocator.GetAllocator());
+	RowOperationsState row_state(aggregate_allocator->GetAllocator());
 	RowOperations::FinalizeStates(row_state, layout, addresses, result, 0);
 }
@@ -346,42 +332,39 @@ idx_t GroupedAggregateHashTable::ResizeThreshold() {
 template <class ENTRY>
 idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(AggregateHTAppendState &state, DataChunk &groups,
-                                                            Vector &group_hashes, Vector &addresses,
+                                                            Vector &group_hashes_v, Vector &addresses_v,
                                                             SelectionVector &new_groups_out) {
 	D_ASSERT(!is_finalized);
+	D_ASSERT(groups.ColumnCount() + 1 == layout.ColumnCount());
+	D_ASSERT(group_hashes_v.GetType() == LogicalType::HASH);
+	D_ASSERT(state.ht_offsets.GetVectorType() == VectorType::FLAT_VECTOR);
+	D_ASSERT(state.ht_offsets.GetType() == LogicalType::BIGINT);
+	D_ASSERT(addresses_v.GetType() == LogicalType::POINTER);
+	D_ASSERT(state.hash_salts.GetType() == LogicalType::SMALLINT);
-	if (entries + groups.size() > MaxCapacity()) {
+	if (Count() + groups.size() > MaxCapacity()) {
 		throw InternalException("Hash table capacity reached");
 	}
-	// resize at 50% capacity, also need to fit the entire vector
-	if (capacity - entries <= groups.size() || entries > ResizeThreshold()) {
+	// Resize at 50% capacity, also need to fit the entire vector
+	if (capacity - Count() <= groups.size() || Count() > ResizeThreshold()) {
+		Verify();
 		Resize<ENTRY>(capacity * 2);
 	}
+	D_ASSERT(capacity - Count() >= groups.size()); // we need to be able to fit at least one vector of data
-	D_ASSERT(capacity - entries >= groups.size());
-	D_ASSERT(groups.ColumnCount() + 1 == layout.ColumnCount());
-	// we need to be able to fit at least one vector of data
-	D_ASSERT(capacity - entries >= groups.size());
-	D_ASSERT(group_hashes.GetType() == LogicalType::HASH);
+	group_hashes_v.Flatten(groups.size());
+	auto group_hashes = FlatVector::GetData<hash_t>(group_hashes_v);
-	group_hashes.Flatten(groups.size());
-	auto group_hashes_ptr = FlatVector::GetData<hash_t>(group_hashes);
+	addresses_v.Flatten(groups.size());
+	auto addresses = FlatVector::GetData<data_ptr_t>(addresses_v);
-	D_ASSERT(state.ht_offsets.GetVectorType() == VectorType::FLAT_VECTOR);
-	D_ASSERT(state.ht_offsets.GetType() == LogicalType::BIGINT);
-	D_ASSERT(addresses.GetType() == LogicalType::POINTER);
-	addresses.Flatten(groups.size());
-	auto addresses_ptr = FlatVector::GetData<data_ptr_t>(addresses);
-	// compute the entry in the table based on the hash using a modulo
+	// Compute the entry in the table based on the hash using a modulo,
 	// and precompute the hash salts for faster comparison below
-	D_ASSERT(state.hash_salts.GetType() == LogicalType::SMALLINT);
 	auto ht_offsets_ptr = FlatVector::GetData<uint64_t>(state.ht_offsets);
 	auto hash_salts_ptr = FlatVector::GetData<uint16_t>(state.hash_salts);
 	for (idx_t r = 0; r < groups.size(); r++) {
-		auto element = group_hashes_ptr[r];
+		auto element = group_hashes[r];
 		D_ASSERT((element & bitmask) == (element % capacity));
 		ht_offsets_ptr[r] = element & bitmask;
 		hash_salts_ptr[r] = element >> hash_prefix_shift;
@@ -389,9 +372,7 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(AggregateHTAppendSta
 	// we start out with all entries [0, 1, 2, ..., groups.size()]
 	const SelectionVector *sel_vector = FlatVector::IncrementalSelectionVector();
-	idx_t remaining_entries = groups.size();
-	// make a chunk that references the groups and the hashes
+	// Make a chunk that references the groups and the hashes and convert to unified format
 	if (state.group_chunk.ColumnCount() == 0) {
 		state.group_chunk.InitializeEmpty(layout.GetTypes());
 	}
@@ -399,81 +380,101 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(AggregateHTAppendSta
 	for (idx_t grp_idx = 0; grp_idx < groups.ColumnCount(); grp_idx++) {
 		state.group_chunk.data[grp_idx].Reference(groups.data[grp_idx]);
 	}
-	state.group_chunk.data[groups.ColumnCount()].Reference(group_hashes);
+	state.group_chunk.data[groups.ColumnCount()].Reference(group_hashes_v);
 	state.group_chunk.SetCardinality(groups);
 	// convert all vectors to unified format
+	if (!state.chunk_state_initialized) {
+		data_collection->InitializeAppend(state.chunk_state);
+		state.chunk_state_initialized = true;
+	}
+	TupleDataCollection::ToUnifiedFormat(state.chunk_state, state.group_chunk);
 	if (!state.group_data) {
 		state.group_data = unique_ptr<UnifiedVectorFormat[]>(new UnifiedVectorFormat[state.group_chunk.ColumnCount()]);
 	}
-	for (idx_t col_idx = 0; col_idx < state.group_chunk.ColumnCount(); col_idx++) {
-		state.group_chunk.data[col_idx].ToUnifiedFormat(state.group_chunk.size(), state.group_data[col_idx]);
-	}
+	TupleDataCollection::GetVectorData(state.chunk_state, state.group_data.get());
 	idx_t new_group_count = 0;
+	idx_t remaining_entries = groups.size();
 	while (remaining_entries > 0) {
 		idx_t new_entry_count = 0;
 		idx_t need_compare_count = 0;
 		idx_t no_match_count = 0;
-		// first figure out for each remaining whether or not it belongs to a full or empty group
+		// For each remaining entry, figure out whether or not it belongs to a full or empty group
 		for (idx_t i = 0; i < remaining_entries; i++) {
 			const idx_t index = sel_vector->get_index(i);
-			const auto ht_entry_ptr = ((ENTRY *)this->hashes_hdl_ptr) + ht_offsets_ptr[index];
-			if (ht_entry_ptr->page_nr == 0) { // we use page number 0 as a "unused marker"
-				// cell is empty; setup the new entry
-				if (payload_page_offset == tuples_per_block || payload_hds.empty()) {
-					NewBlock();
-				}
-				auto entry_payload_ptr = payload_hds_ptrs.back() + (payload_page_offset * tuple_size);
+			auto &ht_entry = *(((ENTRY *)this->hashes_hdl_ptr) + ht_offsets_ptr[index]);
+			if (ht_entry.page_nr == 0) { // Cell is unoccupied (we use page number 0 as a "unused marker")
+				D_ASSERT(group_hashes[index] >> hash_prefix_shift <= NumericLimits<uint16_t>::Maximum());
+				D_ASSERT(payload_hds_ptrs.size() < NumericLimits<uint32_t>::Maximum());
-				D_ASSERT(group_hashes_ptr[index] >> hash_prefix_shift <= NumericLimits<uint16_t>::Maximum());
-				D_ASSERT(payload_page_offset < tuples_per_block);
-				D_ASSERT(payload_hds.size() < NumericLimits<uint32_t>::Maximum());
-				D_ASSERT(payload_page_offset + 1 < NumericLimits<uint16_t>::Maximum());
+				// Set page nr to 1 for now to mark it as occupied (will be corrected later) and set the salt
+				ht_entry.page_nr = 1;
+				ht_entry.salt = group_hashes[index] >> hash_prefix_shift;
-				ht_entry_ptr->salt = group_hashes_ptr[index] >> hash_prefix_shift;
-				// page numbers start at one so we can use 0 as empty flag
-				// GetPtr undoes this
-				ht_entry_ptr->page_nr = payload_hds.size();
-				ht_entry_ptr->page_offset = payload_page_offset++;
-				// update selection lists for outer loops
+				// Update selection lists for outer loops
 				state.empty_vector.set_index(new_entry_count++, index);
 				new_groups_out.set_index(new_group_count++, index);
-				entries++;
-				addresses_ptr[index] = entry_payload_ptr;
-			} else {
-				// cell is occupied: add to check list
-				// only need to check if hash salt in ptr == prefix of hash in payload
-				if (ht_entry_ptr->salt == hash_salts_ptr[index]) {
+			} else { // Cell is occupied: Compare salts
+				if (ht_entry.salt == hash_salts_ptr[index]) {
 					state.group_compare_vector.set_index(need_compare_count++, index);
-					auto page_ptr = payload_hds_ptrs[ht_entry_ptr->page_nr - 1];
-					auto page_offset = ht_entry_ptr->page_offset * tuple_size;
-					addresses_ptr[index] = page_ptr + page_offset;
 				} else {
 					state.no_match_vector.set_index(no_match_count++, index);
 				}
 			}
 		}
-		// for each of the locations that are empty, serialize the group columns to the locations
-		RowOperations::Scatter(state.group_chunk, state.group_data.get(), layout, addresses, *string_heap,
-		                       state.empty_vector, new_entry_count);
-		RowOperations::InitializeStates(layout, addresses, state.empty_vector, new_entry_count);
+		if (new_entry_count != 0) {
+			// Append everything that belongs to an empty group
+			data_collection->AppendUnified(td_pin_state, state.chunk_state, state.group_chunk, state.empty_vector,
+			                               new_entry_count);
+			RowOperations::InitializeStates(layout, state.chunk_state.row_locations,
+			                                *FlatVector::IncrementalSelectionVector(), new_entry_count);
+			// Get the pointers to the (possibly) newly created blocks of the data collection
+			idx_t block_id = payload_hds_ptrs.empty() ? 0 : payload_hds_ptrs.size() - 1;
+			UpdateBlockPointers();
+			auto block_pointer = payload_hds_ptrs[block_id];
+			auto block_end = block_pointer + tuples_per_block * tuple_size;
+			// Set the page nrs/offsets in the 1st part of the HT now that the data has been appended
+			const auto row_locations = FlatVector::GetData<data_ptr_t>(state.chunk_state.row_locations);
+			for (idx_t new_entry_idx = 0; new_entry_idx < new_entry_count; new_entry_idx++) {
+				const auto &row_location = row_locations[new_entry_idx];
+				if (row_location > block_end || row_location < block_pointer) {
+					block_id++;
+					D_ASSERT(block_id < payload_hds_ptrs.size());
+					block_pointer = payload_hds_ptrs[block_id];
+					block_end = block_pointer + tuples_per_block * tuple_size;
+				}
+				D_ASSERT(row_location >= block_pointer && row_location < block_end);
+				D_ASSERT((row_location - block_pointer) % tuple_size == 0);
+				const auto index = state.empty_vector.get_index(new_entry_idx);
+				auto &ht_entry = *(((ENTRY *)this->hashes_hdl_ptr) + ht_offsets_ptr[index]);
+				ht_entry.page_nr = block_id + 1;
+				ht_entry.page_offset = (row_location - block_pointer) / tuple_size;
+				addresses[index] = row_location;
+			}
+		}
+		if (need_compare_count != 0) {
+			// Get the pointers to the rows that need to be compared
+			for (idx_t need_compare_idx = 0; need_compare_idx < need_compare_count; need_compare_idx++) {
+				const auto index = state.group_compare_vector.get_index(need_compare_idx);
+				const auto &ht_entry = *(((ENTRY *)this->hashes_hdl_ptr) + ht_offsets_ptr[index]);
+				auto page_ptr = payload_hds_ptrs[ht_entry.page_nr - 1];
+				auto page_offset = ht_entry.page_offset * tuple_size;
+				addresses[index] = page_ptr + page_offset;
+			}
-		// now we have only the tuples remaining that might match to an existing group
-		// start performing comparisons with each of the groups
-		RowOperations::Match(state.group_chunk, state.group_data.get(), layout, addresses, predicates,
-		                     state.group_compare_vector, need_compare_count, &state.no_match_vector, no_match_count);
+			// Perform group comparisons
+			RowOperations::Match(state.group_chunk, state.group_data.get(), layout, addresses_v, predicates,
+			                     state.group_compare_vector, need_compare_count, &state.no_match_vector,
+			                     no_match_count);
+		}
-		// each of the entries that do not match we move them to the next entry in the HT
+		// Linear probing: each of the entries that do not match move to the next entry in the HT
 		for (idx_t i = 0; i < no_match_count; i++) {
 			idx_t index = state.no_match_vector.get_index(i);
 			ht_offsets_ptr[index]++;
@@ -488,6 +489,17 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(AggregateHTAppendSta
 	return new_group_count;
 }
+void GroupedAggregateHashTable::UpdateBlockPointers() {
+	for (const auto &id_and_handle : td_pin_state.row_handles) {
+		const auto &id = id_and_handle.first;
+		const auto &handle = id_and_handle.second;
+		if (payload_hds_ptrs.empty() || id > payload_hds_ptrs.size() - 1) {
+			payload_hds_ptrs.resize(id + 1);
+		}
+		payload_hds_ptrs[id] = handle.Ptr();
+	}
+}
 // this is to support distinct aggregations where we need to record whether we
 // have already seen a value for a group
 idx_t GroupedAggregateHashTable::FindOrCreateGroups(AggregateHTAppendState &state, DataChunk &groups,
@@ -517,37 +529,44 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroups(AggregateHTAppendState &stat
 }
 struct FlushMoveState {
-	FlushMoveState(Allocator &allocator, RowLayout &layout)
-	    : new_groups(STANDARD_VECTOR_SIZE), group_addresses(LogicalType::POINTER),
+	explicit FlushMoveState(TupleDataCollection &collection_p)
+	    : collection(collection_p), hashes(LogicalType::HASH), group_addresses(LogicalType::POINTER),
 	      new_groups_sel(STANDARD_VECTOR_SIZE) {
-		vector<LogicalType> group_types(layout.GetTypes().begin(), layout.GetTypes().end() - 1);
-		groups.Initialize(allocator, group_types);
+		const auto &layout = collection.GetLayout();
+		vector<column_t> column_ids;
+		column_ids.reserve(layout.ColumnCount() - 1);
+		for (idx_t col_idx = 0; col_idx < layout.ColumnCount() - 1; col_idx++) {
+			column_ids.emplace_back(col_idx);
+		}
+		// FIXME DESTROY_AFTER_DONE if we make it possible to pass a selection vector to RowOperations::DestroyStates?
+		collection.InitializeScan(scan_state, column_ids, TupleDataPinProperties::UNPIN_AFTER_DONE);
+		collection.InitializeScanChunk(scan_state, groups);
+		hash_col_idx = layout.ColumnCount() - 1;
 	}
+	bool Scan();
+	TupleDataCollection &collection;
+	TupleDataScanState scan_state;
 	DataChunk groups;
-	SelectionVector new_groups;
+	idx_t hash_col_idx;
+	Vector hashes;
+	AggregateHTAppendState append_state;
 	Vector group_addresses;
 	SelectionVector new_groups_sel;
 };
-void GroupedAggregateHashTable::FlushMove(FlushMoveState &state, Vector &source_addresses, Vector &source_hashes,
-                                          idx_t count) {
-	D_ASSERT(source_addresses.GetType() == LogicalType::POINTER);
-	D_ASSERT(source_hashes.GetType() == LogicalType::HASH);
-	state.groups.Reset();
-	state.groups.SetCardinality(count);
-	for (idx_t col_no = 0; col_no < state.groups.ColumnCount(); col_no++) {
-		auto &column = state.groups.data[col_no];
-		RowOperations::Gather(source_addresses, *FlatVector::IncrementalSelectionVector(), column,
-		                      *FlatVector::IncrementalSelectionVector(), count, layout, col_no);
+bool FlushMoveState::Scan() {
+	if (collection.Scan(scan_state, groups)) {
+		collection.Gather(scan_state.chunk_state.row_locations, *FlatVector::IncrementalSelectionVector(),
+		                  groups.size(), hash_col_idx, hashes, *FlatVector::IncrementalSelectionVector());
+		return true;
 	}
-	AggregateHTAppendState append_state;
-	FindOrCreateGroups(append_state, state.groups, source_hashes, state.group_addresses, state.new_groups_sel);
-	RowOperationsState row_state(aggregate_allocator.GetAllocator());
-	RowOperations::CombineStates(row_state, layout, source_addresses, state.group_addresses, count);
+	collection.FinalizePinState(scan_state.pin_state);
+	return false;
 }
 void GroupedAggregateHashTable::Combine(GroupedAggregateHashTable &other) {
@@ -556,127 +575,67 @@ void GroupedAggregateHashTable::Combine(GroupedAggregateHashTable &other) {
 	D_ASSERT(other.layout.GetAggrWidth() == layout.GetAggrWidth());
 	D_ASSERT(other.layout.GetDataWidth() == layout.GetDataWidth());
 	D_ASSERT(other.layout.GetRowWidth() == layout.GetRowWidth());
-	D_ASSERT(other.tuples_per_block == tuples_per_block);
-	if (other.entries == 0) {
+	if (other.Count() == 0) {
 		return;
 	}
-	Vector addresses(LogicalType::POINTER);
-	auto addresses_ptr = FlatVector::GetData<data_ptr_t>(addresses);
-	Vector hashes(LogicalType::HASH);
-	auto hashes_ptr = FlatVector::GetData<hash_t>(hashes);
-	idx_t group_idx = 0;
-	FlushMoveState state(allocator, layout);
-	other.PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) {
-		auto hash = Load<hash_t>(ptr + hash_offset);
+	FlushMoveState state(*other.data_collection);
+	RowOperationsState row_state(aggregate_allocator->GetAllocator());
+	while (state.Scan()) {
+		FindOrCreateGroups(state.append_state, state.groups, state.hashes, state.group_addresses, state.new_groups_sel);
+		RowOperations::CombineStates(row_state, layout, state.scan_state.chunk_state.row_locations,
+		                             state.group_addresses, state.groups.size());
+	}
-		hashes_ptr[group_idx] = hash;
-		addresses_ptr[group_idx] = ptr;
-		group_idx++;
-		if (group_idx == STANDARD_VECTOR_SIZE) {
-			FlushMove(state, addresses, hashes, group_idx);
-			group_idx = 0;
-		}
-	});
-	FlushMove(state, addresses, hashes, group_idx);
-	string_heap->Merge(*other.string_heap);
 	Verify();
 }
-struct PartitionInfo {
-	PartitionInfo() : addresses(LogicalType::POINTER), hashes(LogicalType::HASH), group_count(0) {
-		addresses_ptr = FlatVector::GetData<data_ptr_t>(addresses);
-		hashes_ptr = FlatVector::GetData<hash_t>(hashes);
-	};
-	Vector addresses;
-	Vector hashes;
-	idx_t group_count;
-	data_ptr_t *addresses_ptr;
-	hash_t *hashes_ptr;
-};
-void GroupedAggregateHashTable::Partition(vector<GroupedAggregateHashTable *> &partition_hts, hash_t mask,
-                                          idx_t shift) {
-	D_ASSERT(partition_hts.size() > 1);
-	vector<PartitionInfo> partition_info(partition_hts.size());
-	FlushMoveState state(allocator, layout);
-	PayloadApply([&](idx_t page_nr, idx_t page_offset, data_ptr_t ptr) {
-		auto hash = Load<hash_t>(ptr + hash_offset);
-		idx_t partition = (hash & mask) >> shift;
-		D_ASSERT(partition < partition_hts.size());
-		auto &info = partition_info[partition];
-		info.hashes_ptr[info.group_count] = hash;
-		info.addresses_ptr[info.group_count] = ptr;
-		info.group_count++;
-		if (info.group_count == STANDARD_VECTOR_SIZE) {
-			D_ASSERT(partition_hts[partition]);
-			partition_hts[partition]->FlushMove(state, info.addresses, info.hashes, info.group_count);
-			info.group_count = 0;
-		}
-	});
-	idx_t info_idx = 0;
-	idx_t total_count = 0;
-	for (auto &partition_entry : partition_hts) {
-		auto &info = partition_info[info_idx++];
-		partition_entry->FlushMove(state, info.addresses, info.hashes, info.group_count);
-		partition_entry->string_heap->Merge(*string_heap);
-		partition_entry->Verify();
-		total_count += partition_entry->Size();
+void GroupedAggregateHashTable::Partition(vector<GroupedAggregateHashTable *> &partition_hts, idx_t radix_bits) {
+	const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
+	D_ASSERT(partition_hts.size() == num_partitions);
+	// Partition the data
+	auto partitioned_data =
+	    make_uniq<RadixPartitionedTupleData>(buffer_manager, layout, radix_bits, layout.ColumnCount() - 1);
+	partitioned_data->Partition(*data_collection, TupleDataPinProperties::KEEP_EVERYTHING_PINNED);
+	D_ASSERT(partitioned_data->GetPartitions().size() == num_partitions);
+	// Move the partitioned data collections to the partitioned hash tables and initialize the 1st part of the HT
+	auto &partitions = partitioned_data->GetPartitions();
+	for (idx_t partition_idx = 0; partition_idx < num_partitions; partition_idx++) {
+		auto &partition_ht = *partition_hts[partition_idx];
+		partition_ht.data_collection = std::move(partitions[partition_idx]);
+		partition_ht.aggregate_allocator = aggregate_allocator;
+		partition_ht.InitializeFirstPart();
+		partition_ht.Verify();
 	}
-	(void)total_count;
-	D_ASSERT(total_count == entries);
 }
-idx_t GroupedAggregateHashTable::Scan(AggregateHTScanState &scan_state, DataChunk &result) {
-	idx_t this_n;
-	Vector addresses(LogicalType::POINTER);
-	auto data_pointers = FlatVector::GetData<data_ptr_t>(addresses);
-	{
-		lock_guard<mutex> l(scan_state.lock);
-		if (scan_state.scan_position >= entries) {
-			return 0;
-		}
-		auto remaining = entries - scan_state.scan_position;
-		this_n = MinValue((idx_t)STANDARD_VECTOR_SIZE, remaining);
-		auto chunk_idx = scan_state.scan_position / tuples_per_block;
-		auto chunk_offset = (scan_state.scan_position % tuples_per_block) * tuple_size;
-		D_ASSERT(chunk_offset + tuple_size <= Storage::BLOCK_SIZE);
-		auto read_ptr = payload_hds_ptrs[chunk_idx++];
-		for (idx_t i = 0; i < this_n; i++) {
-			data_pointers[i] = read_ptr + chunk_offset;
-			chunk_offset += tuple_size;
-			if (chunk_offset >= tuples_per_block * tuple_size) {
-				read_ptr = payload_hds_ptrs[chunk_idx++];
-				chunk_offset = 0;
-			}
-		}
-		scan_state.scan_position += this_n;
+void GroupedAggregateHashTable::InitializeFirstPart() {
+	data_collection->GetBlockPointers(payload_hds_ptrs);
+	auto size = MaxValue<idx_t>(NextPowerOfTwo(Count() * 2L), capacity);
+	switch (entry_type) {
+	case HtEntryType::HT_WIDTH_64:
+		Resize<aggr_ht_entry_64>(size);
+		break;
+	case HtEntryType::HT_WIDTH_32:
+		Resize<aggr_ht_entry_32>(size);
+		break;
+	default:
+		throw InternalException("Unknown HT entry width");
 	}
+}
-	result.SetCardinality(this_n);
-	// fetch the group columns (ignoring the final hash column
+idx_t GroupedAggregateHashTable::Scan(TupleDataParallelScanState &gstate, TupleDataLocalScanState &lstate,
+                                      DataChunk &result) {
+	data_collection->Scan(gstate, lstate, result);
+	RowOperationsState row_state(aggregate_allocator->GetAllocator());
 	const auto group_cols = layout.ColumnCount() - 1;
-	for (idx_t col_no = 0; col_no < group_cols; col_no++) {
-		auto &column = result.data[col_no];
-		RowOperations::Gather(addresses, *FlatVector::IncrementalSelectionVector(), column,
-		                      *FlatVector::IncrementalSelectionVector(), result.size(), layout, col_no);
-	}
+	RowOperations::FinalizeStates(row_state, layout, lstate.chunk_state.row_locations, result, group_cols);
-	RowOperationsState row_state(aggregate_allocator.GetAllocator());
-	RowOperations::FinalizeStates(row_state, layout, addresses, result, group_cols);
-	return this_n;
+	return result.size();
 }
 void GroupedAggregateHashTable::Finalize() {
@@ -684,8 +643,11 @@ void GroupedAggregateHashTable::Finalize() {
 		return;
 	}
-	// early release hashes, not needed for partition/scan
+	// Early release hashes (not needed for partition/scan) and data collection (will be pinned again when scanning)
 	hashes_hdl.Destroy();
+	data_collection->FinalizePinState(td_pin_state);
+	data_collection->Unpin();
 	is_finalized = true;
 }