npm - duckdb - Versions diffs - 0.8.2-dev2356.0 → 0.8.2-dev2509.0 - Mend

duckdb 0.8.2-dev2356.0 → 0.8.2-dev2509.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp CHANGED Viewed

@@ -1,14 +1,62 @@
 #include "duckdb/core_functions/scalar/list_functions.hpp"
-#include "duckdb/common/pair.hpp"
 #include "duckdb/common/string_util.hpp"
+#include "duckdb/common/swap.hpp"
 #include "duckdb/common/types/data_chunk.hpp"
 #include "duckdb/function/scalar/nested_functions.hpp"
 #include "duckdb/function/scalar/string_functions.hpp"
-#include "duckdb/parser/expression/bound_expression.hpp"
 #include "duckdb/planner/expression/bound_function_expression.hpp"
+#include "duckdb/planner/expression/bound_constant_expression.hpp"
 namespace duckdb {
+struct ListSliceBindData : public FunctionData {
+	ListSliceBindData(const LogicalType &return_type_p, bool begin_is_empty_p, bool end_is_empty_p)
+	    : return_type(return_type_p), begin_is_empty(begin_is_empty_p), end_is_empty(end_is_empty_p) {
+	}
+	~ListSliceBindData() override;
+	LogicalType return_type;
+	bool begin_is_empty;
+	bool end_is_empty;
+public:
+	bool Equals(const FunctionData &other_p) const override;
+	unique_ptr<FunctionData> Copy() const override;
+};
+ListSliceBindData::~ListSliceBindData() {
+}
+bool ListSliceBindData::Equals(const FunctionData &other_p) const {
+	auto &other = other_p.Cast<ListSliceBindData>();
+	return return_type == other.return_type && begin_is_empty == other.begin_is_empty &&
+	       end_is_empty == other.end_is_empty;
+}
+unique_ptr<FunctionData> ListSliceBindData::Copy() const {
+	return make_uniq<ListSliceBindData>(return_type, begin_is_empty, end_is_empty);
+}
+template <typename INDEX_TYPE>
+static int CalculateSliceLength(idx_t begin, idx_t end, INDEX_TYPE step, bool svalid) {
+	if (step < 0) {
+		step = abs(step);
+	}
+	if (step == 0 && svalid) {
+		throw InvalidInputException("Slice step cannot be zero");
+	}
+	if (step == 1) {
+		return end - begin;
+	} else if (static_cast<idx_t>(step) >= (end - begin)) {
+		return 1;
+	}
+	if ((end - begin) % step != 0) {
+		return (end - begin) / step + 1;
+	}
+	return (end - begin) / step;
+}
 template <typename INPUT_TYPE, typename INDEX_TYPE>
 INDEX_TYPE ValueLength(const INPUT_TYPE &value) {
 	return 0;
@@ -20,33 +68,44 @@ int64_t ValueLength(const list_entry_t &value) {
 }
 template <>
-int32_t ValueLength(const string_t &value) {
-	return LengthFun::Length<string_t, int32_t>(value);
+int64_t ValueLength(const string_t &value) {
+	return LengthFun::Length<string_t, int64_t>(value);
 }
 template <typename INPUT_TYPE, typename INDEX_TYPE>
-bool ClampIndex(INDEX_TYPE &index, const INPUT_TYPE &value) {
-	const auto length = ValueLength<INPUT_TYPE, INDEX_TYPE>(value);
+static void ClampIndex(INDEX_TYPE &index, const INPUT_TYPE &value, const INDEX_TYPE length, bool is_min) {
 	if (index < 0) {
-		if (index < -length) {
-			return false;
-		}
+		index = (!is_min) ? index + 1 : index;
 		index = length + index;
+		return;
 	} else if (index > length) {
 		index = length;
 	}
-	return true;
+	return;
 }
 template <typename INPUT_TYPE, typename INDEX_TYPE>
-static bool ClampSlice(const INPUT_TYPE &value, INDEX_TYPE &begin, INDEX_TYPE &end, bool begin_valid, bool end_valid) {
+static bool ClampSlice(const INPUT_TYPE &value, INDEX_TYPE &begin, INDEX_TYPE &end) {
 	// Clamp offsets
-	begin = begin_valid ? begin : 0;
-	begin = (begin > 0) ? begin - 1 : begin;
-	end = end_valid ? end : ValueLength<INPUT_TYPE, INDEX_TYPE>(value);
-	if (!ClampIndex(begin, value) || !ClampIndex(end, value)) {
-		return false;
+	begin = (begin != 0 && begin != (INDEX_TYPE)NumericLimits<int64_t>::Minimum()) ? begin - 1 : begin;
+	bool is_min = false;
+	if (begin == (INDEX_TYPE)NumericLimits<int64_t>::Minimum()) {
+		begin++;
+		is_min = true;
+	}
+	const auto length = ValueLength<INPUT_TYPE, INDEX_TYPE>(value);
+	if (begin < 0 && -begin > length && end < 0 && -end > length) {
+		begin = 0;
+		end = 0;
+		return true;
+	}
+	if (begin < 0 && -begin > length) {
+		begin = 0;
 	}
+	ClampIndex(begin, value, length, is_min);
+	ClampIndex(end, value, length, false);
 	end = MaxValue<INDEX_TYPE>(begin, end);
 	return true;
@@ -65,108 +124,262 @@ list_entry_t SliceValue(Vector &result, list_entry_t input, int64_t begin, int64
 }
 template <>
-string_t SliceValue(Vector &result, string_t input, int32_t begin, int32_t end) {
+string_t SliceValue(Vector &result, string_t input, int64_t begin, int64_t end) {
 	// one-based - zero has strange semantics
 	return SubstringFun::SubstringUnicode(result, input, begin + 1, end - begin);
 }
 template <typename INPUT_TYPE, typename INDEX_TYPE>
-static void ExecuteSlice(Vector &result, Vector &s, Vector &b, Vector &e, const idx_t count) {
-	if (result.GetVectorType() == VectorType::CONSTANT_VECTOR) {
-		auto rdata = ConstantVector::GetData<INPUT_TYPE>(result);
-		auto sdata = ConstantVector::GetData<INPUT_TYPE>(s);
-		auto bdata = ConstantVector::GetData<INDEX_TYPE>(b);
-		auto edata = ConstantVector::GetData<INDEX_TYPE>(e);
-		auto sliced = sdata[0];
-		auto begin = bdata[0];
-		auto end = edata[0];
-		auto svalid = !ConstantVector::IsNull(s);
-		auto bvalid = !ConstantVector::IsNull(b);
-		auto evalid = !ConstantVector::IsNull(e);
-		// Try to slice
-		if (!svalid || !ClampSlice(sliced, begin, end, bvalid, evalid)) {
-			ConstantVector::SetNull(result, true);
-		} else {
-			rdata[0] = SliceValue<INPUT_TYPE, INDEX_TYPE>(result, sliced, begin, end);
-		}
+INPUT_TYPE SliceValueWithSteps(Vector &result, SelectionVector &sel, INPUT_TYPE input, INDEX_TYPE begin, INDEX_TYPE end,
+                               INDEX_TYPE step, idx_t &sel_idx) {
+	return input;
+}
+template <>
+list_entry_t SliceValueWithSteps(Vector &result, SelectionVector &sel, list_entry_t input, int64_t begin, int64_t end,
+                                 int64_t step, idx_t &sel_idx) {
+	if (end - begin == 0) {
+		input.length = 0;
+		input.offset = sel_idx;
+		return input;
+	}
+	input.length = CalculateSliceLength(begin, end, step, true);
+	idx_t child_idx = input.offset + begin;
+	if (step < 0) {
+		child_idx = input.offset + end - 1;
+	}
+	input.offset = sel_idx;
+	for (idx_t i = 0; i < input.length; i++) {
+		sel.set_index(sel_idx, child_idx);
+		child_idx += step;
+		sel_idx++;
+	}
+	return input;
+}
+template <typename INPUT_TYPE, typename INDEX_TYPE>
+static void ExecuteConstantSlice(Vector &result, Vector &str_vector, Vector &begin_vector, Vector &end_vector,
+                                 optional_ptr<Vector> step_vector, const idx_t count, SelectionVector &sel,
+                                 idx_t &sel_idx, optional_ptr<Vector> result_child_vector, bool begin_is_empty,
+                                 bool end_is_empty) {
+	auto result_data = ConstantVector::GetData<INPUT_TYPE>(result);
+	auto str_data = ConstantVector::GetData<INPUT_TYPE>(str_vector);
+	auto begin_data = ConstantVector::GetData<INDEX_TYPE>(begin_vector);
+	auto end_data = ConstantVector::GetData<INDEX_TYPE>(end_vector);
+	auto step_data = step_vector ? ConstantVector::GetData<INDEX_TYPE>(*step_vector) : nullptr;
+	auto str = str_data[0];
+	auto begin = begin_is_empty ? 0 : begin_data[0];
+	auto end = end_is_empty ? ValueLength<INPUT_TYPE, INDEX_TYPE>(str) : end_data[0];
+	auto step = step_data ? step_data[0] : 1;
+	if (step < 0) {
+		swap(begin, end);
+		begin = end_is_empty ? 0 : begin;
+		end = begin_is_empty ? ValueLength<INPUT_TYPE, INDEX_TYPE>(str) : end;
+	}
+	auto str_valid = !ConstantVector::IsNull(str_vector);
+	auto begin_valid = !ConstantVector::IsNull(begin_vector);
+	auto end_valid = !ConstantVector::IsNull(end_vector);
+	auto step_valid = step_vector && !ConstantVector::IsNull(*step_vector);
+	// Clamp offsets
+	bool clamp_result = false;
+	if (str_valid && begin_valid && end_valid && (step_valid || step == 1)) {
+		clamp_result = ClampSlice(str, begin, end);
+	}
+	auto sel_length = 0;
+	if (step_vector && step_valid && str_valid && begin_valid && end_valid && step != 1 && end - begin > 0) {
+		sel_length = CalculateSliceLength(begin, end, step, step_valid);
+		sel.Initialize(sel_length);
+	}
+	// Try to slice
+	if (!str_valid || !begin_valid || !end_valid || (step_vector && !step_valid) || !clamp_result) {
+		ConstantVector::SetNull(result, true);
+	} else if (step == 1) {
+		result_data[0] = SliceValue<INPUT_TYPE, INDEX_TYPE>(result, str, begin, end);
 	} else {
-		UnifiedVectorFormat sdata, bdata, edata;
+		result_data[0] = SliceValueWithSteps<INPUT_TYPE, INDEX_TYPE>(result, sel, str, begin, end, step, sel_idx);
+	}
+	if (step_vector && step != 0 && end - begin > 0) {
+		result_child_vector->Slice(sel, sel_length);
+	}
+}
+template <typename INPUT_TYPE, typename INDEX_TYPE>
+static void ExecuteFlatSlice(Vector &result, Vector &list_vector, Vector &begin_vector, Vector &end_vector,
+                             optional_ptr<Vector> step_vector, const idx_t count, SelectionVector &sel, idx_t &sel_idx,
+                             optional_ptr<Vector> result_child_vector, bool begin_is_empty, bool end_is_empty) {
+	UnifiedVectorFormat list_data, begin_data, end_data, step_data;
+	idx_t sel_length = 0;
-		s.ToUnifiedFormat(count, sdata);
-		b.ToUnifiedFormat(count, bdata);
-		e.ToUnifiedFormat(count, edata);
+	list_vector.ToUnifiedFormat(count, list_data);
+	begin_vector.ToUnifiedFormat(count, begin_data);
+	end_vector.ToUnifiedFormat(count, end_data);
+	if (step_vector) {
+		step_vector->ToUnifiedFormat(count, step_data);
+		sel.Initialize(ListVector::GetListSize(list_vector));
+	}
-		auto rdata = FlatVector::GetData<INPUT_TYPE>(result);
-		auto &rmask = FlatVector::Validity(result);
+	auto result_data = FlatVector::GetData<INPUT_TYPE>(result);
+	auto &result_mask = FlatVector::Validity(result);
-		for (idx_t i = 0; i < count; ++i) {
-			auto sidx = sdata.sel->get_index(i);
-			auto bidx = bdata.sel->get_index(i);
-			auto eidx = edata.sel->get_index(i);
+	for (idx_t i = 0; i < count; ++i) {
+		auto list_idx = list_data.sel->get_index(i);
+		auto begin_idx = begin_data.sel->get_index(i);
+		auto end_idx = end_data.sel->get_index(i);
+		auto step_idx = step_vector ? step_data.sel->get_index(i) : 0;
-			auto sliced = (UnifiedVectorFormat::GetData<INPUT_TYPE>(sdata))[sidx];
-			auto begin = (UnifiedVectorFormat::GetData<INDEX_TYPE>(bdata))[bidx];
-			auto end = (UnifiedVectorFormat::GetData<INDEX_TYPE>(edata))[eidx];
+		auto sliced = reinterpret_cast<INPUT_TYPE *>(list_data.data)[list_idx];
+		auto begin = begin_is_empty ? 0 : reinterpret_cast<INDEX_TYPE *>(begin_data.data)[begin_idx];
+		auto end = end_is_empty ? ValueLength<INPUT_TYPE, INDEX_TYPE>(sliced)
+		                        : reinterpret_cast<INDEX_TYPE *>(end_data.data)[end_idx];
+		auto step = step_vector ? reinterpret_cast<INDEX_TYPE *>(step_data.data)[step_idx] : 1;
-			auto svalid = sdata.validity.RowIsValid(sidx);
-			auto bvalid = bdata.validity.RowIsValid(bidx);
-			auto evalid = edata.validity.RowIsValid(eidx);
+		if (step < 0) {
+			swap(begin, end);
+			begin = end_is_empty ? 0 : begin;
+			end = begin_is_empty ? ValueLength<INPUT_TYPE, INDEX_TYPE>(sliced) : end;
+		}
+		auto list_valid = list_data.validity.RowIsValid(list_idx);
+		auto begin_valid = begin_data.validity.RowIsValid(begin_idx);
+		auto end_valid = end_data.validity.RowIsValid(end_idx);
+		auto step_valid = step_vector && step_data.validity.RowIsValid(step_idx);
-			// Try to slice
-			if (!svalid || !ClampSlice(sliced, begin, end, bvalid, evalid)) {
-				rmask.SetInvalid(i);
-			} else {
-				rdata[i] = SliceValue<INPUT_TYPE, INDEX_TYPE>(result, sliced, begin, end);
-			}
+		bool clamp_result = false;
+		if (list_valid && begin_valid && end_valid && (step_valid || step == 1)) {
+			clamp_result = ClampSlice(sliced, begin, end);
 		}
+		auto length = 0;
+		if (step_vector && step_valid && list_valid && begin_valid && end_valid && end - begin > 0) {
+			length = CalculateSliceLength(begin, end, step, step_valid);
+		}
+		sel_length += length;
+		if (!list_valid || !begin_valid || !end_valid || (step_vector && !step_valid) || !clamp_result) {
+			result_mask.SetInvalid(i);
+		} else if (!step_vector) {
+			result_data[i] = SliceValue<INPUT_TYPE, INDEX_TYPE>(result, sliced, begin, end);
+		} else {
+			result_data[i] =
+			    SliceValueWithSteps<INPUT_TYPE, INDEX_TYPE>(result, sel, sliced, begin, end, step, sel_idx);
+		}
+	}
+	if (step_vector) {
+		SelectionVector new_sel(sel_length);
+		for (idx_t i = 0; i < sel_length; ++i) {
+			new_sel.set_index(i, sel.get_index(i));
+		}
+		result_child_vector->Slice(new_sel, sel_length);
+	}
+}
+template <typename INPUT_TYPE, typename INDEX_TYPE>
+static void ExecuteSlice(Vector &result, Vector &list_or_str_vector, Vector &begin_vector, Vector &end_vector,
+                         optional_ptr<Vector> step_vector, const idx_t count, bool begin_is_empty, bool end_is_empty) {
+	optional_ptr<Vector> result_child_vector;
+	if (step_vector) {
+		result_child_vector = &ListVector::GetEntry(result);
 	}
+	SelectionVector sel;
+	idx_t sel_idx = 0;
+	if (result.GetVectorType() == VectorType::CONSTANT_VECTOR) {
+		ExecuteConstantSlice<INPUT_TYPE, INDEX_TYPE>(result, list_or_str_vector, begin_vector, end_vector, step_vector,
+		                                             count, sel, sel_idx, result_child_vector, begin_is_empty,
+		                                             end_is_empty);
+	} else {
+		ExecuteFlatSlice<INPUT_TYPE, INDEX_TYPE>(result, list_or_str_vector, begin_vector, end_vector, step_vector,
+		                                         count, sel, sel_idx, result_child_vector, begin_is_empty,
+		                                         end_is_empty);
+	}
 	result.Verify(count);
 }
 static void ArraySliceFunction(DataChunk &args, ExpressionState &state, Vector &result) {
-	D_ASSERT(args.ColumnCount() == 3);
-	D_ASSERT(args.data.size() == 3);
+	D_ASSERT(args.ColumnCount() == 3 || args.ColumnCount() == 4);
+	D_ASSERT(args.data.size() == 3 || args.data.size() == 4);
 	auto count = args.size();
-	Vector &s = args.data[0];
-	Vector &b = args.data[1];
-	Vector &e = args.data[2];
+	Vector &list_or_str_vector = args.data[0];
+	Vector &begin_vector = args.data[1];
+	Vector &end_vector = args.data[2];
+	optional_ptr<Vector> step_vector;
+	if (args.ColumnCount() == 4) {
+		step_vector = &args.data[3];
+	}
+	auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
+	auto &info = func_expr.bind_info->Cast<ListSliceBindData>();
+	auto begin_is_empty = info.begin_is_empty;
+	auto end_is_empty = info.end_is_empty;
 	result.SetVectorType(args.AllConstant() ? VectorType::CONSTANT_VECTOR : VectorType::FLAT_VECTOR);
 	switch (result.GetType().id()) {
-	case LogicalTypeId::LIST:
+	case LogicalTypeId::LIST: {
 		// Share the value dictionary as we are just going to slice it
-		if (s.GetVectorType() != VectorType::FLAT_VECTOR && s.GetVectorType() != VectorType::CONSTANT_VECTOR) {
-			s.Flatten(count);
+		if (list_or_str_vector.GetVectorType() != VectorType::FLAT_VECTOR &&
+		    list_or_str_vector.GetVectorType() != VectorType::CONSTANT_VECTOR) {
+			list_or_str_vector.Flatten(count);
 		}
-		ListVector::ReferenceEntry(result, s);
-		ExecuteSlice<list_entry_t, int64_t>(result, s, b, e, count);
+		ListVector::ReferenceEntry(result, list_or_str_vector);
+		ExecuteSlice<list_entry_t, int64_t>(result, list_or_str_vector, begin_vector, end_vector, step_vector, count,
+		                                    begin_is_empty, end_is_empty);
 		break;
-	case LogicalTypeId::VARCHAR:
-		ExecuteSlice<string_t, int32_t>(result, s, b, e, count);
+	}
+	case LogicalTypeId::VARCHAR: {
+		ExecuteSlice<string_t, int64_t>(result, list_or_str_vector, begin_vector, end_vector, step_vector, count,
+		                                begin_is_empty, end_is_empty);
 		break;
+	}
 	default:
 		throw NotImplementedException("Specifier type not implemented");
 	}
 }
+static bool CheckIfParamIsEmpty(duckdb::unique_ptr<duckdb::Expression> &param) {
+	bool is_empty = false;
+	if (param->return_type.id() == LogicalTypeId::LIST) {
+		auto empty_list = make_uniq<BoundConstantExpression>(Value::LIST(LogicalType::INTEGER, vector<Value>()));
+		is_empty = param->Equals(*empty_list);
+		if (!is_empty) {
+			// if the param is not empty, the user has entered a list instead of a BIGINT
+			throw BinderException("The upper and lower bounds of the slice must be a BIGINT");
+		}
+	}
+	return is_empty;
+}
 static unique_ptr<FunctionData> ArraySliceBind(ClientContext &context, ScalarFunction &bound_function,
                                                vector<unique_ptr<Expression>> &arguments) {
-	D_ASSERT(bound_function.arguments.size() == 3);
+	D_ASSERT(arguments.size() == 3 || arguments.size() == 4);
+	D_ASSERT(bound_function.arguments.size() == 3 || bound_function.arguments.size() == 4);
 	switch (arguments[0]->return_type.id()) {
 	case LogicalTypeId::LIST:
 		// The result is the same type
 		bound_function.return_type = arguments[0]->return_type;
 		break;
 	case LogicalTypeId::VARCHAR:
-		// string slice returns a string, but can only accept 32 bit integers
+		// string slice returns a string
+		if (bound_function.arguments.size() == 4) {
+			throw NotImplementedException(
+			    "Slice with steps has not been implemented for string types, you can consider rewriting your query as "
+			    "follows:\n SELECT array_to_string((str_split(string, '')[begin:end:step], '');");
+		}
 		bound_function.return_type = arguments[0]->return_type;
-		bound_function.arguments[1] = LogicalType::INTEGER;
-		bound_function.arguments[2] = LogicalType::INTEGER;
+		for (idx_t i = 1; i < 3; i++) {
+			if (arguments[i]->return_type.id() != LogicalTypeId::LIST) {
+				bound_function.arguments[i] = LogicalType::BIGINT;
+			}
+		}
 		break;
 	case LogicalTypeId::SQLNULL:
 	case LogicalTypeId::UNKNOWN:
@@ -177,16 +390,29 @@ static unique_ptr<FunctionData> ArraySliceBind(ClientContext &context, ScalarFun
 		throw BinderException("ARRAY_SLICE can only operate on LISTs and VARCHARs");
 	}
-	return make_uniq<VariableReturnBindData>(bound_function.return_type);
+	bool begin_is_empty = CheckIfParamIsEmpty(arguments[1]);
+	if (!begin_is_empty) {
+		bound_function.arguments[1] = LogicalType::BIGINT;
+	}
+	bool end_is_empty = CheckIfParamIsEmpty(arguments[2]);
+	if (!end_is_empty) {
+		bound_function.arguments[2] = LogicalType::BIGINT;
+	}
+	return make_uniq<ListSliceBindData>(bound_function.return_type, begin_is_empty, end_is_empty);
 }
-ScalarFunction ListSliceFun::GetFunction() {
+ScalarFunctionSet ListSliceFun::GetFunctions() {
 	// the arguments and return types are actually set in the binder function
-	ScalarFunction fun({LogicalType::ANY, LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::ANY,
-	                   ArraySliceFunction, ArraySliceBind);
-	fun.varargs = LogicalType::ANY;
+	ScalarFunction fun({LogicalType::ANY, LogicalType::ANY, LogicalType::ANY}, LogicalType::ANY, ArraySliceFunction,
+	                   ArraySliceBind);
 	fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
-	return fun;
+	ScalarFunctionSet set;
+	set.AddFunction(fun);
+	fun.arguments.push_back(LogicalType::BIGINT);
+	set.AddFunction(fun);
+	return set;
 }
 } // namespace duckdb

package/src/duckdb/src/execution/aggregate_hashtable.cpp CHANGED Viewed

@@ -584,6 +584,12 @@ void GroupedAggregateHashTable::Combine(GroupedAggregateHashTable &other) {
 	}
 	Verify();
+	// if we combine states, then we also need to combine the arena allocators
+	for (auto &stored_allocator : other.stored_allocators) {
+		stored_allocators.push_back(stored_allocator);
+	}
+	stored_allocators.push_back(other.aggregate_allocator);
 }
 void GroupedAggregateHashTable::Append(GroupedAggregateHashTable &other) {

package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp CHANGED Viewed

@@ -12,7 +12,8 @@ PerfectAggregateHashTable::PerfectAggregateHashTable(ClientContext &context, All
                                                      vector<Value> group_minima_p, vector<idx_t> required_bits_p)
     : BaseAggregateHashTable(context, allocator, aggregate_objects_p, std::move(payload_types_p)),
       addresses(LogicalType::POINTER), required_bits(std::move(required_bits_p)), total_required_bits(0),
-      group_minima(std::move(group_minima_p)), sel(STANDARD_VECTOR_SIZE), aggregate_allocator(allocator) {
+      group_minima(std::move(group_minima_p)), sel(STANDARD_VECTOR_SIZE),
+      aggregate_allocator(make_uniq<ArenaAllocator>(allocator)) {
 	for (auto &group_bits : required_bits) {
 		total_required_bits += group_bits;
 	}
@@ -136,7 +137,7 @@ void PerfectAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload)
 	// after finding the group location we update the aggregates
 	idx_t payload_idx = 0;
 	auto &aggregates = layout.GetAggregates();
-	RowOperationsState row_state(aggregate_allocator);
+	RowOperationsState row_state(*aggregate_allocator);
 	for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) {
 		auto &aggregate = aggregates[aggr_idx];
 		auto input_count = (idx_t)aggregate.child_count;
@@ -165,7 +166,7 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) {
 	data_ptr_t source_ptr = other.data;
 	data_ptr_t target_ptr = data;
 	idx_t combine_count = 0;
-	RowOperationsState row_state(aggregate_allocator);
+	RowOperationsState row_state(*aggregate_allocator);
 	for (idx_t i = 0; i < total_groups; i++) {
 		auto has_entry_source = other.group_is_set[i];
 		// we only have any work to do if the source has an entry for this group
@@ -183,6 +184,11 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) {
 		target_ptr += tuple_size;
 	}
 	RowOperations::CombineStates(row_state, layout, source_addresses, target_addresses, combine_count);
+	// FIXME: after moving the arena allocator, we currently have to ensure that the pointer is not nullptr, because the
+	// FIXME: Destroy()-function of the hash table expects an allocator in some cases (e.g., for sorted aggregates)
+	stored_allocators.push_back(std::move(other.aggregate_allocator));
+	other.aggregate_allocator = make_uniq<ArenaAllocator>(allocator);
 }
 template <class T>
@@ -268,7 +274,7 @@ void PerfectAggregateHashTable::Scan(idx_t &scan_position, DataChunk &result) {
 	}
 	// then construct the payloads
 	result.SetCardinality(entry_count);
-	RowOperationsState row_state(aggregate_allocator);
+	RowOperationsState row_state(*aggregate_allocator);
 	RowOperations::FinalizeStates(row_state, layout, addresses, result, grouping_columns);
 }
@@ -289,7 +295,7 @@ void PerfectAggregateHashTable::Destroy() {
 	idx_t count = 0;
 	// iterate over all initialised slots of the hash table
-	RowOperationsState row_state(aggregate_allocator);
+	RowOperationsState row_state(*aggregate_allocator);
 	data_ptr_t payload_ptr = data;
 	for (idx_t i = 0; i < total_groups; i++) {
 		if (group_is_set[i]) {

package/src/duckdb/src/execution/window_executor.cpp CHANGED Viewed

@@ -204,19 +204,19 @@ static idx_t FindTypedRangeBound(const WindowInputColumn &over, const idx_t orde
 	WindowColumnIterator<T> begin(over, order_begin);
 	WindowColumnIterator<T> end(over, order_end);
-	if (order_begin < prev.first && prev.first < order_end) {
-		const auto first = over.GetCell<T>(prev.first);
+	if (order_begin < prev.start && prev.start < order_end) {
+		const auto first = over.GetCell<T>(prev.start);
 		if (!comp(val, first)) {
 			//	prev.first <= val, so we can start further forward
-			begin += (prev.first - order_begin);
+			begin += (prev.start - order_begin);
 		}
 	}
-	if (order_begin <= prev.second && prev.second < order_end) {
-		const auto second = over.GetCell<T>(prev.second);
+	if (order_begin <= prev.end && prev.end < order_end) {
+		const auto second = over.GetCell<T>(prev.end);
 		if (!comp(second, val)) {
 			//	val <= prev.second, so we can end further back
 			// (prev.second is the largest peer)
-			end -= (order_end - prev.second - 1);
+			end -= (order_end - prev.end - 1);
 		}
 	}
@@ -278,8 +278,6 @@ static idx_t FindOrderedRangeBound(const WindowInputColumn &over, const OrderTyp
 }
 struct WindowBoundariesState {
-	using FrameBounds = std::pair<idx_t, idx_t>;
 	static inline bool IsScalar(const unique_ptr<Expression> &expr) {
 		return expr ? expr->IsScalar() : true;
 	}
@@ -375,8 +373,8 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
 				}
 				//	Reset range hints
-				prev.first = valid_start;
-				prev.second = valid_end;
+				prev.start = valid_start;
+				prev.end = valid_end;
 			}
 		} else if (!is_peer) {
 			peer_start = row_idx;
@@ -427,9 +425,9 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
 		if (boundary_start.CellIsNull(chunk_idx)) {
 			window_start = peer_start;
 		} else {
-			prev.first = FindOrderedRangeBound<true>(range_collection, range_sense, valid_start, row_idx,
+			prev.start = FindOrderedRangeBound<true>(range_collection, range_sense, valid_start, row_idx,
 			                                         boundary_start, chunk_idx, prev);
-			window_start = prev.first;
+			window_start = prev.start;
 		}
 		break;
 	}
@@ -437,9 +435,9 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
 		if (boundary_start.CellIsNull(chunk_idx)) {
 			window_start = peer_start;
 		} else {
-			prev.first = FindOrderedRangeBound<true>(range_collection, range_sense, row_idx, valid_end, boundary_start,
+			prev.start = FindOrderedRangeBound<true>(range_collection, range_sense, row_idx, valid_end, boundary_start,
 			                                         chunk_idx, prev);
-			window_start = prev.first;
+			window_start = prev.start;
 		}
 		break;
 	}
@@ -472,9 +470,9 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
 		if (boundary_end.CellIsNull(chunk_idx)) {
 			window_end = peer_end;
 		} else {
-			prev.second = FindOrderedRangeBound<false>(range_collection, range_sense, valid_start, row_idx,
-			                                           boundary_end, chunk_idx, prev);
-			window_end = prev.second;
+			prev.end = FindOrderedRangeBound<false>(range_collection, range_sense, valid_start, row_idx, boundary_end,
+			                                        chunk_idx, prev);
+			window_end = prev.end;
 		}
 		break;
 	}
@@ -482,9 +480,9 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
 		if (boundary_end.CellIsNull(chunk_idx)) {
 			window_end = peer_end;
 		} else {
-			prev.second = FindOrderedRangeBound<false>(range_collection, range_sense, row_idx, valid_end, boundary_end,
-			                                           chunk_idx, prev);
-			window_end = prev.second;
+			prev.end = FindOrderedRangeBound<false>(range_collection, range_sense, row_idx, valid_end, boundary_end,
+			                                        chunk_idx, prev);
+			window_end = prev.end;
 		}
 		break;
 	}

package/src/duckdb/src/function/aggregate/distributive/count.cpp CHANGED Viewed

@@ -39,8 +39,8 @@ struct CountStarFunction : public BaseCountFunction {
 	                   Vector &result, idx_t rid, idx_t bias) {
 		D_ASSERT(input_count == 0);
 		auto data = FlatVector::GetData<RESULT_TYPE>(result);
-		const auto begin = frame.first;
-		const auto end = frame.second;
+		const auto begin = frame.start;
+		const auto end = frame.end;
 		// Slice to any filtered rows
 		if (!filter_mask.AllValid()) {
 			RESULT_TYPE filtered = 0;

package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED Viewed

@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.8.2-dev2356"
+#define DUCKDB_VERSION "0.8.2-dev2509"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "ef2efd1b9d"
+#define DUCKDB_SOURCE_ID "785b11edd5"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"