npm - duckdb - Versions diffs - 0.8.2-dev4711.0 → 0.8.2-dev5002.0 - Mend

duckdb 0.8.2-dev4711.0 → 0.8.2-dev5002.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (98) hide show

package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp CHANGED Viewed

@@ -236,9 +236,18 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
 static unique_ptr<FunctionData> ListSortBind(ClientContext &context, ScalarFunction &bound_function,
                                              vector<unique_ptr<Expression>> &arguments, OrderType &order,
                                              OrderByNullType &null_order) {
+	LogicalType child_type;
+	if (arguments[0]->return_type == LogicalTypeId::UNKNOWN) {
+		bound_function.arguments[0] = LogicalTypeId::UNKNOWN;
+		bound_function.return_type = LogicalType::SQLNULL;
+		child_type = bound_function.return_type;
+		return make_uniq<ListSortBindData>(order, null_order, bound_function.return_type, child_type, context);
+	}
 	bound_function.arguments[0] = arguments[0]->return_type;
 	bound_function.return_type = arguments[0]->return_type;
-	auto child_type = ListType::GetChildType(arguments[0]->return_type);
+	child_type = ListType::GetChildType(arguments[0]->return_type);
 	return make_uniq<ListSortBindData>(order, null_order, bound_function.return_type, child_type, context);
 }

package/src/duckdb/src/core_functions/scalar/map/map_concat.cpp CHANGED Viewed

@@ -97,7 +97,6 @@ static void MapConcatFunction(DataChunk &args, ExpressionState &state, Vector &r
 			auto &values = MapVector::GetValues(map);
 			values_list.push_back(values.GetValue(mapping.key_index));
 		}
-		idx_t entries_count = keys_list.size();
 		D_ASSERT(values_list.size() == keys_list.size());
 		result_entry.offset = ListVector::GetListSize(result);
 		result_entry.length = values_list.size();
@@ -105,7 +104,6 @@ static void MapConcatFunction(DataChunk &args, ExpressionState &state, Vector &r
 		for (auto &list_entry : list_entries) {
 			ListVector::PushBack(result, list_entry);
 		}
-		ListVector::SetListSize(result, ListVector::GetListSize(result) + entries_count);
 	}
 	if (args.AllConstant()) {

package/src/duckdb/src/core_functions/scalar/string/repeat.cpp CHANGED Viewed

@@ -1,10 +1,9 @@
-#include "duckdb/core_functions/scalar/string_functions.hpp"
 #include "duckdb/common/exception.hpp"
 #include "duckdb/common/vector_operations/binary_executor.hpp"
+#include "duckdb/core_functions/scalar/string_functions.hpp"
-#include <string.h>
 #include <ctype.h>
+#include <string.h>
 namespace duckdb {
@@ -33,8 +32,12 @@ static void RepeatFunction(DataChunk &args, ExpressionState &state, Vector &resu
 	    });
 }
-ScalarFunction RepeatFun::GetFunction() {
-	return ScalarFunction({LogicalType::VARCHAR, LogicalType::BIGINT}, LogicalType::VARCHAR, RepeatFunction);
+ScalarFunctionSet RepeatFun::GetFunctions() {
+	ScalarFunctionSet repeat;
+	for (const auto &type : {LogicalType::VARCHAR, LogicalType::BLOB}) {
+		repeat.AddFunction(ScalarFunction({type, LogicalType::BIGINT}, type, RepeatFunction));
+	}
+	return repeat;
 }
 } // namespace duckdb

package/src/duckdb/src/execution/aggregate_hashtable.cpp CHANGED Viewed

@@ -45,6 +45,7 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
 	// Append hash column to the end and initialise the row layout
 	group_types_p.emplace_back(LogicalType::HASH);
 	layout.Initialize(std::move(group_types_p), std::move(aggregate_objects_p));
 	hash_offset = layout.GetOffsets()[layout.ColumnCount() - 1];
 	// Partitioned data and pointer table
@@ -52,7 +53,8 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
 	Resize(initial_capacity);
 	// Predicates
-	predicates.resize(layout.ColumnCount() - 1, ExpressionType::COMPARE_EQUAL);
+	predicates.resize(layout.ColumnCount() - 1, ExpressionType::COMPARE_NOT_DISTINCT_FROM);
+	row_matcher.Initialize(true, layout, predicates);
 }
 void GroupedAggregateHashTable::InitializePartitionedData() {
@@ -414,9 +416,8 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
 			}
 			// Perform group comparisons
-			RowOperations::Match(state.group_chunk, state.group_data.get(), layout, addresses_v, predicates,
-			                     state.group_compare_vector, need_compare_count, &state.no_match_vector,
-			                     no_match_count);
+			row_matcher.Match(state.group_chunk, chunk_state.vector_data, state.group_compare_vector,
+			                  need_compare_count, layout, addresses_v, &state.no_match_vector, no_match_count);
 		}
 		// Linear probing: each of the entries that do not match move to the next entry in the HT

package/src/duckdb/src/execution/index/fixed_size_allocator.cpp CHANGED Viewed

@@ -173,6 +173,19 @@ bool FixedSizeAllocator::InitializeVacuum() {
 		return false;
 	}
+	// remove all empty buffers
+	auto buffer_it = buffers.begin();
+	while (buffer_it != buffers.end()) {
+		if (!buffer_it->second.segment_count) {
+			buffers_with_free_space.erase(buffer_it->first);
+			buffer_it->second.Destroy();
+			buffer_it = buffers.erase(buffer_it);
+		} else {
+			buffer_it++;
+		}
+	}
+	// determine if a vacuum is necessary
 	multimap<idx_t, idx_t> temporary_vacuum_buffers;
 	D_ASSERT(vacuum_buffers.empty());
 	idx_t available_segments_in_memory = 0;

package/src/duckdb/src/execution/join_hashtable.cpp CHANGED Viewed

@@ -19,15 +19,15 @@ JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector<JoinC
     : buffer_manager(buffer_manager_p), conditions(conditions_p), build_types(std::move(btypes)), entry_size(0),
       tuple_size(0), vfound(Value::BOOLEAN(false)), join_type(type_p), finalized(false), has_null(false),
       external(false), radix_bits(4), partition_start(0), partition_end(0) {
 	for (auto &condition : conditions) {
 		D_ASSERT(condition.left->return_type == condition.right->return_type);
 		auto type = condition.left->return_type;
 		if (condition.comparison == ExpressionType::COMPARE_EQUAL ||
-		    condition.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM ||
-		    condition.comparison == ExpressionType::COMPARE_DISTINCT_FROM) {
-			// all equality conditions should be at the front
-			// all other conditions at the back
-			// this assert checks that
+		    condition.comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
+			// ensure that all equality conditions are at the front,
+			// and that all other conditions are at the back
 			D_ASSERT(equality_types.size() == condition_types.size());
 			equality_types.push_back(type);
 		}
@@ -51,6 +51,8 @@ JoinHashTable::JoinHashTable(BufferManager &buffer_manager_p, const vector<JoinC
 	}
 	layout_types.emplace_back(LogicalType::HASH);
 	layout.Initialize(layout_types, false);
+	row_matcher.Initialize(false, layout, predicates);
+	row_matcher_no_match_sel.Initialize(true, layout, predicates);
 	const auto &offsets = layout.GetOffsets();
 	tuple_size = offsets[condition_types.size() + build_types.size()];
@@ -142,30 +144,6 @@ static idx_t FilterNullValues(UnifiedVectorFormat &vdata, const SelectionVector
 	return result_count;
 }
-idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unsafe_unique_array<UnifiedVectorFormat> &key_data,
-                                 const SelectionVector *&current_sel, SelectionVector &sel, bool build_side) {
-	key_data = keys.ToUnifiedFormat();
-	// figure out which keys are NULL, and create a selection vector out of them
-	current_sel = FlatVector::IncrementalSelectionVector();
-	idx_t added_count = keys.size();
-	if (build_side && IsRightOuterJoin(join_type)) {
-		// in case of a right or full outer join, we cannot remove NULL keys from the build side
-		return added_count;
-	}
-	for (idx_t i = 0; i < keys.ColumnCount(); i++) {
-		if (!null_values_are_equal[i]) {
-			if (key_data[i].validity.AllValid()) {
-				continue;
-			}
-			added_count = FilterNullValues(key_data[i], *current_sel, added_count, sel);
-			// null values are NOT equal for this column, filter them out
-			current_sel = &sel;
-		}
-	}
-	return added_count;
-}
 void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChunk &keys, DataChunk &payload) {
 	D_ASSERT(!finalized);
 	D_ASSERT(keys.size() == payload.size());
@@ -194,23 +172,6 @@ void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChu
 		info.correlated_counts->AddChunk(info.group_chunk, info.correlated_payload, AggregateType::NON_DISTINCT);
 	}
-	// prepare the keys for processing
-	unsafe_unique_array<UnifiedVectorFormat> key_data;
-	const SelectionVector *current_sel;
-	SelectionVector sel(STANDARD_VECTOR_SIZE);
-	idx_t added_count = PrepareKeys(keys, key_data, current_sel, sel, true);
-	if (added_count < keys.size()) {
-		has_null = true;
-	}
-	if (added_count == 0) {
-		return;
-	}
-	// hash the keys and obtain an entry in the list
-	// note that we only hash the keys used in the equality comparison
-	Vector hash_values(LogicalType::HASH);
-	Hash(keys, *current_sel, added_count, hash_values);
 	// build a chunk to append to the data collection [keys, payload, (optional "found" boolean), hash]
 	DataChunk source_chunk;
 	source_chunk.InitializeEmpty(layout.GetTypes());
@@ -228,13 +189,58 @@ void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChu
 		source_chunk.data[col_offset].Reference(vfound);
 		col_offset++;
 	}
+	Vector hash_values(LogicalType::HASH);
 	source_chunk.data[col_offset].Reference(hash_values);
 	source_chunk.SetCardinality(keys);
+	// ToUnifiedFormat the source chunk
+	TupleDataCollection::ToUnifiedFormat(append_state.chunk_state, source_chunk);
+	// prepare the keys for processing
+	const SelectionVector *current_sel;
+	SelectionVector sel(STANDARD_VECTOR_SIZE);
+	idx_t added_count = PrepareKeys(keys, append_state.chunk_state.vector_data, current_sel, sel, true);
 	if (added_count < keys.size()) {
-		source_chunk.Slice(*current_sel, added_count);
+		has_null = true;
+	}
+	if (added_count == 0) {
+		return;
 	}
-	sink_collection->Append(append_state, source_chunk);
+	// hash the keys and obtain an entry in the list
+	// note that we only hash the keys used in the equality comparison
+	Hash(keys, *current_sel, added_count, hash_values);
+	// Re-reference and ToUnifiedFormat the hash column after computing it
+	source_chunk.data[col_offset].Reference(hash_values);
+	hash_values.ToUnifiedFormat(source_chunk.size(), append_state.chunk_state.vector_data.back().unified);
+	// We already called TupleDataCollection::ToUnifiedFormat, so we can AppendUnified here
+	sink_collection->AppendUnified(append_state, source_chunk, *current_sel, added_count);
+}
+idx_t JoinHashTable::PrepareKeys(DataChunk &keys, vector<TupleDataVectorFormat> &vector_data,
+                                 const SelectionVector *&current_sel, SelectionVector &sel, bool build_side) {
+	// figure out which keys are NULL, and create a selection vector out of them
+	current_sel = FlatVector::IncrementalSelectionVector();
+	idx_t added_count = keys.size();
+	if (build_side && IsRightOuterJoin(join_type)) {
+		// in case of a right or full outer join, we cannot remove NULL keys from the build side
+		return added_count;
+	}
+	for (idx_t col_idx = 0; col_idx < keys.ColumnCount(); col_idx++) {
+		if (!null_values_are_equal[col_idx]) {
+			auto &col_key_data = vector_data[col_idx].unified;
+			if (col_key_data.validity.AllValid()) {
+				continue;
+			}
+			added_count = FilterNullValues(col_key_data, *current_sel, added_count, sel);
+			// null values are NOT equal for this column, filter them out
+			current_sel = &sel;
+		}
+	}
+	return added_count;
 }
 template <bool PARALLEL>
@@ -322,12 +328,13 @@ void JoinHashTable::Finalize(idx_t chunk_idx_from, idx_t chunk_idx_to, bool para
 	} while (iterator.Next());
 }
-unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys, const SelectionVector *&current_sel) {
+unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys, TupleDataChunkState &key_state,
+                                                                 const SelectionVector *&current_sel) {
 	D_ASSERT(Count() > 0); // should be handled before
 	D_ASSERT(finalized);
 	// set up the scan structure
-	auto ss = make_uniq<ScanStructure>(*this);
+	auto ss = make_uniq<ScanStructure>(*this, key_state);
 	if (join_type != JoinType::INNER) {
 		ss->found_match = make_unsafe_uniq_array<bool>(STANDARD_VECTOR_SIZE);
@@ -335,13 +342,15 @@ unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys
 	}
 	// first prepare the keys for probing
-	ss->count = PrepareKeys(keys, ss->key_data, current_sel, ss->sel_vector, false);
+	TupleDataCollection::ToUnifiedFormat(key_state, keys);
+	ss->count = PrepareKeys(keys, key_state.vector_data, current_sel, ss->sel_vector, false);
 	return ss;
 }
-unique_ptr<ScanStructure> JoinHashTable::Probe(DataChunk &keys, Vector *precomputed_hashes) {
+unique_ptr<ScanStructure> JoinHashTable::Probe(DataChunk &keys, TupleDataChunkState &key_state,
+                                               Vector *precomputed_hashes) {
 	const SelectionVector *current_sel;
-	auto ss = InitializeScanStructure(keys, current_sel);
+	auto ss = InitializeScanStructure(keys, key_state, current_sel);
 	if (ss->count == 0) {
 		return ss;
 	}
@@ -363,8 +372,9 @@ unique_ptr<ScanStructure> JoinHashTable::Probe(DataChunk &keys, Vector *precompu
 	return ss;
 }
-ScanStructure::ScanStructure(JoinHashTable &ht)
-    : pointers(LogicalType::POINTER), sel_vector(STANDARD_VECTOR_SIZE), ht(ht), finished(false) {
+ScanStructure::ScanStructure(JoinHashTable &ht_p, TupleDataChunkState &key_state_p)
+    : key_state(key_state_p), pointers(LogicalType::POINTER), sel_vector(STANDARD_VECTOR_SIZE), ht(ht_p),
+      finished(false) {
 }
 void ScanStructure::Next(DataChunk &keys, DataChunk &left, DataChunk &result) {
@@ -404,8 +414,9 @@ idx_t ScanStructure::ResolvePredicates(DataChunk &keys, SelectionVector &match_s
 	}
 	idx_t no_match_count = 0;
-	return RowOperations::Match(keys, key_data.get(), ht.layout, pointers, ht.predicates, match_sel, this->count,
-	                            no_match_sel, no_match_count);
+	auto &matcher = no_match_sel ? ht.row_matcher_no_match_sel : ht.row_matcher;
+	return matcher.Match(keys, key_state.vector_data, match_sel, this->count, ht.layout, pointers, no_match_sel,
+	                     no_match_count);
 }
 idx_t ScanStructure::ScanInnerJoin(DataChunk &keys, SelectionVector &result_vector) {
@@ -990,7 +1001,8 @@ static void CreateSpillChunk(DataChunk &spill_chunk, DataChunk &keys, DataChunk
 	spill_chunk.data[spill_col_idx].Reference(hashes);
 }
-unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys, DataChunk &payload, ProbeSpill &probe_spill,
+unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys, TupleDataChunkState &key_state,
+                                                       DataChunk &payload, ProbeSpill &probe_spill,
                                                        ProbeSpillLocalAppendState &spill_state,
                                                        DataChunk &spill_chunk) {
 	// hash all the keys
@@ -1019,7 +1031,7 @@ unique_ptr<ScanStructure> JoinHashTable::ProbeAndSpill(DataChunk &keys, DataChun
 	payload.Slice(true_sel, true_count);
 	const SelectionVector *current_sel;
-	auto ss = InitializeScanStructure(keys, current_sel);
+	auto ss = InitializeScanStructure(keys, key_state, current_sel);
 	if (ss->count == 0) {
 		return ss;
 	}

package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_inner.cpp CHANGED Viewed

@@ -3,21 +3,12 @@
 namespace duckdb {
-template <class OP>
-struct ComparisonOperationWrapper {
-	template <class T>
-	static inline bool Operation(T left, T right, bool left_is_null, bool right_is_null) {
-		if (left_is_null || right_is_null) {
-			return false;
-		}
-		return OP::Operation(left, right);
-	}
-};
 struct InitialNestedLoopJoin {
 	template <class T, class OP>
 	static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos,
 	                       SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) {
+		using MATCH_OP = ComparisonOperationWrapper<OP>;
 		// initialize phase of nested loop join
 		// fill lvector and rvector with matches from the base vectors
 		UnifiedVectorFormat left_data, right_data;
@@ -37,7 +28,7 @@ struct InitialNestedLoopJoin {
 				}
 				idx_t left_position = left_data.sel->get_index(lpos);
 				bool left_is_valid = left_data.validity.RowIsValid(left_position);
-				if (OP::Operation(ldata[left_position], rdata[right_position], !left_is_valid, !right_is_valid)) {
+				if (MATCH_OP::Operation(ldata[left_position], rdata[right_position], !left_is_valid, !right_is_valid)) {
 					// emit tuple
 					lvector.set_index(result_count, lpos);
 					rvector.set_index(result_count, rpos);
@@ -54,6 +45,8 @@ struct RefineNestedLoopJoin {
 	template <class T, class OP>
 	static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos,
 	                       SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) {
+		using MATCH_OP = ComparisonOperationWrapper<OP>;
 		UnifiedVectorFormat left_data, right_data;
 		left.ToUnifiedFormat(left_size, left_data);
 		right.ToUnifiedFormat(right_size, right_data);
@@ -72,7 +65,7 @@ struct RefineNestedLoopJoin {
 			auto right_idx = right_data.sel->get_index(ridx);
 			bool left_is_valid = left_data.validity.RowIsValid(left_idx);
 			bool right_is_valid = right_data.validity.RowIsValid(right_idx);
-			if (OP::Operation(ldata[left_idx], rdata[right_idx], !left_is_valid, !right_is_valid)) {
+			if (MATCH_OP::Operation(ldata[left_idx], rdata[right_idx], !left_is_valid, !right_is_valid)) {
 				lvector.set_index(result_count, lidx);
 				rvector.set_index(result_count, ridx);
 				result_count++;
@@ -139,26 +132,26 @@ idx_t NestedLoopJoinComparisonSwitch(Vector &left, Vector &right, idx_t left_siz
 	D_ASSERT(left.GetType() == right.GetType());
 	switch (comparison_type) {
 	case ExpressionType::COMPARE_EQUAL:
-		return NestedLoopJoinTypeSwitch<NLTYPE, ComparisonOperationWrapper<duckdb::Equals>>(
-		    left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, Equals>(left, right, left_size, right_size, lpos, rpos, lvector,
+		                                                rvector, current_match_count);
 	case ExpressionType::COMPARE_NOTEQUAL:
-		return NestedLoopJoinTypeSwitch<NLTYPE, ComparisonOperationWrapper<duckdb::NotEquals>>(
-		    left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, NotEquals>(left, right, left_size, right_size, lpos, rpos, lvector,
+		                                                   rvector, current_match_count);
 	case ExpressionType::COMPARE_LESSTHAN:
-		return NestedLoopJoinTypeSwitch<NLTYPE, ComparisonOperationWrapper<duckdb::LessThan>>(
-		    left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, LessThan>(left, right, left_size, right_size, lpos, rpos, lvector,
+		                                                  rvector, current_match_count);
 	case ExpressionType::COMPARE_GREATERTHAN:
-		return NestedLoopJoinTypeSwitch<NLTYPE, ComparisonOperationWrapper<duckdb::GreaterThan>>(
-		    left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, GreaterThan>(left, right, left_size, right_size, lpos, rpos, lvector,
+		                                                     rvector, current_match_count);
 	case ExpressionType::COMPARE_LESSTHANOREQUALTO:
-		return NestedLoopJoinTypeSwitch<NLTYPE, ComparisonOperationWrapper<duckdb::LessThanEquals>>(
-		    left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, LessThanEquals>(left, right, left_size, right_size, lpos, rpos, lvector,
+		                                                        rvector, current_match_count);
 	case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
-		return NestedLoopJoinTypeSwitch<NLTYPE, ComparisonOperationWrapper<duckdb::GreaterThanEquals>>(
-		    left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, GreaterThanEquals>(left, right, left_size, right_size, lpos, rpos,
+		                                                           lvector, rvector, current_match_count);
 	case ExpressionType::COMPARE_DISTINCT_FROM:
-		return NestedLoopJoinTypeSwitch<NLTYPE, duckdb::DistinctFrom>(left, right, left_size, right_size, lpos, rpos,
-		                                                              lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, DistinctFrom>(left, right, left_size, right_size, lpos, rpos, lvector,
+		                                                      rvector, current_match_count);
 	default:
 		throw NotImplementedException("Unimplemented comparison type for join!");
 	}

package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_mark.cpp CHANGED Viewed

@@ -6,6 +6,8 @@ namespace duckdb {
 template <class T, class OP>
 static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[]) {
+	using MATCH_OP = ComparisonOperationWrapper<OP>;
 	UnifiedVectorFormat left_data, right_data;
 	left.ToUnifiedFormat(lcount, left_data);
 	right.ToUnifiedFormat(rcount, right_data);
@@ -17,15 +19,17 @@ static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t r
 			continue;
 		}
 		auto lidx = left_data.sel->get_index(i);
-		if (!left_data.validity.RowIsValid(lidx)) {
+		const auto left_null = !left_data.validity.RowIsValid(lidx);
+		if (!MATCH_OP::COMPARE_NULL && left_null) {
 			continue;
 		}
 		for (idx_t j = 0; j < rcount; j++) {
 			auto ridx = right_data.sel->get_index(j);
-			if (!right_data.validity.RowIsValid(ridx)) {
+			const auto right_null = !right_data.validity.RowIsValid(ridx);
+			if (!MATCH_OP::COMPARE_NULL && right_null) {
 				continue;
 			}
-			if (OP::Operation(ldata[lidx], rdata[ridx])) {
+			if (MATCH_OP::template Operation<T>(ldata[lidx], rdata[ridx], left_null, right_null)) {
 				found_match[i] = true;
 				break;
 			}
@@ -62,6 +66,12 @@ static void MarkJoinNested(Vector &left, Vector &right, idx_t lcount, idx_t rcou
 		case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
 			count = VectorOperations::GreaterThanEquals(left_reference, right, nullptr, rcount, nullptr, nullptr);
 			break;
+		case ExpressionType::COMPARE_DISTINCT_FROM:
+			count = VectorOperations::DistinctFrom(left_reference, right, nullptr, rcount, nullptr, nullptr);
+			break;
+		case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
+			count = VectorOperations::NotDistinctFrom(left_reference, right, nullptr, rcount, nullptr, nullptr);
+			break;
 		default:
 			throw InternalException("Unsupported comparison type for MarkJoinNested");
 		}
@@ -116,17 +126,19 @@ static void MarkJoinComparisonSwitch(Vector &left, Vector &right, idx_t lcount,
 	D_ASSERT(left.GetType() == right.GetType());
 	switch (comparison_type) {
 	case ExpressionType::COMPARE_EQUAL:
-		return MarkJoinSwitch<duckdb::Equals>(left, right, lcount, rcount, found_match);
+		return MarkJoinSwitch<Equals>(left, right, lcount, rcount, found_match);
 	case ExpressionType::COMPARE_NOTEQUAL:
-		return MarkJoinSwitch<duckdb::NotEquals>(left, right, lcount, rcount, found_match);
+		return MarkJoinSwitch<NotEquals>(left, right, lcount, rcount, found_match);
 	case ExpressionType::COMPARE_LESSTHAN:
-		return MarkJoinSwitch<duckdb::LessThan>(left, right, lcount, rcount, found_match);
+		return MarkJoinSwitch<LessThan>(left, right, lcount, rcount, found_match);
 	case ExpressionType::COMPARE_GREATERTHAN:
-		return MarkJoinSwitch<duckdb::GreaterThan>(left, right, lcount, rcount, found_match);
+		return MarkJoinSwitch<GreaterThan>(left, right, lcount, rcount, found_match);
 	case ExpressionType::COMPARE_LESSTHANOREQUALTO:
-		return MarkJoinSwitch<duckdb::LessThanEquals>(left, right, lcount, rcount, found_match);
+		return MarkJoinSwitch<LessThanEquals>(left, right, lcount, rcount, found_match);
 	case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
-		return MarkJoinSwitch<duckdb::GreaterThanEquals>(left, right, lcount, rcount, found_match);
+		return MarkJoinSwitch<GreaterThanEquals>(left, right, lcount, rcount, found_match);
+	case ExpressionType::COMPARE_DISTINCT_FROM:
+		return MarkJoinSwitch<DistinctFrom>(left, right, lcount, rcount, found_match);
 	default:
 		throw NotImplementedException("Unimplemented comparison type for join!");
 	}

package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp CHANGED Viewed

@@ -343,8 +343,8 @@ void PhysicalHashAggregate::SinkDistinct(ExecutionContext &context, DataChunk &c
 SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk &chunk,
                                            OperatorSinkInput &input) const {
-	auto &llstate = input.local_state.Cast<HashAggregateLocalSinkState>();
-	auto &gstate = input.global_state.Cast<HashAggregateGlobalSinkState>();
+	auto &local_state = input.local_state.Cast<HashAggregateLocalSinkState>();
+	auto &global_state = input.global_state.Cast<HashAggregateGlobalSinkState>();
 	if (distinct_collection_info) {
 		SinkDistinct(context, chunk, input);
@@ -354,8 +354,7 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk
 		return SinkResultType::NEED_MORE_INPUT;
 	}
-	DataChunk &aggregate_input_chunk = llstate.aggregate_input_chunk;
+	DataChunk &aggregate_input_chunk = local_state.aggregate_input_chunk;
 	auto &aggregates = grouped_aggregate_data.aggregates;
 	idx_t aggregate_input_idx = 0;
@@ -385,10 +384,11 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, DataChunk
 	// For every grouping set there is one radix_table
 	for (idx_t i = 0; i < groupings.size(); i++) {
-		auto &grouping_gstate = gstate.grouping_states[i];
-		auto &grouping_lstate = llstate.grouping_states[i];
+		auto &grouping_local_state = global_state.grouping_states[i];
+		auto &grouping_global_state = local_state.grouping_states[i];
 		InterruptState interrupt_state;
-		OperatorSinkInput sink_input {*grouping_gstate.table_state, *grouping_lstate.table_state, interrupt_state};
+		OperatorSinkInput sink_input {*grouping_local_state.table_state, *grouping_global_state.table_state,
+		                              interrupt_state};
 		auto &grouping = groupings[i];
 		auto &table = grouping.table_data;

package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp CHANGED Viewed

@@ -437,7 +437,7 @@ void CSVReaderOptions::FromNamedParameters(named_parameter_map_t &in, ClientCont
 			}
 			sql_type_list.reserve(sql_type_names.size());
 			for (auto &sql_type : sql_type_names) {
-				auto def_type = TransformStringToLogicalType(sql_type);
+				auto def_type = TransformStringToLogicalType(sql_type, context);
 				if (def_type.id() == LogicalTypeId::USER) {
 					throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
 					                      kv.first);

package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp CHANGED Viewed

@@ -420,6 +420,8 @@ public:
 	}
 	DataChunk join_keys;
+	TupleDataChunkState join_key_state;
 	ExpressionExecutor probe_executor;
 	unique_ptr<JoinHashTable::ScanStructure> scan_structure;
 	unique_ptr<OperatorState> perfect_hash_join_state;
@@ -446,6 +448,7 @@ unique_ptr<OperatorState> PhysicalHashJoin::GetOperatorState(ExecutionContext &c
 		for (auto &cond : conditions) {
 			state->probe_executor.AddExpression(*cond.left);
 		}
+		TupleDataCollection::InitializeChunkState(state->join_key_state, condition_types);
 	}
 	if (sink.external) {
 		state->spill_chunk.Initialize(allocator, sink.probe_types);
@@ -502,10 +505,10 @@ OperatorResultType PhysicalHashJoin::ExecuteInternal(ExecutionContext &context,
 	// perform the actual probe
 	if (sink.external) {
-		state.scan_structure = sink.hash_table->ProbeAndSpill(state.join_keys, input, *sink.probe_spill,
-		                                                      state.spill_state, state.spill_chunk);
+		state.scan_structure = sink.hash_table->ProbeAndSpill(state.join_keys, state.join_key_state, input,
+		                                                      *sink.probe_spill, state.spill_state, state.spill_chunk);
 	} else {
-		state.scan_structure = sink.hash_table->Probe(state.join_keys);
+		state.scan_structure = sink.hash_table->Probe(state.join_keys, state.join_key_state);
 	}
 	state.scan_structure->Next(state.join_keys, input, chunk);
 	return OperatorResultType::HAVE_MORE_OUTPUT;
@@ -605,6 +608,7 @@ public:
 	DataChunk probe_chunk;
 	DataChunk join_keys;
 	DataChunk payload;
+	TupleDataChunkState join_key_state;
 	//! Column indices to easily reference the join keys/payload columns in probe_chunk
 	vector<idx_t> join_key_indices;
 	vector<idx_t> payload_indices;
@@ -782,6 +786,7 @@ HashJoinLocalSourceState::HashJoinLocalSourceState(const PhysicalHashJoin &op, A
 	probe_chunk.Initialize(allocator, sink.probe_types);
 	join_keys.Initialize(allocator, op.condition_types);
 	payload.Initialize(allocator, op.children[0]->types);
+	TupleDataCollection::InitializeChunkState(join_key_state, op.condition_types);
 	// Store the indices of the columns to reference them easily
 	idx_t col_idx = 0;
@@ -871,7 +876,7 @@ void HashJoinLocalSourceState::ExternalProbe(HashJoinGlobalSinkState &sink, Hash
 	}
 	// Perform the probe
-	scan_structure = sink.hash_table->Probe(join_keys, precomputed_hashes);
+	scan_structure = sink.hash_table->Probe(join_keys, join_key_state, precomputed_hashes);
 	scan_structure->Next(join_keys, payload, chunk);
 }

package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp CHANGED Viewed

@@ -254,7 +254,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::PlanComparisonJoin(LogicalCo
 	}
 	bool has_equality = false;
-	// bool has_inequality = false;
 	size_t has_range = 0;
 	for (size_t c = 0; c < op.conditions.size(); ++c) {
 		auto &cond = op.conditions[c];
@@ -271,7 +270,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::PlanComparisonJoin(LogicalCo
 			break;
 		case ExpressionType::COMPARE_NOTEQUAL:
 		case ExpressionType::COMPARE_DISTINCT_FROM:
-			// has_inequality = true;
 			break;
 		default:
 			throw NotImplementedException("Unimplemented comparison join");

package/src/duckdb/src/execution/reservoir_sample.cpp CHANGED Viewed

@@ -107,25 +107,19 @@ void ReservoirSamplePercentage::AddToReservoir(DataChunk &input) {
 			if (append_to_next_sample > 0) {
 				// we need to also add to the next sample
 				DataChunk new_chunk;
-				new_chunk.Initialize(allocator, input.GetTypes());
-				SelectionVector sel(append_to_current_sample_count);
-				for (idx_t r = 0; r < append_to_current_sample_count; r++) {
-					sel.set_index(r, r);
-				}
-				new_chunk.Slice(sel, append_to_current_sample_count);
+				new_chunk.InitializeEmpty(input.GetTypes());
+				new_chunk.Slice(input, *FlatVector::IncrementalSelectionVector(), append_to_current_sample_count);
 				new_chunk.Flatten();
 				current_sample->AddToReservoir(new_chunk);
 			} else {
 				input.Flatten();
 				input.SetCardinality(append_to_current_sample_count);
 				current_sample->AddToReservoir(input);
 			}
 		}
 		if (append_to_next_sample > 0) {
 			// slice the input for the remainder
-			SelectionVector sel(STANDARD_VECTOR_SIZE);
+			SelectionVector sel(append_to_next_sample);
 			for (idx_t i = 0; i < append_to_next_sample; i++) {
 				sel.set_index(i, append_to_current_sample_count + i);
 			}