npm - duckdb - Versions diffs - 0.8.2-dev4871.0 → 0.8.2-dev5080.0 - Mend

duckdb 0.8.2-dev4871.0 → 0.8.2-dev5080.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85)

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.8.2-dev4871.0",
+  "version": "0.8.2-dev5080.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb/extension/icu/icu-strptime.cpp CHANGED Viewed

@@ -75,6 +75,7 @@ struct ICUStrptime : public ICUDateFunc {
 		calendar->set(UCAL_MINUTE, parsed.data[4]);
 		calendar->set(UCAL_SECOND, parsed.data[5]);
 		calendar->set(UCAL_MILLISECOND, parsed.data[6] / Interval::MICROS_PER_MSEC);
+		micros = parsed.data[6] % Interval::MICROS_PER_MSEC;
 		// This overrides the TZ setting, so only use it if an offset was parsed.
 		// Note that we don't bother/worry about the DST setting because the two just combine.

package/src/duckdb/extension/json/json_functions/copy_json.cpp CHANGED Viewed

@@ -101,7 +101,7 @@ static BoundStatement CopyToJSONPlan(Binder &binder, CopyStatement &stmt) {
 	info.options["quote"] = {""};
 	info.options["escape"] = {""};
 	info.options["delimiter"] = {"\n"};
-	info.options["header"] = {0};
+	info.options["header"] = {{0}};
 	return binder.Bind(*stmt_copy);
 }

package/src/duckdb/src/common/enum_util.cpp CHANGED Viewed

@@ -5503,6 +5503,8 @@ const char* EnumUtil::ToChars<StrTimeSpecifier>(StrTimeSpecifier value) {
 		return "LOCALE_APPROPRIATE_DATE";
 	case StrTimeSpecifier::LOCALE_APPROPRIATE_TIME:
 		return "LOCALE_APPROPRIATE_TIME";
+	case StrTimeSpecifier::NANOSECOND_PADDED:
+		return "NANOSECOND_PADDED";
 	default:
 		throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
 	}
@@ -5606,6 +5608,9 @@ StrTimeSpecifier EnumUtil::FromString<StrTimeSpecifier>(const char *value) {
 	if (StringUtil::Equals(value, "LOCALE_APPROPRIATE_TIME")) {
 		return StrTimeSpecifier::LOCALE_APPROPRIATE_TIME;
 	}
+	if (StringUtil::Equals(value, "NANOSECOND_PADDED")) {
+		return StrTimeSpecifier::NANOSECOND_PADDED;
+	}
 	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
 }

package/src/duckdb/src/common/operator/cast_operators.cpp CHANGED Viewed

@@ -1373,6 +1373,12 @@ timestamp_t CastTimestampMsToUs::Operation(timestamp_t input) {
 	return Timestamp::FromEpochMs(input.value);
 }
+template <>
+timestamp_t CastTimestampMsToNs::Operation(timestamp_t input) {
+	auto us = CastTimestampMsToUs::Operation<timestamp_t, timestamp_t>(input);
+	return CastTimestampUsToNs::Operation<timestamp_t, timestamp_t>(us);
+}
 template <>
 timestamp_t CastTimestampNsToUs::Operation(timestamp_t input) {
 	return Timestamp::FromEpochNanoSeconds(input.value);
@@ -1383,6 +1389,18 @@ timestamp_t CastTimestampSecToUs::Operation(timestamp_t input) {
 	return Timestamp::FromEpochSeconds(input.value);
 }
+template <>
+timestamp_t CastTimestampSecToMs::Operation(timestamp_t input) {
+	auto us = CastTimestampSecToUs::Operation<timestamp_t, timestamp_t>(input);
+	return CastTimestampUsToMs::Operation<timestamp_t, timestamp_t>(us);
+}
+template <>
+timestamp_t CastTimestampSecToNs::Operation(timestamp_t input) {
+	auto us = CastTimestampSecToUs::Operation<timestamp_t, timestamp_t>(input);
+	return CastTimestampUsToNs::Operation<timestamp_t, timestamp_t>(us);
+}
 //===--------------------------------------------------------------------===//
 // Cast To Timestamp
 //===--------------------------------------------------------------------===//

package/src/duckdb/src/common/row_operations/row_matcher.cpp CHANGED Viewed

@@ -8,44 +8,11 @@ namespace duckdb {
 using ValidityBytes = TupleDataLayout::ValidityBytes;
-template <class OP>
-struct RowMatchOperator {
-	static constexpr const bool COMPARE_NULL = false;
-	template <class T>
-	static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) {
-		if (right_null || left_null) {
-			return false;
-		}
-		return OP::template Operation<T>(left, right);
-	}
-};
-template <>
-struct RowMatchOperator<DistinctFrom> {
-	static constexpr const bool COMPARE_NULL = true;
-	template <class T>
-	static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) {
-		return DistinctFrom::template Operation<T>(left, right, left_null, right_null);
-	}
-};
-template <>
-struct RowMatchOperator<NotDistinctFrom> {
-	static constexpr const bool COMPARE_NULL = true;
-	template <class T>
-	static inline bool Operation(const T &left, const T &right, bool left_null, bool right_null) {
-		return NotDistinctFrom::template Operation<T>(left, right, left_null, right_null);
-	}
-};
 template <bool NO_MATCH_SEL, class T, class OP>
 static idx_t TemplatedMatch(Vector &, const TupleDataVectorFormat &lhs_format, SelectionVector &sel, const idx_t count,
                             const TupleDataLayout &rhs_layout, Vector &rhs_row_locations, const idx_t col_idx,
                             const vector<MatchFunction> &, SelectionVector *no_match_sel, idx_t &no_match_count) {
-	using MATCH_OP = RowMatchOperator<OP>;
+	using COMPARISON_OP = ComparisonOperationWrapper<OP>;
 	// LHS
 	const auto &lhs_sel = *lhs_format.unified.sel;
@@ -70,8 +37,8 @@ static idx_t TemplatedMatch(Vector &, const TupleDataVectorFormat &lhs_format, S
 		const ValidityBytes rhs_mask(rhs_location);
 		const auto rhs_null = !rhs_mask.RowIsValid(rhs_mask.GetValidityEntryUnsafe(entry_idx), idx_in_entry);
-		if (MATCH_OP::template Operation<T>(lhs_data[lhs_idx], Load<T>(rhs_location + rhs_offset_in_row), lhs_null,
-		                                    rhs_null)) {
+		if (COMPARISON_OP::template Operation<T>(lhs_data[lhs_idx], Load<T>(rhs_location + rhs_offset_in_row), lhs_null,
+		                                         rhs_null)) {
 			sel.set_index(match_count++, idx);
 		} else if (NO_MATCH_SEL) {
 			no_match_sel->set_index(no_match_count++, idx);
@@ -85,7 +52,7 @@ static idx_t StructMatchEquality(Vector &lhs_vector, const TupleDataVectorFormat
                                  const idx_t count, const TupleDataLayout &rhs_layout, Vector &rhs_row_locations,
                                  const idx_t col_idx, const vector<MatchFunction> &child_functions,
                                  SelectionVector *no_match_sel, idx_t &no_match_count) {
-	using MATCH_OP = RowMatchOperator<OP>;
+	using COMPARISON_OP = ComparisonOperationWrapper<OP>;
 	// LHS
 	const auto &lhs_sel = *lhs_format.unified.sel;
@@ -111,7 +78,7 @@ static idx_t StructMatchEquality(Vector &lhs_vector, const TupleDataVectorFormat
 		// For structs there is no value to compare, here we match NULLs and let recursion do the rest
 		// So we use the comparison only if rhs or LHS is NULL and COMPARE_NULL is true
 		if (!(lhs_null || rhs_null) ||
-		    (MATCH_OP::COMPARE_NULL && MATCH_OP::template Operation<uint32_t>(0, 0, lhs_null, rhs_null))) {
+		    (COMPARISON_OP::COMPARE_NULL && COMPARISON_OP::template Operation<uint32_t>(0, 0, lhs_null, rhs_null))) {
 			sel.set_index(match_count++, idx);
 		} else if (NO_MATCH_SEL) {
 			no_match_sel->set_index(no_match_count++, idx);

package/src/duckdb/src/common/types/data_chunk.cpp CHANGED Viewed

@@ -13,6 +13,10 @@
 #include "duckdb/common/vector_operations/vector_operations.hpp"
 #include "duckdb/execution/execution_context.hpp"
+#include "duckdb/common/serializer/memory_stream.hpp"
+#include "duckdb/common/serializer/binary_serializer.hpp"
+#include "duckdb/common/serializer/binary_deserializer.hpp"
 namespace duckdb {
 DataChunk::DataChunk() : count(0), capacity(STANDARD_VECTOR_SIZE) {
@@ -231,16 +235,20 @@ string DataChunk::ToString() const {
 }
 void DataChunk::Serialize(Serializer &serializer) const {
 	// write the count
 	auto row_count = size();
 	serializer.WriteProperty<sel_t>(100, "rows", row_count);
+	// we should never try to serialize empty data chunks
 	auto column_count = ColumnCount();
+	D_ASSERT(column_count);
-	// Write the types
+	// write the types
 	serializer.WriteList(101, "types", column_count,
 	                     [&](Serializer::List &list, idx_t i) { list.WriteElement(data[i].GetType()); });
-	// Write the data
+	// write the data
 	serializer.WriteList(102, "columns", column_count, [&](Serializer::List &list, idx_t i) {
 		list.WriteObject([&](Serializer &object) {
 			// Reference the vector to avoid potentially mutating it during serialization
@@ -252,21 +260,23 @@ void DataChunk::Serialize(Serializer &serializer) const {
 }
 void DataChunk::Deserialize(Deserializer &deserializer) {
-	// read the count
+	// read and set the row count
 	auto row_count = deserializer.ReadProperty<sel_t>(100, "rows");
-	// Read the types
+	// read the types
 	vector<LogicalType> types;
 	deserializer.ReadList(101, "types", [&](Deserializer::List &list, idx_t i) {
 		auto type = list.ReadElement<LogicalType>();
 		types.push_back(type);
 	});
-	Initialize(Allocator::DefaultAllocator(), types);
-	// now load the column data
+	// initialize the data chunk
+	D_ASSERT(!types.empty());
+	Initialize(Allocator::DefaultAllocator(), types);
 	SetCardinality(row_count);
-	// Read the data
+	// read the data
 	deserializer.ReadList(102, "columns", [&](Deserializer::List &list, idx_t i) {
 		list.ReadObject([&](Deserializer &object) { data[i].Deserialize(object, row_count); });
 	});
@@ -296,11 +306,11 @@ void DataChunk::Slice(DataChunk &other, const SelectionVector &sel, idx_t count_
 }
 unsafe_unique_array<UnifiedVectorFormat> DataChunk::ToUnifiedFormat() {
-	auto orrified_data = make_unsafe_uniq_array<UnifiedVectorFormat>(ColumnCount());
+	auto unified_data = make_unsafe_uniq_array<UnifiedVectorFormat>(ColumnCount());
 	for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) {
-		data[col_idx].ToUnifiedFormat(size(), orrified_data[col_idx]);
+		data[col_idx].ToUnifiedFormat(size(), unified_data[col_idx]);
 	}
-	return orrified_data;
+	return unified_data;
 }
 void DataChunk::Hash(Vector &result) {
@@ -324,10 +334,37 @@ void DataChunk::Hash(vector<idx_t> &column_ids, Vector &result) {
 void DataChunk::Verify() {
 #ifdef DEBUG
 	D_ASSERT(size() <= capacity);
 	// verify that all vectors in this chunk have the chunk selection vector
 	for (idx_t i = 0; i < ColumnCount(); i++) {
 		data[i].Verify(size());
 	}
+	if (!ColumnCount()) {
+		// don't try to round-trip dummy data chunks with no data
+		// e.g., these exist in queries like 'SELECT distinct(col0, col1) FROM tbl', where we have groups, but no
+		// payload so the payload will be such an empty data chunk
+		return;
+	}
+	// verify that we can round-trip chunk serialization
+	MemoryStream mem_stream;
+	BinarySerializer serializer(mem_stream);
+	serializer.Begin();
+	Serialize(serializer);
+	serializer.End();
+	mem_stream.Rewind();
+	BinaryDeserializer deserializer(mem_stream);
+	DataChunk new_chunk;
+	deserializer.Begin();
+	new_chunk.Deserialize(deserializer);
+	deserializer.End();
+	D_ASSERT(size() == new_chunk.size());
 #endif
 }

package/src/duckdb/src/common/types/vector.cpp CHANGED Viewed

@@ -960,7 +960,6 @@ void Vector::Serialize(Serializer &serializer, idx_t count) {
 			break;
 		}
 		case PhysicalType::STRUCT: {
-			Flatten(count);
 			auto &entries = StructVector::GetEntries(*this);
 			// Serialize entries as a list

package/src/duckdb/src/common/types.cpp CHANGED Viewed

@@ -434,7 +434,7 @@ LogicalType TransformStringToLogicalType(const string &str) {
 LogicalType GetUserTypeRecursive(const LogicalType &type, ClientContext &context) {
 	if (type.id() == LogicalTypeId::USER && type.HasAlias()) {
-		return Catalog::GetSystemCatalog(context).GetType(context, SYSTEM_CATALOG, DEFAULT_SCHEMA, type.GetAlias());
+		return Catalog::GetType(context, INVALID_CATALOG, INVALID_SCHEMA, type.GetAlias());
 	}
 	// Look for LogicalTypeId::USER in nested types
 	if (type.id() == LogicalTypeId::STRUCT) {
@@ -659,6 +659,10 @@ LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalTy
 		return right;
 	} else if (right.id() == LogicalTypeId::UNKNOWN) {
 		return left;
+	} else if ((right.id() == LogicalTypeId::ENUM || left.id() == LogicalTypeId::ENUM) && right.id() != left.id()) {
+		// if one is an enum and the other is not, compare strings, not enums
+		// see https://github.com/duckdb/duckdb/issues/8561
+		return LogicalTypeId::VARCHAR;
 	} else if (left.id() < right.id()) {
 		return right;
 	}
@@ -911,6 +915,11 @@ const string &StructType::GetChildName(const LogicalType &type, idx_t index) {
 idx_t StructType::GetChildCount(const LogicalType &type) {
 	return StructType::GetChildTypes(type).size();
 }
+bool StructType::IsUnnamed(const LogicalType &type) {
+	auto &child_types = StructType::GetChildTypes(type);
+	D_ASSERT(child_types.size() > 0);
+	return child_types[0].first.empty();
+}
 LogicalType LogicalType::STRUCT(child_list_t<LogicalType> children) {
 	auto info = make_shared<StructTypeInfo>(std::move(children));

package/src/duckdb/src/core_functions/scalar/date/strftime.cpp CHANGED Viewed

@@ -183,7 +183,7 @@ struct StrpTimeFunction {
 		auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
 		auto &info = func_expr.bind_info->Cast<StrpTimeBindData>();
-		if (ConstantVector::IsNull(args.data[1])) {
+		if (args.data[1].GetVectorType() == VectorType::CONSTANT_VECTOR && ConstantVector::IsNull(args.data[1])) {
 			result.SetVectorType(VectorType::CONSTANT_VECTOR);
 			ConstantVector::SetNull(result, true);
 			return;
@@ -203,7 +203,7 @@ struct StrpTimeFunction {
 		auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
 		auto &info = func_expr.bind_info->Cast<StrpTimeBindData>();
-		if (ConstantVector::IsNull(args.data[1])) {
+		if (args.data[1].GetVectorType() == VectorType::CONSTANT_VECTOR && ConstantVector::IsNull(args.data[1])) {
 			result.SetVectorType(VectorType::CONSTANT_VECTOR);
 			ConstantVector::SetNull(result, true);
 			return;

package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp CHANGED Viewed

@@ -191,9 +191,11 @@ static void ExecuteConstantSlice(Vector &result, Vector &str_vector, Vector &beg
 	}
 	auto sel_length = 0;
+	bool sel_valid = false;
 	if (step_vector && step_valid && str_valid && begin_valid && end_valid && step != 1 && end - begin > 0) {
 		sel_length = CalculateSliceLength(begin, end, step, step_valid);
 		sel.Initialize(sel_length);
+		sel_valid = true;
 	}
 	// Try to slice
@@ -205,8 +207,9 @@ static void ExecuteConstantSlice(Vector &result, Vector &str_vector, Vector &beg
 		result_data[0] = SliceValueWithSteps<INPUT_TYPE, INDEX_TYPE>(result, sel, str, begin, end, step, sel_idx);
 	}
-	if (step_vector && step != 0 && end - begin > 0) {
+	if (sel_valid) {
 		result_child_vector->Slice(sel, sel_length);
+		ListVector::SetListSize(result, sel_length);
 	}
 }
@@ -234,6 +237,16 @@ static void ExecuteFlatSlice(Vector &result, Vector &list_vector, Vector &begin_
 		auto end_idx = end_data.sel->get_index(i);
 		auto step_idx = step_vector ? step_data.sel->get_index(i) : 0;
+		auto list_valid = list_data.validity.RowIsValid(list_idx);
+		auto begin_valid = begin_data.validity.RowIsValid(begin_idx);
+		auto end_valid = end_data.validity.RowIsValid(end_idx);
+		auto step_valid = step_vector && step_data.validity.RowIsValid(step_idx);
+		if (!list_valid || !begin_valid || !end_valid || (step_vector && !step_valid)) {
+			result_mask.SetInvalid(i);
+			continue;
+		}
 		auto sliced = reinterpret_cast<INPUT_TYPE *>(list_data.data)[list_idx];
 		auto begin = begin_is_empty ? 0 : reinterpret_cast<INDEX_TYPE *>(begin_data.data)[begin_idx];
 		auto end = end_is_empty ? ValueLength<INPUT_TYPE, INDEX_TYPE>(sliced)
@@ -245,23 +258,19 @@ static void ExecuteFlatSlice(Vector &result, Vector &list_vector, Vector &begin_
 			begin = end_is_empty ? 0 : begin;
 			end = begin_is_empty ? ValueLength<INPUT_TYPE, INDEX_TYPE>(sliced) : end;
 		}
-		auto list_valid = list_data.validity.RowIsValid(list_idx);
-		auto begin_valid = begin_data.validity.RowIsValid(begin_idx);
-		auto end_valid = end_data.validity.RowIsValid(end_idx);
-		auto step_valid = step_vector && step_data.validity.RowIsValid(step_idx);
 		bool clamp_result = false;
-		if (list_valid && begin_valid && end_valid && (step_valid || step == 1)) {
+		if (step_valid || step == 1) {
 			clamp_result = ClampSlice(sliced, begin, end);
 		}
 		auto length = 0;
-		if (step_vector && step_valid && list_valid && begin_valid && end_valid && end - begin > 0) {
+		if (end - begin > 0) {
 			length = CalculateSliceLength(begin, end, step, step_valid);
 		}
 		sel_length += length;
-		if (!list_valid || !begin_valid || !end_valid || (step_vector && !step_valid) || !clamp_result) {
+		if (!clamp_result) {
 			result_mask.SetInvalid(i);
 		} else if (!step_vector) {
 			result_data[i] = SliceValue<INPUT_TYPE, INDEX_TYPE>(result, sliced, begin, end);
@@ -276,6 +285,7 @@ static void ExecuteFlatSlice(Vector &result, Vector &list_vector, Vector &begin_
 			new_sel.set_index(i, sel.get_index(i));
 		}
 		result_child_vector->Slice(new_sel, sel_length);
+		ListVector::SetListSize(result, sel_length);
 	}
 }

package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp CHANGED Viewed

@@ -236,9 +236,18 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
 static unique_ptr<FunctionData> ListSortBind(ClientContext &context, ScalarFunction &bound_function,
                                              vector<unique_ptr<Expression>> &arguments, OrderType &order,
                                              OrderByNullType &null_order) {
+	LogicalType child_type;
+	if (arguments[0]->return_type == LogicalTypeId::UNKNOWN) {
+		bound_function.arguments[0] = LogicalTypeId::UNKNOWN;
+		bound_function.return_type = LogicalType::SQLNULL;
+		child_type = bound_function.return_type;
+		return make_uniq<ListSortBindData>(order, null_order, bound_function.return_type, child_type, context);
+	}
 	bound_function.arguments[0] = arguments[0]->return_type;
 	bound_function.return_type = arguments[0]->return_type;
-	auto child_type = ListType::GetChildType(arguments[0]->return_type);
+	child_type = ListType::GetChildType(arguments[0]->return_type);
 	return make_uniq<ListSortBindData>(order, null_order, bound_function.return_type, child_type, context);
 }

package/src/duckdb/src/core_functions/scalar/map/map_concat.cpp CHANGED Viewed

@@ -97,7 +97,6 @@ static void MapConcatFunction(DataChunk &args, ExpressionState &state, Vector &r
 			auto &values = MapVector::GetValues(map);
 			values_list.push_back(values.GetValue(mapping.key_index));
 		}
-		idx_t entries_count = keys_list.size();
 		D_ASSERT(values_list.size() == keys_list.size());
 		result_entry.offset = ListVector::GetListSize(result);
 		result_entry.length = values_list.size();
@@ -105,7 +104,6 @@ static void MapConcatFunction(DataChunk &args, ExpressionState &state, Vector &r
 		for (auto &list_entry : list_entries) {
 			ListVector::PushBack(result, list_entry);
 		}
-		ListVector::SetListSize(result, ListVector::GetListSize(result) + entries_count);
 	}
 	if (args.AllConstant()) {

package/src/duckdb/src/core_functions/scalar/struct/struct_pack.cpp CHANGED Viewed

@@ -39,15 +39,21 @@ static unique_ptr<FunctionData> StructPackBind(ClientContext &context, ScalarFun
 		throw Exception("Can't pack nothing into a struct");
 	}
 	child_list_t<LogicalType> struct_children;
+	bool unnamed = false;
 	for (idx_t i = 0; i < arguments.size(); i++) {
 		auto &child = arguments[i];
-		if (child->alias.empty() && bound_function.name == "struct_pack") {
-			throw BinderException("Need named argument for struct pack, e.g. STRUCT_PACK(a := b)");
+		if (child->alias.empty()) {
+			if (bound_function.name == "struct_pack") {
+				throw BinderException("Need named argument for struct pack, e.g. STRUCT_PACK(a := b)");
+			} else {
+				D_ASSERT(bound_function.name == "row");
+				if (i > 1) {
+					D_ASSERT(unnamed);
+				}
+				unnamed = true;
+			}
 		}
-		if (child->alias.empty() && bound_function.name == "row") {
-			child->alias = "v" + std::to_string(i + 1);
-		}
-		if (name_collision_set.find(child->alias) != name_collision_set.end()) {
+		if (!child->alias.empty() && name_collision_set.find(child->alias) != name_collision_set.end()) {
 			throw BinderException("Duplicate struct entry name \"%s\"", child->alias);
 		}
 		name_collision_set.insert(child->alias);

package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_inner.cpp CHANGED Viewed

@@ -3,21 +3,12 @@
 namespace duckdb {
-template <class OP>
-struct ComparisonOperationWrapper {
-	template <class T>
-	static inline bool Operation(T left, T right, bool left_is_null, bool right_is_null) {
-		if (left_is_null || right_is_null) {
-			return false;
-		}
-		return OP::Operation(left, right);
-	}
-};
 struct InitialNestedLoopJoin {
 	template <class T, class OP>
 	static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos,
 	                       SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) {
+		using MATCH_OP = ComparisonOperationWrapper<OP>;
 		// initialize phase of nested loop join
 		// fill lvector and rvector with matches from the base vectors
 		UnifiedVectorFormat left_data, right_data;
@@ -37,7 +28,7 @@ struct InitialNestedLoopJoin {
 				}
 				idx_t left_position = left_data.sel->get_index(lpos);
 				bool left_is_valid = left_data.validity.RowIsValid(left_position);
-				if (OP::Operation(ldata[left_position], rdata[right_position], !left_is_valid, !right_is_valid)) {
+				if (MATCH_OP::Operation(ldata[left_position], rdata[right_position], !left_is_valid, !right_is_valid)) {
 					// emit tuple
 					lvector.set_index(result_count, lpos);
 					rvector.set_index(result_count, rpos);
@@ -54,6 +45,8 @@ struct RefineNestedLoopJoin {
 	template <class T, class OP>
 	static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos,
 	                       SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) {
+		using MATCH_OP = ComparisonOperationWrapper<OP>;
 		UnifiedVectorFormat left_data, right_data;
 		left.ToUnifiedFormat(left_size, left_data);
 		right.ToUnifiedFormat(right_size, right_data);
@@ -72,7 +65,7 @@ struct RefineNestedLoopJoin {
 			auto right_idx = right_data.sel->get_index(ridx);
 			bool left_is_valid = left_data.validity.RowIsValid(left_idx);
 			bool right_is_valid = right_data.validity.RowIsValid(right_idx);
-			if (OP::Operation(ldata[left_idx], rdata[right_idx], !left_is_valid, !right_is_valid)) {
+			if (MATCH_OP::Operation(ldata[left_idx], rdata[right_idx], !left_is_valid, !right_is_valid)) {
 				lvector.set_index(result_count, lidx);
 				rvector.set_index(result_count, ridx);
 				result_count++;
@@ -139,26 +132,26 @@ idx_t NestedLoopJoinComparisonSwitch(Vector &left, Vector &right, idx_t left_siz
 	D_ASSERT(left.GetType() == right.GetType());
 	switch (comparison_type) {
 	case ExpressionType::COMPARE_EQUAL:
-		return NestedLoopJoinTypeSwitch<NLTYPE, ComparisonOperationWrapper<duckdb::Equals>>(
-		    left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, Equals>(left, right, left_size, right_size, lpos, rpos, lvector,
+		                                                rvector, current_match_count);
 	case ExpressionType::COMPARE_NOTEQUAL:
-		return NestedLoopJoinTypeSwitch<NLTYPE, ComparisonOperationWrapper<duckdb::NotEquals>>(
-		    left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, NotEquals>(left, right, left_size, right_size, lpos, rpos, lvector,
+		                                                   rvector, current_match_count);
 	case ExpressionType::COMPARE_LESSTHAN:
-		return NestedLoopJoinTypeSwitch<NLTYPE, ComparisonOperationWrapper<duckdb::LessThan>>(
-		    left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, LessThan>(left, right, left_size, right_size, lpos, rpos, lvector,
+		                                                  rvector, current_match_count);
 	case ExpressionType::COMPARE_GREATERTHAN:
-		return NestedLoopJoinTypeSwitch<NLTYPE, ComparisonOperationWrapper<duckdb::GreaterThan>>(
-		    left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, GreaterThan>(left, right, left_size, right_size, lpos, rpos, lvector,
+		                                                     rvector, current_match_count);
 	case ExpressionType::COMPARE_LESSTHANOREQUALTO:
-		return NestedLoopJoinTypeSwitch<NLTYPE, ComparisonOperationWrapper<duckdb::LessThanEquals>>(
-		    left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, LessThanEquals>(left, right, left_size, right_size, lpos, rpos, lvector,
+		                                                        rvector, current_match_count);
 	case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
-		return NestedLoopJoinTypeSwitch<NLTYPE, ComparisonOperationWrapper<duckdb::GreaterThanEquals>>(
-		    left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, GreaterThanEquals>(left, right, left_size, right_size, lpos, rpos,
+		                                                           lvector, rvector, current_match_count);
 	case ExpressionType::COMPARE_DISTINCT_FROM:
-		return NestedLoopJoinTypeSwitch<NLTYPE, duckdb::DistinctFrom>(left, right, left_size, right_size, lpos, rpos,
-		                                                              lvector, rvector, current_match_count);
+		return NestedLoopJoinTypeSwitch<NLTYPE, DistinctFrom>(left, right, left_size, right_size, lpos, rpos, lvector,
+		                                                      rvector, current_match_count);
 	default:
 		throw NotImplementedException("Unimplemented comparison type for join!");
 	}

package/src/duckdb/src/execution/nested_loop_join/nested_loop_join_mark.cpp CHANGED Viewed

@@ -6,6 +6,8 @@ namespace duckdb {
 template <class T, class OP>
 static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[]) {
+	using MATCH_OP = ComparisonOperationWrapper<OP>;
 	UnifiedVectorFormat left_data, right_data;
 	left.ToUnifiedFormat(lcount, left_data);
 	right.ToUnifiedFormat(rcount, right_data);
@@ -17,15 +19,17 @@ static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t r
 			continue;
 		}
 		auto lidx = left_data.sel->get_index(i);
-		if (!left_data.validity.RowIsValid(lidx)) {
+		const auto left_null = !left_data.validity.RowIsValid(lidx);
+		if (!MATCH_OP::COMPARE_NULL && left_null) {
 			continue;
 		}
 		for (idx_t j = 0; j < rcount; j++) {
 			auto ridx = right_data.sel->get_index(j);
-			if (!right_data.validity.RowIsValid(ridx)) {
+			const auto right_null = !right_data.validity.RowIsValid(ridx);
+			if (!MATCH_OP::COMPARE_NULL && right_null) {
 				continue;
 			}
-			if (OP::Operation(ldata[lidx], rdata[ridx])) {
+			if (MATCH_OP::template Operation<T>(ldata[lidx], rdata[ridx], left_null, right_null)) {
 				found_match[i] = true;
 				break;
 			}
@@ -62,6 +66,12 @@ static void MarkJoinNested(Vector &left, Vector &right, idx_t lcount, idx_t rcou
 		case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
 			count = VectorOperations::GreaterThanEquals(left_reference, right, nullptr, rcount, nullptr, nullptr);
 			break;
+		case ExpressionType::COMPARE_DISTINCT_FROM:
+			count = VectorOperations::DistinctFrom(left_reference, right, nullptr, rcount, nullptr, nullptr);
+			break;
+		case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
+			count = VectorOperations::NotDistinctFrom(left_reference, right, nullptr, rcount, nullptr, nullptr);
+			break;
 		default:
 			throw InternalException("Unsupported comparison type for MarkJoinNested");
 		}
@@ -116,17 +126,19 @@ static void MarkJoinComparisonSwitch(Vector &left, Vector &right, idx_t lcount,
 	D_ASSERT(left.GetType() == right.GetType());
 	switch (comparison_type) {
 	case ExpressionType::COMPARE_EQUAL:
-		return MarkJoinSwitch<duckdb::Equals>(left, right, lcount, rcount, found_match);
+		return MarkJoinSwitch<Equals>(left, right, lcount, rcount, found_match);
 	case ExpressionType::COMPARE_NOTEQUAL:
-		return MarkJoinSwitch<duckdb::NotEquals>(left, right, lcount, rcount, found_match);
+		return MarkJoinSwitch<NotEquals>(left, right, lcount, rcount, found_match);
 	case ExpressionType::COMPARE_LESSTHAN:
-		return MarkJoinSwitch<duckdb::LessThan>(left, right, lcount, rcount, found_match);
+		return MarkJoinSwitch<LessThan>(left, right, lcount, rcount, found_match);
 	case ExpressionType::COMPARE_GREATERTHAN:
-		return MarkJoinSwitch<duckdb::GreaterThan>(left, right, lcount, rcount, found_match);
+		return MarkJoinSwitch<GreaterThan>(left, right, lcount, rcount, found_match);
 	case ExpressionType::COMPARE_LESSTHANOREQUALTO:
-		return MarkJoinSwitch<duckdb::LessThanEquals>(left, right, lcount, rcount, found_match);
+		return MarkJoinSwitch<LessThanEquals>(left, right, lcount, rcount, found_match);
 	case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
-		return MarkJoinSwitch<duckdb::GreaterThanEquals>(left, right, lcount, rcount, found_match);
+		return MarkJoinSwitch<GreaterThanEquals>(left, right, lcount, rcount, found_match);
+	case ExpressionType::COMPARE_DISTINCT_FROM:
+		return MarkJoinSwitch<DistinctFrom>(left, right, lcount, rcount, found_match);
 	default:
 		throw NotImplementedException("Unimplemented comparison type for join!");
 	}