npm - duckdb - Versions diffs - 0.4.1-dev1514.0 → 0.4.1-dev1527.0 - Mend

duckdb 0.4.1-dev1514.0 → 0.4.1-dev1527.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json +1 -1
package/src/duckdb.cpp +151 -122
package/src/duckdb.hpp +2 -2
package/src/parquet-amalgamation.cpp +37379 -37379

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "duckdb",
   "main": "./lib/duckdb.js",
-  "version": "0.4.1-dev1514.0",
+  "version": "0.4.1-dev1527.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb.cpp CHANGED Viewed

@@ -5657,6 +5657,7 @@ static DefaultMacro internal_macros[] = {
 	{DEFAULT_SCHEMA, "list_entropy", {"l", nullptr}, "list_aggr(l, 'entropy')"},
 	{DEFAULT_SCHEMA, "list_last", {"l", nullptr}, "list_aggr(l, 'last')"},
 	{DEFAULT_SCHEMA, "list_first", {"l", nullptr}, "list_aggr(l, 'first')"},
+	{DEFAULT_SCHEMA, "list_any_value", {"l", nullptr}, "list_aggr(l, 'any_value')"},
 	{DEFAULT_SCHEMA, "list_kurtosis", {"l", nullptr}, "list_aggr(l, 'kurtosis')"},
 	{DEFAULT_SCHEMA, "list_min", {"l", nullptr}, "list_aggr(l, 'min')"},
 	{DEFAULT_SCHEMA, "list_max", {"l", nullptr}, "list_aggr(l, 'max')"},
@@ -63484,9 +63485,11 @@ static void PrepareInputExpression(Expression *expr, ExpressionExecutor &executo
 }
 struct WindowInputExpression {
-	WindowInputExpression(Expression *expr_p, Allocator &allocator) : expr(expr_p), scalar(true), executor(allocator) {
+	WindowInputExpression(Expression *expr_p, Allocator &allocator)
+	    : expr(expr_p), ptype(PhysicalType::INVALID), scalar(true), executor(allocator) {
 		if (expr) {
 			PrepareInputExpression(expr, executor, chunk);
+			ptype = expr->return_type.InternalType();
 			scalar = expr->IsScalar();
 		}
 	}
@@ -63519,25 +63522,51 @@ struct WindowInputExpression {
 	}
 	Expression *expr;
+	PhysicalType ptype;
 	bool scalar;
 	ExpressionExecutor executor;
 	DataChunk chunk;
 };
-struct WindowInputCollection {
-	WindowInputCollection(Expression *expr_p, Allocator &allocator)
-	    : input_expr(expr_p, allocator), collection(allocator) {
+struct WindowInputColumn {
+	WindowInputColumn(Expression *expr_p, Allocator &allocator, idx_t capacity_p)
+	    : input_expr(expr_p, allocator), count(0), capacity(capacity_p) {
+		if (input_expr.expr) {
+			target = make_unique<Vector>(input_expr.chunk.data[0].GetType(), capacity);
+		}
 	}
 	void Append(DataChunk &input_chunk) {
-		if (input_expr.expr && (!input_expr.scalar || collection.Count() == 0)) {
+		if (input_expr.expr && (!input_expr.scalar || !count)) {
 			input_expr.Execute(input_chunk);
-			collection.Append(input_expr.chunk);
+			auto &source = input_expr.chunk.data[0];
+			const auto source_count = input_expr.chunk.size();
+			D_ASSERT(count + source_count <= capacity);
+			VectorOperations::Copy(source, *target, source_count, 0, count);
+			count += source_count;
 		}
 	}
+	inline bool CellIsNull(idx_t i) {
+		D_ASSERT(target);
+		D_ASSERT(i < count);
+		return FlatVector::IsNull(*target, input_expr.scalar ? 0 : i);
+	}
+	template <typename T>
+	inline T GetCell(idx_t i) {
+		D_ASSERT(target);
+		D_ASSERT(i < count);
+		const auto data = FlatVector::GetData<T>(*target);
+		return data[input_expr.scalar ? 0 : i];
+	}
 	WindowInputExpression input_expr;
-	ChunkCollection collection;
+private:
+	unique_ptr<Vector> target;
+	idx_t count;
+	idx_t capacity;
 };
 static void ScanRowCollection(RowDataCollection &rows, RowDataCollection &heap, ChunkCollection &cols,
@@ -63591,7 +63620,7 @@ struct WindowBoundariesState {
 	      needs_peer(BoundaryNeedsPeer(wexpr->end) || wexpr->type == ExpressionType::WINDOW_CUME_DIST) {
 	}
-	void Update(const idx_t row_idx, ChunkCollection &range_collection, const idx_t source_offset,
+	void Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t source_offset,
 	            WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
 	            const ValidityMask &partition_mask, const ValidityMask &order_mask);
@@ -63643,20 +63672,19 @@ static bool CellIsNull(ChunkCollection &collection, idx_t column, idx_t index) {
 }
 template <typename T>
-struct ChunkCollectionIterator {
-	using iterator = ChunkCollectionIterator<T>;
+struct WindowColumnIterator {
+	using iterator = WindowColumnIterator<T>;
 	using iterator_category = std::forward_iterator_tag;
 	using difference_type = std::ptrdiff_t;
 	using value_type = T;
 	using reference = T;
 	using pointer = idx_t;
-	ChunkCollectionIterator(ChunkCollection &coll_p, idx_t col_no_p, pointer pos_p = 0)
-	    : coll(&coll_p), col_no(col_no_p), pos(pos_p) {
+	explicit WindowColumnIterator(WindowInputColumn &coll_p, pointer pos_p = 0) : coll(&coll_p), pos(pos_p) {
 	}
 	inline reference operator*() const {
-		return GetCell<T>(*coll, col_no, pos);
+		return coll->GetCell<T>(pos);
 	}
 	inline explicit operator pointer() const {
 		return pos;
@@ -63680,8 +63708,7 @@ struct ChunkCollectionIterator {
 	}
 private:
-	ChunkCollection *coll;
-	idx_t col_no;
+	WindowInputColumn *coll;
 	pointer pos;
 };
@@ -63693,14 +63720,14 @@ struct OperationCompare : public std::function<bool(T, T)> {
 };
 template <typename T, typename OP, bool FROM>
-static idx_t FindTypedRangeBound(ChunkCollection &over, const idx_t order_col, const idx_t order_begin,
-                                 const idx_t order_end, WindowInputExpression &boundary, const idx_t boundary_row) {
+static idx_t FindTypedRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
+                                 WindowInputExpression &boundary, const idx_t boundary_row) {
 	D_ASSERT(!boundary.CellIsNull(boundary_row));
 	const auto val = boundary.GetCell<T>(boundary_row);
 	OperationCompare<T, OP> comp;
-	ChunkCollectionIterator<T> begin(over, order_col, order_begin);
-	ChunkCollectionIterator<T> end(over, order_col, order_end);
+	WindowColumnIterator<T> begin(over, order_begin);
+	WindowColumnIterator<T> end(over, order_end);
 	if (FROM) {
 		return idx_t(std::lower_bound(begin, end, val, comp));
 	} else {
@@ -63709,58 +63736,55 @@ static idx_t FindTypedRangeBound(ChunkCollection &over, const idx_t order_col, c
 }
 template <typename OP, bool FROM>
-static idx_t FindRangeBound(ChunkCollection &over, const idx_t order_col, const idx_t order_begin,
-                            const idx_t order_end, WindowInputExpression &boundary, const idx_t expr_idx) {
-	const auto &over_types = over.Types();
-	D_ASSERT(over_types.size() > order_col);
+static idx_t FindRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
+                            WindowInputExpression &boundary, const idx_t expr_idx) {
 	D_ASSERT(boundary.chunk.ColumnCount() == 1);
-	D_ASSERT(boundary.chunk.data[0].GetType() == over_types[order_col]);
+	D_ASSERT(boundary.chunk.data[0].GetType().InternalType() == over.input_expr.ptype);
-	switch (over_types[order_col].InternalType()) {
+	switch (over.input_expr.ptype) {
 	case PhysicalType::INT8:
-		return FindTypedRangeBound<int8_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindTypedRangeBound<int8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	case PhysicalType::INT16:
-		return FindTypedRangeBound<int16_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindTypedRangeBound<int16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	case PhysicalType::INT32:
-		return FindTypedRangeBound<int32_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindTypedRangeBound<int32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	case PhysicalType::INT64:
-		return FindTypedRangeBound<int64_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindTypedRangeBound<int64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	case PhysicalType::UINT8:
-		return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	case PhysicalType::UINT16:
-		return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	case PhysicalType::UINT32:
-		return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	case PhysicalType::UINT64:
-		return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	case PhysicalType::INT128:
-		return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	case PhysicalType::FLOAT:
-		return FindTypedRangeBound<float, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindTypedRangeBound<float, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	case PhysicalType::DOUBLE:
-		return FindTypedRangeBound<double, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindTypedRangeBound<double, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	case PhysicalType::INTERVAL:
-		return FindTypedRangeBound<interval_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindTypedRangeBound<interval_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	default:
 		throw InternalException("Unsupported column type for RANGE");
 	}
 }
 template <bool FROM>
-static idx_t FindOrderedRangeBound(ChunkCollection &over, const idx_t order_col, const OrderType range_sense,
-                                   const idx_t order_begin, const idx_t order_end, WindowInputExpression &boundary,
-                                   const idx_t expr_idx) {
+static idx_t FindOrderedRangeBound(WindowInputColumn &over, const OrderType range_sense, const idx_t order_begin,
+                                   const idx_t order_end, WindowInputExpression &boundary, const idx_t expr_idx) {
 	switch (range_sense) {
 	case OrderType::ASCENDING:
-		return FindRangeBound<LessThan, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindRangeBound<LessThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	case OrderType::DESCENDING:
-		return FindRangeBound<GreaterThan, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
+		return FindRangeBound<GreaterThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
 	default:
 		throw InternalException("Unsupported ORDER BY sense for RANGE");
 	}
 }
-void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_collection, const idx_t expr_idx,
+void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t expr_idx,
                                    WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
                                    const ValidityMask &partition_mask, const ValidityMask &order_mask) {
@@ -63790,7 +63814,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
 			if ((bounds.valid_start < bounds.valid_end) && bounds.has_preceding_range) {
 				// Exclude any leading NULLs
-				if (CellIsNull(range_collection, 0, bounds.valid_start)) {
+				if (range_collection.CellIsNull(bounds.valid_start)) {
 					idx_t n = 1;
 					bounds.valid_start = FindNextStart(order_mask, bounds.valid_start + 1, bounds.valid_end, n);
 				}
@@ -63798,7 +63822,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
 			if ((bounds.valid_start < bounds.valid_end) && bounds.has_following_range) {
 				// Exclude any trailing NULLs
-				if (CellIsNull(range_collection, 0, bounds.valid_end - 1)) {
+				if (range_collection.CellIsNull(bounds.valid_end - 1)) {
 					idx_t n = 1;
 					bounds.valid_end = FindPrevStart(order_mask, bounds.valid_start, bounds.valid_end, n);
 				}
@@ -63849,8 +63873,8 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
 		if (boundary_start.CellIsNull(expr_idx)) {
 			bounds.window_start = bounds.peer_start;
 		} else {
-			bounds.window_start = FindOrderedRangeBound<true>(range_collection, 0, bounds.range_sense,
-			                                                  bounds.valid_start, row_idx, boundary_start, expr_idx);
+			bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, bounds.valid_start,
+			                                                  row_idx, boundary_start, expr_idx);
 		}
 		break;
 	}
@@ -63858,7 +63882,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
 		if (boundary_start.CellIsNull(expr_idx)) {
 			bounds.window_start = bounds.peer_start;
 		} else {
-			bounds.window_start = FindOrderedRangeBound<true>(range_collection, 0, bounds.range_sense, row_idx,
+			bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, row_idx,
 			                                                  bounds.valid_end, boundary_start, expr_idx);
 		}
 		break;
@@ -63887,8 +63911,8 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
 		if (boundary_end.CellIsNull(expr_idx)) {
 			bounds.window_end = bounds.peer_end;
 		} else {
-			bounds.window_end = FindOrderedRangeBound<false>(range_collection, 0, bounds.range_sense,
-			                                                 bounds.valid_start, row_idx, boundary_end, expr_idx);
+			bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, bounds.valid_start,
+			                                                 row_idx, boundary_end, expr_idx);
 		}
 		break;
 	}
@@ -63896,7 +63920,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
 		if (boundary_end.CellIsNull(expr_idx)) {
 			bounds.window_end = bounds.peer_end;
 		} else {
-			bounds.window_end = FindOrderedRangeBound<false>(range_collection, 0, bounds.range_sense, row_idx,
+			bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, row_idx,
 			                                                 bounds.valid_end, boundary_end, expr_idx);
 		}
 		break;
@@ -63950,7 +63974,7 @@ struct WindowExecutor {
 	WindowInputExpression boundary_end;
 	// evaluate RANGE expressions, if needed
-	WindowInputCollection range;
+	WindowInputColumn range;
 	// IGNORE NULLS
 	ValidityMask ignore_nulls;
@@ -63969,7 +63993,7 @@ WindowExecutor::WindowExecutor(BoundWindowExpression *wexpr, ChunkCollection &in
       boundary_start(wexpr->start_expr.get(), input.GetAllocator()),
       boundary_end(wexpr->end_expr.get(), input.GetAllocator()),
       range((bounds.has_preceding_range || bounds.has_following_range) ? wexpr->orders[0].expression.get() : nullptr,
-            input.GetAllocator())
+            input.GetAllocator(), input.Count())
 {
 	auto &allocator = input.GetAllocator();
@@ -64082,8 +64106,7 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
 	// this is the main loop, go through all sorted rows and compute window function result
 	for (idx_t output_offset = 0; output_offset < input_chunk.size(); ++output_offset, ++row_idx) {
 		// special case, OVER (), aggregate over everything
-		bounds.Update(row_idx, range.collection, output_offset, boundary_start, boundary_end, partition_mask,
-		              order_mask);
+		bounds.Update(row_idx, range, output_offset, boundary_start, boundary_end, partition_mask, order_mask);
 		if (WindowNeedsRank(wexpr)) {
 			if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init
 				dense_rank = 1;
@@ -64328,7 +64351,7 @@ public:
 		CreateMergeTasks(pipeline, *this, gstate, hash_group);
 	}
-	static bool CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
+	static void CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
 	                             WindowGlobalHashGroup &hash_group) {
 		// Multiple blocks remaining in the group: Schedule the next round
@@ -64336,28 +64359,6 @@ public:
 			hash_group.global_sort->InitializeMergeRound();
 			auto new_event = make_shared<WindowMergeEvent>(state, pipeline, hash_group);
 			event.InsertEvent(move(new_event));
-			return true;
-		}
-		//	Find the next group to sort
-		for (;;) {
-			auto group = state.GetNextSortGroup();
-			if (group >= state.hash_groups.size()) {
-				//	Out of groups
-				return false;
-			}
-			auto &hash_group = *state.hash_groups[group];
-			auto &global_sort = *hash_group.global_sort;
-			// Prepare for merge sort phase
-			hash_group.PrepareMergePhase();
-			if (global_sort.sorted_blocks.size() > 1) {
-				global_sort.InitializeMergeRound();
-				auto new_event = make_shared<WindowMergeEvent>(state, pipeline, hash_group);
-				event.InsertEvent(move(new_event));
-				return true;
-			}
 		}
 	}
 };
@@ -64381,11 +64382,14 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
 		return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
 	}
-	auto &hash_group = *state.hash_groups[group];
+	// Schedule all the sorts for maximum thread utilisation
+	for (; group < state.hash_groups.size(); group = state.GetNextSortGroup()) {
+		auto &hash_group = *state.hash_groups[group];
-	// Prepare for merge sort phase
-	hash_group.PrepareMergePhase();
-	WindowMergeEvent::CreateMergeTasks(pipeline, event, state, hash_group);
+		// Prepare for merge sort phase
+		hash_group.PrepareMergePhase();
+		WindowMergeEvent::CreateMergeTasks(pipeline, event, state, hash_group);
+	}
 	return SinkFinalizeType::READY;
 }
@@ -64475,7 +64479,13 @@ void WindowLocalSourceState::MaterializeInput(const vector<LogicalType> &payload
 	// scan the sorted row data
 	D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
-	PayloadScanner scanner(*global_sort_state.sorted_blocks[0]->payload_data, global_sort_state);
+	auto &sb = *global_sort_state.sorted_blocks[0];
+	// Free up some memory before allocating more
+	sb.radix_sorting_data.clear();
+	sb.blob_sorting_data = nullptr;
+	PayloadScanner scanner(*sb.payload_data, global_sort_state);
 	DataChunk payload_chunk;
 	payload_chunk.Initialize(allocator, payload_types);
 	for (;;) {
@@ -85233,15 +85243,18 @@ struct FirstFunctionBase {
 	}
 };
-template <bool LAST>
+template <bool LAST, bool SKIP_NULLS>
 struct FirstFunction : public FirstFunctionBase {
 	template <class INPUT_TYPE, class STATE, class OP>
 	static void Operation(STATE *state, AggregateInputData &, INPUT_TYPE *input, ValidityMask &mask, idx_t idx) {
 		if (LAST || !state->is_set) {
-			state->is_set = true;
 			if (!mask.RowIsValid(idx)) {
+				if (!SKIP_NULLS) {
+					state->is_set = true;
+				}
 				state->is_null = true;
 			} else {
+				state->is_set = true;
 				state->is_null = false;
 				state->value = input[idx];
 			}
@@ -85271,14 +85284,17 @@ struct FirstFunction : public FirstFunctionBase {
 	}
 };
-template <bool LAST>
+template <bool LAST, bool SKIP_NULLS>
 struct FirstFunctionString : public FirstFunctionBase {
 	template <class STATE>
 	static void SetValue(STATE *state, string_t value, bool is_null) {
-		state->is_set = true;
 		if (is_null) {
-			state->is_null = true;
+			if (!SKIP_NULLS) {
+				state->is_set = true;
+				state->is_null = true;
+			}
 		} else {
+			state->is_set = true;
 			if (value.IsInlined()) {
 				state->value = value;
 			} else {
@@ -85333,7 +85349,7 @@ struct FirstStateVector {
 	Vector *value;
 };
-template <bool LAST>
+template <bool LAST, bool SKIP_NULLS>
 struct FirstVectorFunction {
 	template <class STATE>
 	static void Initialize(STATE *state) {
@@ -85347,7 +85363,7 @@ struct FirstVectorFunction {
 		}
 	}
 	static bool IgnoreNull() {
-		return false;
+		return SKIP_NULLS;
 	}
 	template <class STATE>
@@ -85363,11 +85379,18 @@ struct FirstVectorFunction {
 	static void Update(Vector inputs[], AggregateInputData &, idx_t input_count, Vector &state_vector, idx_t count) {
 		auto &input = inputs[0];
+		UnifiedVectorFormat idata;
+		input.ToUnifiedFormat(count, idata);
 		UnifiedVectorFormat sdata;
 		state_vector.ToUnifiedFormat(count, sdata);
 		auto states = (FirstStateVector **)sdata.data;
 		for (idx_t i = 0; i < count; i++) {
+			const auto idx = idata.sel->get_index(i);
+			if (SKIP_NULLS && !idata.validity.RowIsValid(idx)) {
+				continue;
+			}
 			auto state = states[sdata.sel->get_index(i)];
 			if (LAST || !state->value) {
 				SetValue(state, input, i);
@@ -85406,79 +85429,79 @@ struct FirstVectorFunction {
 	}
 };
-template <class T, bool LAST>
+template <class T, bool LAST, bool SKIP_NULLS>
 static AggregateFunction GetFirstAggregateTemplated(LogicalType type) {
-	auto agg = AggregateFunction::UnaryAggregate<FirstState<T>, T, T, FirstFunction<LAST>>(type, type);
+	auto agg = AggregateFunction::UnaryAggregate<FirstState<T>, T, T, FirstFunction<LAST, SKIP_NULLS>>(type, type);
 	return agg;
 }
-template <bool LAST>
+template <bool LAST, bool SKIP_NULLS>
 static AggregateFunction GetFirstFunction(const LogicalType &type);
-template <bool LAST>
+template <bool LAST, bool SKIP_NULLS>
 AggregateFunction GetDecimalFirstFunction(const LogicalType &type) {
 	D_ASSERT(type.id() == LogicalTypeId::DECIMAL);
 	switch (type.InternalType()) {
 	case PhysicalType::INT16:
-		return GetFirstFunction<LAST>(LogicalType::SMALLINT);
+		return GetFirstFunction<LAST, SKIP_NULLS>(LogicalType::SMALLINT);
 	case PhysicalType::INT32:
-		return GetFirstFunction<LAST>(LogicalType::INTEGER);
+		return GetFirstFunction<LAST, SKIP_NULLS>(LogicalType::INTEGER);
 	case PhysicalType::INT64:
-		return GetFirstFunction<LAST>(LogicalType::BIGINT);
+		return GetFirstFunction<LAST, SKIP_NULLS>(LogicalType::BIGINT);
 	default:
-		return GetFirstFunction<LAST>(LogicalType::HUGEINT);
+		return GetFirstFunction<LAST, SKIP_NULLS>(LogicalType::HUGEINT);
 	}
 }
-template <bool LAST>
+template <bool LAST, bool SKIP_NULLS>
 static AggregateFunction GetFirstFunction(const LogicalType &type) {
 	switch (type.id()) {
 	case LogicalTypeId::BOOLEAN:
-		return GetFirstAggregateTemplated<int8_t, LAST>(type);
+		return GetFirstAggregateTemplated<int8_t, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::TINYINT:
-		return GetFirstAggregateTemplated<int8_t, LAST>(type);
+		return GetFirstAggregateTemplated<int8_t, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::SMALLINT:
-		return GetFirstAggregateTemplated<int16_t, LAST>(type);
+		return GetFirstAggregateTemplated<int16_t, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::INTEGER:
 	case LogicalTypeId::DATE:
-		return GetFirstAggregateTemplated<int32_t, LAST>(type);
+		return GetFirstAggregateTemplated<int32_t, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::BIGINT:
 	case LogicalTypeId::TIME:
 	case LogicalTypeId::TIMESTAMP:
 	case LogicalTypeId::TIME_TZ:
 	case LogicalTypeId::TIMESTAMP_TZ:
-		return GetFirstAggregateTemplated<int64_t, LAST>(type);
+		return GetFirstAggregateTemplated<int64_t, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::UTINYINT:
-		return GetFirstAggregateTemplated<uint8_t, LAST>(type);
+		return GetFirstAggregateTemplated<uint8_t, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::USMALLINT:
-		return GetFirstAggregateTemplated<uint16_t, LAST>(type);
+		return GetFirstAggregateTemplated<uint16_t, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::UINTEGER:
-		return GetFirstAggregateTemplated<uint32_t, LAST>(type);
+		return GetFirstAggregateTemplated<uint32_t, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::UBIGINT:
-		return GetFirstAggregateTemplated<uint64_t, LAST>(type);
+		return GetFirstAggregateTemplated<uint64_t, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::HUGEINT:
-		return GetFirstAggregateTemplated<hugeint_t, LAST>(type);
+		return GetFirstAggregateTemplated<hugeint_t, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::FLOAT:
-		return GetFirstAggregateTemplated<float, LAST>(type);
+		return GetFirstAggregateTemplated<float, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::DOUBLE:
-		return GetFirstAggregateTemplated<double, LAST>(type);
+		return GetFirstAggregateTemplated<double, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::INTERVAL:
-		return GetFirstAggregateTemplated<interval_t, LAST>(type);
+		return GetFirstAggregateTemplated<interval_t, LAST, SKIP_NULLS>(type);
 	case LogicalTypeId::VARCHAR:
 	case LogicalTypeId::BLOB: {
 		auto agg = AggregateFunction::UnaryAggregateDestructor<FirstState<string_t>, string_t, string_t,
-		                                                       FirstFunctionString<LAST>>(type, type);
+		                                                       FirstFunctionString<LAST, SKIP_NULLS>>(type, type);
 		return agg;
 	}
 	case LogicalTypeId::DECIMAL: {
 		type.Verify();
-		AggregateFunction function = GetDecimalFirstFunction<LAST>(type);
+		AggregateFunction function = GetDecimalFirstFunction<LAST, SKIP_NULLS>(type);
 		function.arguments[0] = type;
 		function.return_type = type;
 		return function;
 	}
 	default: {
-		using OP = FirstVectorFunction<LAST>;
+		using OP = FirstVectorFunction<LAST, SKIP_NULLS>;
 		return AggregateFunction({type}, type, AggregateFunction::StateSize<FirstStateVector>,
 		                         AggregateFunction::StateInitialize<FirstStateVector, OP>, OP::Update,
 		                         AggregateFunction::StateCombine<FirstStateVector, OP>,
@@ -85489,16 +85512,16 @@ static AggregateFunction GetFirstFunction(const LogicalType &type) {
 }
 AggregateFunction FirstFun::GetFunction(const LogicalType &type) {
-	auto fun = GetFirstFunction<false>(type);
+	auto fun = GetFirstFunction<false, false>(type);
 	fun.name = "first";
 	return fun;
 }
-template <bool LAST>
+template <bool LAST, bool SKIP_NULLS>
 unique_ptr<FunctionData> BindDecimalFirst(ClientContext &context, AggregateFunction &function,
                                           vector<unique_ptr<Expression>> &arguments) {
 	auto decimal_type = arguments[0]->return_type;
-	function = GetFirstFunction<LAST>(decimal_type);
+	function = GetFirstFunction<LAST, SKIP_NULLS>(decimal_type);
 	function.name = "first";
 	function.return_type = decimal_type;
 	return nullptr;
@@ -85507,15 +85530,19 @@ unique_ptr<FunctionData> BindDecimalFirst(ClientContext &context, AggregateFunct
 void FirstFun::RegisterFunction(BuiltinFunctions &set) {
 	AggregateFunctionSet first("first");
 	AggregateFunctionSet last("last");
+	AggregateFunctionSet any_value("any_value");
 	for (auto &type : LogicalType::AllTypes()) {
 		if (type.id() == LogicalTypeId::DECIMAL) {
 			first.AddFunction(AggregateFunction({type}, type, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
-			                                    BindDecimalFirst<false>, nullptr, nullptr, nullptr));
+			                                    BindDecimalFirst<false, false>, nullptr, nullptr, nullptr));
 			last.AddFunction(AggregateFunction({type}, type, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
-			                                   BindDecimalFirst<true>, nullptr, nullptr, nullptr));
+			                                   BindDecimalFirst<true, false>, nullptr, nullptr, nullptr));
+			any_value.AddFunction(AggregateFunction({type}, type, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+			                                        BindDecimalFirst<false, true>, nullptr, nullptr, nullptr));
 		} else {
-			first.AddFunction(GetFirstFunction<false>(type));
-			last.AddFunction(GetFirstFunction<true>(type));
+			first.AddFunction(GetFirstFunction<false, false>(type));
+			last.AddFunction(GetFirstFunction<true, false>(type));
+			any_value.AddFunction(GetFirstFunction<false, true>(type));
 		}
 	}
 	set.AddFunction(first);
@@ -85523,6 +85550,8 @@ void FirstFun::RegisterFunction(BuiltinFunctions &set) {
 	set.AddFunction(first);
 	set.AddFunction(last);
+	set.AddFunction(any_value);
 }
 } // namespace duckdb

package/src/duckdb.hpp CHANGED Viewed

@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 #pragma once
 #define DUCKDB_AMALGAMATION 1
 #define DUCKDB_AMALGAMATION_EXTENDED 1
-#define DUCKDB_SOURCE_ID "50951241d"
-#define DUCKDB_VERSION "v0.4.1-dev1514"
+#define DUCKDB_SOURCE_ID "17ec2ab20"
+#define DUCKDB_VERSION "v0.4.1-dev1527"
 //===----------------------------------------------------------------------===//
 //                         DuckDB
 //