npm - duckdb - Versions diffs - 1.4.2 → 1.4.3 - Mend

duckdb 1.4.2 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92) hide show

package/src/duckdb/src/optimizer/filter_combiner.cpp CHANGED Viewed

@@ -2,6 +2,7 @@
 #include "duckdb/common/enums/expression_type.hpp"
 #include "duckdb/execution/expression_executor.hpp"
+#include "duckdb/function/scalar/string_common.hpp"
 #include "duckdb/optimizer/optimizer.hpp"
 #include "duckdb/planner/expression.hpp"
 #include "duckdb/planner/expression/bound_between_expression.hpp"
@@ -24,6 +25,7 @@
 #include "duckdb/optimizer/column_lifetime_analyzer.hpp"
 #include "duckdb/planner/expression_iterator.hpp"
 #include "duckdb/planner/operator/logical_get.hpp"
+#include "utf8proc_wrapper.hpp"
 namespace duckdb {
@@ -282,6 +284,35 @@ static bool SupportedFilterComparison(ExpressionType expression_type) {
 	}
 }
+bool FilterCombiner::FindNextLegalUTF8(string &prefix_string) {
+	// find the start of the last codepoint
+	idx_t last_codepoint_start;
+	for (last_codepoint_start = prefix_string.size(); last_codepoint_start > 0; last_codepoint_start--) {
+		if (IsCharacter(prefix_string[last_codepoint_start - 1])) {
+			break;
+		}
+	}
+	if (last_codepoint_start == 0) {
+		throw InvalidInputException("Invalid UTF8 found in string \"%s\"", prefix_string);
+	}
+	last_codepoint_start--;
+	int codepoint_size;
+	auto codepoint = Utf8Proc::UTF8ToCodepoint(prefix_string.c_str() + last_codepoint_start, codepoint_size) + 1;
+	if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
+		// next codepoint falls within surrogate range increment to next valid character
+		codepoint = 0xE000;
+	}
+	char next_codepoint_text[4];
+	int next_codepoint_size;
+	if (!Utf8Proc::CodepointToUtf8(codepoint, next_codepoint_size, next_codepoint_text)) {
+		// invalid codepoint
+		return false;
+	}
+	auto s = static_cast<idx_t>(next_codepoint_size);
+	prefix_string = prefix_string.substr(0, last_codepoint_start) + string(next_codepoint_text, s);
+	return true;
+}
 bool TypeSupportsConstantFilter(const LogicalType &type) {
 	if (TypeIsNumeric(type.InternalType())) {
 		return true;
@@ -397,11 +428,14 @@ FilterPushdownResult FilterCombiner::TryPushdownPrefixFilter(TableFilterSet &tab
 	auto &column_index = column_ids[column_ref.binding.column_index];
 	//! Replace prefix with a set of comparisons
 	auto lower_bound = make_uniq<ConstantFilter>(ExpressionType::COMPARE_GREATERTHANOREQUALTO, Value(prefix_string));
-	prefix_string[prefix_string.size() - 1]++;
-	auto upper_bound = make_uniq<ConstantFilter>(ExpressionType::COMPARE_LESSTHAN, Value(prefix_string));
 	table_filters.PushFilter(column_index, std::move(lower_bound));
-	table_filters.PushFilter(column_index, std::move(upper_bound));
-	return FilterPushdownResult::PUSHED_DOWN_FULLY;
+	if (FilterCombiner::FindNextLegalUTF8(prefix_string)) {
+		auto upper_bound = make_uniq<ConstantFilter>(ExpressionType::COMPARE_LESSTHAN, Value(prefix_string));
+		table_filters.PushFilter(column_index, std::move(upper_bound));
+		return FilterPushdownResult::PUSHED_DOWN_FULLY;
+	}
+	// could not find next legal utf8 string - skip upper bound
+	return FilterPushdownResult::NO_PUSHDOWN;
 }
 FilterPushdownResult FilterCombiner::TryPushdownLikeFilter(TableFilterSet &table_filters,

package/src/duckdb/src/optimizer/join_order/relation_manager.cpp CHANGED Viewed

@@ -54,12 +54,9 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr<LogicalOpera
 	auto relation_id = relations.size();
 	auto table_indexes = op.GetTableIndex();
-	bool is_unnest_or_get_with_unnest = op.type == LogicalOperatorType::LOGICAL_UNNEST;
+	bool get_all_child_bindings = op.type == LogicalOperatorType::LOGICAL_UNNEST;
 	if (op.type == LogicalOperatorType::LOGICAL_GET) {
-		auto &get = op.Cast<LogicalGet>();
-		if (get.function.name == "unnest") {
-			is_unnest_or_get_with_unnest = true;
-		}
+		get_all_child_bindings = !op.children.empty();
 	}
 	if (table_indexes.empty()) {
 		// relation represents a non-reorderable relation, most likely a join relation
@@ -72,9 +69,9 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr<LogicalOpera
 			D_ASSERT(relation_mapping.find(reference) == relation_mapping.end());
 			relation_mapping[reference] = relation_id;
 		}
-	} else if (is_unnest_or_get_with_unnest) {
-		// logical unnest has a logical_unnest index, but other bindings can refer to
-		// columns that are not unnested.
+	} else if (get_all_child_bindings) {
+		// logical get has a logical_get index, but if a function is present other bindings can refer to
+		// columns that are not unnested, and from the child of the logical get.
 		auto bindings = op.GetColumnBindings();
 		for (auto &binding : bindings) {
 			relation_mapping[binding.table_index] = relation_id;
@@ -189,10 +186,10 @@ static void ModifyStatsIfLimit(optional_ptr<LogicalOperator> limit_op, RelationS
 	}
 }
-void RelationManager::AddUnnestRelation(JoinOrderOptimizer &optimizer, LogicalOperator &op, LogicalOperator &input_op,
-                                        optional_ptr<LogicalOperator> parent, RelationStats &child_stats,
-                                        optional_ptr<LogicalOperator> limit_op,
-                                        vector<reference<LogicalOperator>> &datasource_filters) {
+void RelationManager::AddRelationWithChildren(JoinOrderOptimizer &optimizer, LogicalOperator &op,
+                                              LogicalOperator &input_op, optional_ptr<LogicalOperator> parent,
+                                              RelationStats &child_stats, optional_ptr<LogicalOperator> limit_op,
+                                              vector<reference<LogicalOperator>> &datasource_filters) {
 	D_ASSERT(!op.children.empty());
 	auto child_optimizer = optimizer.CreateChildOptimizer();
 	op.children[0] = child_optimizer.Optimize(std::move(op.children[0]), &child_stats);
@@ -301,7 +298,7 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica
 	case LogicalOperatorType::LOGICAL_UNNEST: {
 		// optimize children of unnest
 		RelationStats child_stats;
-		AddUnnestRelation(optimizer, *op, input_op, parent, child_stats, limit_op, datasource_filters);
+		AddRelationWithChildren(optimizer, *op, input_op, parent, child_stats, limit_op, datasource_filters);
 		return true;
 	}
 	case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
@@ -359,9 +356,12 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica
 	case LogicalOperatorType::LOGICAL_GET: {
 		// TODO: Get stats from a logical GET
 		auto &get = op->Cast<LogicalGet>();
-		if (get.function.name == "unnest" && !op->children.empty()) {
+		// this is a get that *most likely* has a function (like unnest or json_each).
+		// there are new bindings for output of the function, but child bindings also exist, and can
+		// be used in joins
+		if (!op->children.empty()) {
 			RelationStats child_stats;
-			AddUnnestRelation(optimizer, *op, input_op, parent, child_stats, limit_op, datasource_filters);
+			AddRelationWithChildren(optimizer, *op, input_op, parent, child_stats, limit_op, datasource_filters);
 			return true;
 		}
 		auto stats = RelationStatisticsHelper::ExtractGetStats(get, context);

package/src/duckdb/src/optimizer/late_materialization.cpp CHANGED Viewed

@@ -432,6 +432,11 @@ bool LateMaterialization::OptimizeLargeLimit(LogicalLimit &limit, idx_t limit_va
 		}
 		current_op = *current_op.get().children[0];
 	}
+	// if there are any filters we shouldn't do large limit optimization
+	auto &get = current_op.get().Cast<LogicalGet>();
+	if (!get.table_filters.filters.empty()) {
+		return false;
+	}
 	return true;
 }

package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp CHANGED Viewed

@@ -17,7 +17,9 @@ OrderedAggregateOptimizer::OrderedAggregateOptimizer(ExpressionRewriter &rewrite
 }
 unique_ptr<Expression> OrderedAggregateOptimizer::Apply(ClientContext &context, BoundAggregateExpression &aggr,
-                                                        vector<unique_ptr<Expression>> &groups, bool &changes_made) {
+                                                        vector<unique_ptr<Expression>> &groups,
+                                                        optional_ptr<vector<GroupingSet>> grouping_sets,
+                                                        bool &changes_made) {
 	if (!aggr.order_bys) {
 		// no ORDER BYs defined
 		return nullptr;
@@ -30,7 +32,7 @@ unique_ptr<Expression> OrderedAggregateOptimizer::Apply(ClientContext &context,
 	}
 	// Remove unnecessary ORDER BY clauses and return if nothing remains
-	if (aggr.order_bys->Simplify(groups)) {
+	if (aggr.order_bys->Simplify(groups, grouping_sets)) {
 		aggr.order_bys.reset();
 		changes_made = true;
 		return nullptr;
@@ -90,7 +92,8 @@ unique_ptr<Expression> OrderedAggregateOptimizer::Apply(ClientContext &context,
 unique_ptr<Expression> OrderedAggregateOptimizer::Apply(LogicalOperator &op, vector<reference<Expression>> &bindings,
                                                         bool &changes_made, bool is_root) {
 	auto &aggr = bindings[0].get().Cast<BoundAggregateExpression>();
-	return Apply(rewriter.context, aggr, op.Cast<LogicalAggregate>().groups, changes_made);
+	return Apply(rewriter.context, aggr, op.Cast<LogicalAggregate>().groups, op.Cast<LogicalAggregate>().grouping_sets,
+	             changes_made);
 }
 } // namespace duckdb

package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp CHANGED Viewed

@@ -44,8 +44,9 @@ unique_ptr<SampleOptions> Transformer::TransformSampleOptions(optional_ptr<duckd
 	} else {
 		// sample size is given in rows: use reservoir sampling
 		auto rows = sample_value.GetValue<int64_t>();
-		if (rows < 0) {
-			throw ParserException("Sample rows %lld out of range, must be bigger than or equal to 0", rows);
+		if (rows < 0 || sample_value.GetValue<uint64_t>() > SampleOptions::MAX_SAMPLE_ROWS) {
+			throw ParserException("Sample rows %lld out of range, must be between 0 and %lld", rows,
+			                      SampleOptions::MAX_SAMPLE_ROWS);
 		}
 		result->sample_size = Value::BIGINT(rows);
 		result->method = SampleMethod::RESERVOIR_SAMPLE;

package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp CHANGED Viewed

@@ -216,7 +216,7 @@ void TryTransformStarLike(unique_ptr<ParsedExpression> &root) {
 		child_expr = std::move(list_filter);
 	}
-	auto columns_expr = make_uniq<StarExpression>();
+	auto columns_expr = make_uniq<StarExpression>(star.relation_name);
 	columns_expr->columns = true;
 	columns_expr->expr = std::move(child_expr);
 	columns_expr->SetAlias(std::move(original_alias));

package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp CHANGED Viewed

@@ -30,7 +30,7 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundSelectNode &statement) {
 		root = PlanFilter(std::move(statement.where_clause), std::move(root));
 	}
-	if (!statement.aggregates.empty() || !statement.groups.group_expressions.empty()) {
+	if (!statement.aggregates.empty() || !statement.groups.group_expressions.empty() || statement.having) {
 		if (!statement.groups.group_expressions.empty()) {
 			// visit the groups
 			for (auto &group : statement.groups.group_expressions) {

package/src/duckdb/src/planner/binder/statement/bind_copy.cpp CHANGED Viewed

@@ -423,7 +423,10 @@ vector<Value> BindCopyOption(ClientContext &context, TableFunctionBinder &option
 		}
 	}
 	auto bound_expr = option_binder.Bind(expr);
-	auto val = ExpressionExecutor::EvaluateScalar(context, *bound_expr);
+	if (bound_expr->HasParameter()) {
+		throw ParameterNotResolvedException();
+	}
+	auto val = ExpressionExecutor::EvaluateScalar(context, *bound_expr, true);
 	if (val.IsNull()) {
 		throw BinderException("NULL is not supported as a valid option for COPY option \"" + name + "\"");
 	}

package/src/duckdb/src/planner/binder/statement/bind_insert.cpp CHANGED Viewed

@@ -465,7 +465,11 @@ unique_ptr<MergeIntoStatement> Binder::GenerateMergeInto(InsertStatement &stmt,
 	if (on_conflict_info.action_type == OnConflictAction::REPLACE) {
 		D_ASSERT(!on_conflict_info.set_info);
-		on_conflict_info.set_info = CreateSetInfoForReplace(table, stmt, storage_info);
+		// For BY POSITION, create explicit SET information
+		// For BY NAME, leave it empty and let bind_merge_into handle it automatically
+		if (stmt.column_order != InsertColumnOrder::INSERT_BY_NAME) {
+			on_conflict_info.set_info = CreateSetInfoForReplace(table, stmt, storage_info);
+		}
 		on_conflict_info.action_type = OnConflictAction::UPDATE;
 	}
 	// now set up the merge actions
@@ -484,16 +488,19 @@ unique_ptr<MergeIntoStatement> Binder::GenerateMergeInto(InsertStatement &stmt,
 		// when doing UPDATE set up the when matched action
 		auto update_action = make_uniq<MergeIntoAction>();
 		update_action->action_type = MergeActionType::MERGE_UPDATE;
-		for (auto &col : on_conflict_info.set_info->expressions) {
-			vector<unordered_set<string>> lambda_params;
-			DoUpdateSetQualify(col, table_name, lambda_params);
-		}
-		if (on_conflict_info.set_info->condition) {
-			vector<unordered_set<string>> lambda_params;
-			DoUpdateSetQualify(on_conflict_info.set_info->condition, table_name, lambda_params);
-			update_action->condition = std::move(on_conflict_info.set_info->condition);
+		update_action->column_order = stmt.column_order;
+		if (on_conflict_info.set_info) {
+			for (auto &col : on_conflict_info.set_info->expressions) {
+				vector<unordered_set<string>> lambda_params;
+				DoUpdateSetQualify(col, table_name, lambda_params);
+			}
+			if (on_conflict_info.set_info->condition) {
+				vector<unordered_set<string>> lambda_params;
+				DoUpdateSetQualify(on_conflict_info.set_info->condition, table_name, lambda_params);
+				update_action->condition = std::move(on_conflict_info.set_info->condition);
+			}
+			update_action->update_info = std::move(on_conflict_info.set_info);
 		}
-		update_action->update_info = std::move(on_conflict_info.set_info);
 		merge_into->actions[MergeActionCondition::WHEN_MATCHED].push_back(std::move(update_action));
 	}

package/src/duckdb/src/planner/binder.cpp CHANGED Viewed

@@ -70,7 +70,7 @@ Binder::Binder(ClientContext &context, shared_ptr<Binder> parent_p, BinderType b
 	}
 }
-unique_ptr<BoundCTENode> Binder::BindMaterializedCTE(CommonTableExpressionMap &cte_map) {
+unique_ptr<BoundCTENode> Binder::BindMaterializedCTE(CommonTableExpressionMap &cte_map, unique_ptr<CTENode> &cte_root) {
 	// Extract materialized CTEs from cte_map
 	vector<unique_ptr<CTENode>> materialized_ctes;
 	for (auto &cte : cte_map.map) {
@@ -87,7 +87,6 @@ unique_ptr<BoundCTENode> Binder::BindMaterializedCTE(CommonTableExpressionMap &c
 		return nullptr;
 	}
-	unique_ptr<CTENode> cte_root = nullptr;
 	while (!materialized_ctes.empty()) {
 		unique_ptr<CTENode> node_result;
 		node_result = std::move(materialized_ctes.back());
@@ -110,7 +109,8 @@ unique_ptr<BoundCTENode> Binder::BindMaterializedCTE(CommonTableExpressionMap &c
 template <class T>
 BoundStatement Binder::BindWithCTE(T &statement) {
 	BoundStatement bound_statement;
-	auto bound_cte = BindMaterializedCTE(statement.template Cast<T>().cte_map);
+	unique_ptr<CTENode> cte_root;
+	auto bound_cte = BindMaterializedCTE(statement.template Cast<T>().cte_map, cte_root);
 	if (bound_cte) {
 		reference<BoundCTENode> tail_ref = *bound_cte;

package/src/duckdb/src/planner/bound_result_modifier.cpp CHANGED Viewed

@@ -101,14 +101,17 @@ bool BoundOrderModifier::Equals(const unique_ptr<BoundOrderModifier> &left,
 	return BoundOrderModifier::Equals(*left, *right);
 }
-bool BoundOrderModifier::Simplify(vector<BoundOrderByNode> &orders, const vector<unique_ptr<Expression>> &groups) {
+bool BoundOrderModifier::Simplify(vector<BoundOrderByNode> &orders, const vector<unique_ptr<Expression>> &groups,
+                                  optional_ptr<vector<GroupingSet>> grouping_sets) {
 	// for each ORDER BY - check if it is actually necessary
 	// expressions that are in the groups do not need to be ORDERED BY
 	// `ORDER BY` on a group has no effect, because for each aggregate, the group is unique
 	// similarly, we only need to ORDER BY each aggregate once
+	expression_map_t<idx_t> group_expressions;
 	expression_set_t seen_expressions;
+	idx_t i = 0;
 	for (auto &target : groups) {
-		seen_expressions.insert(*target);
+		group_expressions.insert({*target, i++});
 	}
 	vector<BoundOrderByNode> new_order_nodes;
 	for (auto &order_node : orders) {
@@ -116,16 +119,30 @@ bool BoundOrderModifier::Simplify(vector<BoundOrderByNode> &orders, const vector
 			// we do not need to order by this node
 			continue;
 		}
+		auto it = group_expressions.find(*order_node.expression);
+		bool add_to_new_order = it == group_expressions.end();
+		if (!add_to_new_order && grouping_sets) {
+			idx_t group_idx = it->second;
+			for (auto &grouping_set : *grouping_sets) {
+				if (grouping_set.find(group_idx) == grouping_set.end()) {
+					add_to_new_order = true;
+					break;
+				}
+			}
+		}
 		seen_expressions.insert(*order_node.expression);
-		new_order_nodes.push_back(std::move(order_node));
+		if (add_to_new_order) {
+			new_order_nodes.push_back(std::move(order_node));
+		}
 	}
 	orders.swap(new_order_nodes);
 	return orders.empty(); // NOLINT
 }
-bool BoundOrderModifier::Simplify(const vector<unique_ptr<Expression>> &groups) {
-	return Simplify(orders, groups);
+bool BoundOrderModifier::Simplify(const vector<unique_ptr<Expression>> &groups,
+                                  optional_ptr<vector<GroupingSet>> grouping_sets) {
+	return Simplify(orders, groups, grouping_sets);
 }
 BoundLimitNode::BoundLimitNode(LimitNodeType type, idx_t constant_integer, double constant_percentage,

package/src/duckdb/src/planner/expression/bound_function_expression.cpp CHANGED Viewed

@@ -39,7 +39,10 @@ bool BoundFunctionExpression::IsFoldable() const {
 			}
 		}
 	}
-	return function.stability == FunctionStability::VOLATILE ? false : Expression::IsFoldable();
+	if (function.stability == FunctionStability::VOLATILE) {
+		return false;
+	}
+	return Expression::IsFoldable();
 }
 bool BoundFunctionExpression::CanThrow() const {

package/src/duckdb/src/planner/expression_binder/constant_binder.cpp CHANGED Viewed

@@ -19,7 +19,7 @@ BindResult ConstantBinder::BindExpression(unique_ptr<ParsedExpression> &expr_ptr
 				return BindExpression(expr_ptr, depth, root_expression);
 			}
 		}
-		return BindUnsupportedExpression(expr, depth, clause + " cannot contain column names");
+		throw BinderException::Unsupported(expr, clause + " cannot contain column names");
 	}
 	case ExpressionClass::SUBQUERY:
 		throw BinderException(clause + " cannot contain subqueries");

package/src/duckdb/src/planner/expression_binder.cpp CHANGED Viewed

@@ -1,6 +1,5 @@
 #include "duckdb/planner/expression_binder.hpp"
-#include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp"
 #include "duckdb/parser/expression/list.hpp"
 #include "duckdb/parser/parsed_expression_iterator.hpp"
 #include "duckdb/planner/binder.hpp"
@@ -166,7 +165,7 @@ static bool CombineMissingColumns(ErrorData &current, ErrorData new_error) {
 		}
 		auto score = StringUtil::SimilarityRating(candidate_column, column_name);
 		candidates.insert(candidate);
-		scores.emplace_back(make_pair(std::move(candidate), score));
+		scores.emplace_back(std::move(candidate), score);
 	}
 	// get a new top-n
 	auto top_candidates = StringUtil::TopNStrings(scores);

package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp CHANGED Viewed

@@ -236,6 +236,16 @@ bool FlattenDependentJoins::DetectCorrelatedExpressions(LogicalOperator &op, boo
 		if (DetectCorrelatedExpressions(*child, lateral, new_lateral_depth, condition)) {
 			has_correlation = true;
 		}
+		if (op.type == LogicalOperatorType::LOGICAL_MATERIALIZED_CTE && child_idx == 0) {
+			auto &setop = op.Cast<LogicalCTE>();
+			binder.recursive_ctes[setop.table_index] = &setop;
+			has_correlated_expressions[op] = has_correlation;
+			if (has_correlation) {
+				setop.correlated_columns = correlated_columns;
+			}
+		}
 		child_idx++;
 	}
@@ -261,6 +271,7 @@ bool FlattenDependentJoins::DetectCorrelatedExpressions(LogicalOperator &op, boo
 					return true;
 				}
 				// Found a materialized CTE, subtree correlation depends on the CTE node
+				has_correlated_expressions[op] = has_correlated_expressions[*cte_node];
 				return has_correlated_expressions[*cte_node];
 			}
 			// No CTE found: subtree is correlated
@@ -279,47 +290,32 @@ bool FlattenDependentJoins::DetectCorrelatedExpressions(LogicalOperator &op, boo
 		binder.recursive_ctes[setop.table_index] = &setop;
 		if (has_correlation) {
 			setop.correlated_columns = correlated_columns;
-			MarkSubtreeCorrelated(*op.children[1].get());
-		}
-	}
-	if (op.type == LogicalOperatorType::LOGICAL_MATERIALIZED_CTE) {
-		auto &setop = op.Cast<LogicalCTE>();
-		binder.recursive_ctes[setop.table_index] = &setop;
-		// only mark the entire subtree as correlated if the materializing side is correlated
-		auto entry = has_correlated_expressions.find(*op.children[0]);
-		if (entry != has_correlated_expressions.end()) {
-			if (has_correlation && entry->second) {
-				setop.correlated_columns = correlated_columns;
-				MarkSubtreeCorrelated(*op.children[1].get());
-			}
+			MarkSubtreeCorrelated(*op.children[1].get(), setop.table_index);
 		}
 	}
 	return has_correlation;
 }
-bool FlattenDependentJoins::MarkSubtreeCorrelated(LogicalOperator &op) {
+bool FlattenDependentJoins::MarkSubtreeCorrelated(LogicalOperator &op, idx_t cte_index) {
 	// Do not mark base table scans as correlated
 	auto entry = has_correlated_expressions.find(op);
 	D_ASSERT(entry != has_correlated_expressions.end());
 	bool has_correlation = entry->second;
 	for (auto &child : op.children) {
-		has_correlation |= MarkSubtreeCorrelated(*child.get());
+		has_correlation |= MarkSubtreeCorrelated(*child.get(), cte_index);
 	}
 	if (op.type != LogicalOperatorType::LOGICAL_GET || op.children.size() == 1) {
 		if (op.type == LogicalOperatorType::LOGICAL_CTE_REF) {
 			// There may be multiple recursive CTEs. Only mark CTE_REFs as correlated,
 			// IFF the CTE that we are reading from is correlated.
 			auto &cteref = op.Cast<LogicalCTERef>();
-			auto cte = binder.recursive_ctes.find(cteref.cte_index);
-			bool has_correlation = false;
-			if (cte != binder.recursive_ctes.end()) {
-				auto &rec_cte = cte->second->Cast<LogicalCTE>();
-				has_correlation = !rec_cte.correlated_columns.empty();
+			if (cteref.cte_index != cte_index) {
+				has_correlated_expressions[op] = has_correlation;
+				return has_correlation;
 			}
-			has_correlated_expressions[op] = has_correlation;
-			return has_correlation;
+			has_correlated_expressions[op] = true;
+			return true;
 		} else {
 			has_correlated_expressions[op] = has_correlation;
 		}
@@ -695,6 +691,42 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
 				return plan;
 			}
 		} else if (join.join_type == JoinType::MARK) {
+			if (!left_has_correlation && right_has_correlation) {
+				// found a MARK join where the left side has no correlation
+				ColumnBinding right_binding;
+				// there may still be correlation on the right side that we have to deal with
+				// push into the right side if necessary or decorrelate it independently otherwise
+				plan->children[1] = PushDownDependentJoinInternal(std::move(plan->children[1]),
+				                                                  parent_propagate_null_values, lateral_depth);
+				right_binding = this->base_binding;
+				// now push into the left side of the MARK join even though it has no correlation
+				// this is necessary to add the correlated columns to the column bindings and allow
+				// the join condition to be rewritten correctly
+				plan->children[0] = PushDownDependentJoinInternal(std::move(plan->children[0]),
+				                                                  parent_propagate_null_values, lateral_depth);
+				auto left_binding = this->base_binding;
+				// add the correlated columns to the join conditions
+				for (idx_t i = 0; i < correlated_columns.size(); i++) {
+					JoinCondition cond;
+					cond.left = make_uniq<BoundColumnRefExpression>(
+					    correlated_columns[i].type,
+					    ColumnBinding(left_binding.table_index, left_binding.column_index + i));
+					cond.right = make_uniq<BoundColumnRefExpression>(
+					    correlated_columns[i].type,
+					    ColumnBinding(right_binding.table_index, right_binding.column_index + i));
+					cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM;
+					auto &comparison_join = join.Cast<LogicalComparisonJoin>();
+					comparison_join.conditions.push_back(std::move(cond));
+				}
+				return plan;
+			}
 			// push the child into the LHS
 			plan->children[0] = PushDownDependentJoinInternal(std::move(plan->children[0]),
 			                                                  parent_propagate_null_values, lateral_depth);
@@ -1031,7 +1063,8 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
 			}
 		}
-		RewriteCTEScan cte_rewriter(table_index, correlated_columns);
+		RewriteCTEScan cte_rewriter(table_index, correlated_columns,
+		                            plan->type == LogicalOperatorType::LOGICAL_RECURSIVE_CTE);
 		cte_rewriter.VisitOperator(*plan->children[1]);
 		parent_propagate_null_values = false;

package/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp CHANGED Viewed

@@ -14,8 +14,10 @@
 namespace duckdb {
-RewriteCTEScan::RewriteCTEScan(idx_t table_index, const CorrelatedColumns &correlated_columns)
-    : table_index(table_index), correlated_columns(correlated_columns) {
+RewriteCTEScan::RewriteCTEScan(idx_t table_index, const CorrelatedColumns &correlated_columns,
+                               bool rewrite_dependent_joins)
+    : table_index(table_index), correlated_columns(correlated_columns),
+      rewrite_dependent_joins(rewrite_dependent_joins) {
 }
 void RewriteCTEScan::VisitOperator(LogicalOperator &op) {
@@ -29,7 +31,7 @@ void RewriteCTEScan::VisitOperator(LogicalOperator &op) {
 			}
 			cteref.correlated_columns += correlated_columns.size();
 		}
-	} else if (op.type == LogicalOperatorType::LOGICAL_DEPENDENT_JOIN) {
+	} else if (op.type == LogicalOperatorType::LOGICAL_DEPENDENT_JOIN && rewrite_dependent_joins) {
 		// There is another DependentJoin below the correlated recursive CTE.
 		// We have to add the correlated columns of the recursive CTE to the
 		// set of columns of this operator.

package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp CHANGED Viewed

@@ -118,6 +118,15 @@ void SingleFileTableDataWriter::FinalizeTable(const TableStatistics &global_stat
 	if (!v1_0_0_storage) {
 		options.emplace("v1_0_0_storage", v1_0_0_storage);
 	}
+	// If there is a context available, bind indexes before serialization.
+	// This is necessary so that buffered index operations are replayed before we checkpoint, otherwise
+	// we would lose them if there was a restart after this.
+	if (context && context->transaction.HasActiveTransaction()) {
+		info.BindIndexes(*context);
+	}
+	// FIXME: If we do not have a context, however, the unbound indexes have to be serialized to disk.
 	auto index_storage_infos = info.GetIndexes().SerializeToDisk(context, options);
 	auto debug_verify_blocks = DBConfig::GetSetting<DebugVerifyBlocksSetting>(GetDatabase());

package/src/duckdb/src/storage/storage_info.cpp CHANGED Viewed

@@ -85,6 +85,7 @@ static const StorageVersionInfo storage_version_info[] = {
 	{"v1.4.0", 67},
 	{"v1.4.1", 67},
 	{"v1.4.2", 67},
+	{"v1.4.3", 67},
 	{nullptr, 0}
 };
 // END OF STORAGE VERSION INFO
@@ -112,6 +113,7 @@ static const SerializationVersionInfo serialization_version_info[] = {
 	{"v1.4.0", 6},
 	{"v1.4.1", 6},
 	{"v1.4.2", 6},
+	{"v1.4.3", 6},
 	{"latest", 6},
 	{nullptr, 0}
 };

package/src/duckdb/src/storage/table/chunk_info.cpp CHANGED Viewed

@@ -32,7 +32,7 @@ static bool UseVersion(TransactionData transaction, transaction_t id) {
 	return TransactionVersionOperator::UseInsertedVersion(transaction.start_time, transaction.transaction_id, id);
 }
-bool ChunkInfo::Cleanup(transaction_t lowest_transaction, unique_ptr<ChunkInfo> &result) const {
+bool ChunkInfo::Cleanup(transaction_t lowest_transaction) const {
 	return false;
 }
@@ -99,7 +99,7 @@ idx_t ChunkConstantInfo::GetCommittedDeletedCount(idx_t max_count) {
 	return delete_id < TRANSACTION_ID_START ? max_count : 0;
 }
-bool ChunkConstantInfo::Cleanup(transaction_t lowest_transaction, unique_ptr<ChunkInfo> &result) const {
+bool ChunkConstantInfo::Cleanup(transaction_t lowest_transaction) const {
 	if (delete_id != NOT_DELETED_ID) {
 		// the chunk info is labeled as deleted - we need to keep it around
 		return false;
@@ -253,7 +253,7 @@ void ChunkVectorInfo::CommitAppend(transaction_t commit_id, idx_t start, idx_t e
 	}
 }
-bool ChunkVectorInfo::Cleanup(transaction_t lowest_transaction, unique_ptr<ChunkInfo> &result) const {
+bool ChunkVectorInfo::Cleanup(transaction_t lowest_transaction) const {
 	if (any_deleted) {
 		// if any rows are deleted we can't clean-up
 		return false;

package/src/duckdb/src/storage/table/column_data.cpp CHANGED Viewed

@@ -536,6 +536,11 @@ void ColumnData::RevertAppend(row_t start_row_p) {
 	if (segment->start == start_row) {
 		// we are truncating exactly this segment - erase it entirely
 		data.EraseSegments(l, segment_index);
+		if (segment_index > 0) {
+			// if we have a previous segment, we need to update the next pointer
+			auto previous_segment = data.GetSegmentByIndex(l, UnsafeNumericCast<int64_t>(segment_index - 1));
+			previous_segment->next = nullptr;
+		}
 	} else {
 		// we need to truncate within the segment
 		// remove any segments AFTER this segment: they should be deleted entirely
@@ -583,7 +588,6 @@ void ColumnData::Update(TransactionData transaction, DataTable &data_table, idx_
 	Vector base_vector(type);
 	ColumnScanState state;
 	FetchUpdateData(state, row_ids, base_vector);
 	UpdateInternal(transaction, data_table, column_index, update_vector, row_ids, update_count, base_vector);
 }

package/src/duckdb/src/storage/table/column_data_checkpointer.cpp CHANGED Viewed

@@ -363,7 +363,7 @@ void ColumnDataCheckpointer::WriteToDisk() {
 }
 bool ColumnDataCheckpointer::HasChanges(ColumnData &col_data) {
-	return col_data.HasChanges();
+	return col_data.HasAnyChanges();
 }
 void ColumnDataCheckpointer::WritePersistentSegments(ColumnCheckpointState &state) {

package/src/duckdb/src/storage/table/column_segment.cpp CHANGED Viewed

@@ -242,7 +242,9 @@ void ColumnSegment::ConvertToPersistent(QueryContext context, optional_ptr<Block
 	// Thus, we set the compression function to constant and reset the block buffer.
 	D_ASSERT(stats.statistics.IsConstant());
 	auto &config = DBConfig::GetConfig(db);
-	function = *config.GetCompressionFunction(CompressionType::COMPRESSION_CONSTANT, type.InternalType());
+	if (GetCompressionFunction().type != CompressionType::COMPRESSION_EMPTY) {
+		function = *config.GetCompressionFunction(CompressionType::COMPRESSION_CONSTANT, type.InternalType());
+	}
 	block.reset();
 }