duckdb 1.4.3-dev0.0 → 1.4.4-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp +1 -1
  3. package/src/duckdb/extension/icu/icu_extension.cpp +14 -5
  4. package/src/duckdb/extension/parquet/column_writer.cpp +4 -4
  5. package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +12 -4
  6. package/src/duckdb/src/common/encryption_key_manager.cpp +4 -0
  7. package/src/duckdb/src/common/local_file_system.cpp +23 -0
  8. package/src/duckdb/src/common/types/column/column_data_collection.cpp +6 -0
  9. package/src/duckdb/src/common/types/conflict_manager.cpp +1 -1
  10. package/src/duckdb/src/execution/index/art/base_node.cpp +3 -1
  11. package/src/duckdb/src/execution/index/art/prefix.cpp +5 -8
  12. package/src/duckdb/src/execution/index/bound_index.cpp +68 -25
  13. package/src/duckdb/src/execution/index/unbound_index.cpp +21 -10
  14. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +4 -0
  15. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +36 -28
  16. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +3 -2
  17. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +12 -6
  18. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +8 -4
  19. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  20. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +4 -3
  21. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +3 -2
  22. package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +0 -1
  23. package/src/duckdb/src/execution/physical_plan/plan_window.cpp +6 -8
  24. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +4 -3
  25. package/src/duckdb/src/function/macro_function.cpp +20 -2
  26. package/src/duckdb/src/function/table/system/duckdb_log.cpp +3 -0
  27. package/src/duckdb/src/function/table/system/test_all_types.cpp +26 -13
  28. package/src/duckdb/src/function/table/table_scan.cpp +72 -38
  29. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  30. package/src/duckdb/src/function/table_function.cpp +24 -0
  31. package/src/duckdb/src/include/duckdb/common/encryption_key_manager.hpp +1 -0
  32. package/src/duckdb/src/include/duckdb/common/limits.hpp +4 -2
  33. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +2 -0
  34. package/src/duckdb/src/include/duckdb/common/types/row/block_iterator.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +2 -0
  36. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
  37. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +41 -7
  39. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +15 -1
  40. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -0
  41. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -1
  42. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -1
  43. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +2 -1
  44. package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
  45. package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
  47. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +4 -4
  49. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +3 -1
  50. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +3 -0
  51. package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +4 -2
  53. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -2
  54. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp +3 -1
  56. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -3
  57. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +2 -6
  58. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +4 -1
  59. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
  60. package/src/duckdb/src/logging/log_storage.cpp +17 -23
  61. package/src/duckdb/src/main/capi/duckdb-c.cpp +1 -1
  62. package/src/duckdb/src/main/connection.cpp +0 -5
  63. package/src/duckdb/src/main/database_manager.cpp +12 -9
  64. package/src/duckdb/src/main/db_instance_cache.cpp +15 -1
  65. package/src/duckdb/src/main/extension/extension_alias.cpp +1 -0
  66. package/src/duckdb/src/optimizer/filter_combiner.cpp +38 -4
  67. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -15
  68. package/src/duckdb/src/optimizer/late_materialization.cpp +5 -0
  69. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +6 -3
  70. package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +3 -2
  71. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +1 -1
  72. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +1 -1
  73. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +4 -1
  74. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +17 -10
  75. package/src/duckdb/src/planner/binder.cpp +3 -3
  76. package/src/duckdb/src/planner/bound_result_modifier.cpp +22 -5
  77. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +4 -1
  78. package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +1 -1
  79. package/src/duckdb/src/planner/expression_binder.cpp +1 -2
  80. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +57 -24
  81. package/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp +5 -3
  82. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +9 -0
  83. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  84. package/src/duckdb/src/storage/table/chunk_info.cpp +3 -3
  85. package/src/duckdb/src/storage/table/column_data.cpp +5 -1
  86. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +1 -1
  87. package/src/duckdb/src/storage/table/column_segment.cpp +3 -1
  88. package/src/duckdb/src/storage/table/row_group.cpp +6 -8
  89. package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -1
  90. package/src/duckdb/src/storage/table/row_version_manager.cpp +37 -23
  91. package/src/duckdb/src/storage/table/standard_column_data.cpp +5 -5
  92. package/src/duckdb/src/storage/table/validity_column_data.cpp +17 -0
@@ -462,24 +462,30 @@ void CSVSniffer::DetectTypes() {
  idx_t varchar_cols = 0;
  for (idx_t col = 0; col < info_sql_types_candidates.size(); col++) {
  auto &col_type_candidates = info_sql_types_candidates[col];
- // check number of varchar columns
+ // check the number of varchar columns
  const auto &col_type = col_type_candidates.back();
  if (col_type == LogicalType::VARCHAR) {
  varchar_cols++;
  }
  }

- // it's good if the dialect creates more non-varchar columns, but only if we sacrifice < 30% of
- // best_num_cols.
+ // it's good if the dialect creates more non-varchar columns
+ const bool has_less_varchar_cols = varchar_cols < min_varchar_cols;
+ // but only if we sacrifice < 30% of best_num_cols.
+ const bool acceptable_best_num_cols =
+ static_cast<double>(info_sql_types_candidates.size()) > static_cast<double>(max_columns_found) * 0.7;
  const idx_t number_of_errors = candidate->error_handler->GetSize();
- if (!best_candidate || (varchar_cols < min_varchar_cols && static_cast<double>(info_sql_types_candidates.size()) > (
- static_cast<double>(max_columns_found) * 0.7) &&
- (!options.ignore_errors.GetValue() || number_of_errors < min_errors))) {
+ const bool better_strictness = best_candidate_is_strict ? !candidate->used_unstrictness : true;
+ const bool acceptable_candidate = has_less_varchar_cols && acceptable_best_num_cols && better_strictness;
+ // If we escaped an unquoted character when strict is false.
+ if (!best_candidate ||
+ (acceptable_candidate && (!options.ignore_errors.GetValue() || number_of_errors < min_errors))) {
  min_errors = number_of_errors;
  best_header_row.clear();
  // we have a new best_options candidate
  best_candidate = std::move(candidate);
  min_varchar_cols = varchar_cols;
+ best_candidate_is_strict = !best_candidate->used_unstrictness;
  best_sql_types_candidates_per_column_idx = info_sql_types_candidates;
  for (auto &format_candidate : format_candidates) {
  best_format_candidates[format_candidate.first] = format_candidate.second.format;
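
Illustrative sketch (not part of the package diff): the hunk above replaces one long candidate-selection condition with named booleans. The standalone C++ below approximates that predicate under hypothetical names (CandidateSummary, IsAcceptableCandidate); it mirrors, but is not, DuckDB's sniffer code.

// Sketch only: standalone approximation of the dialect-acceptance predicate above.
#include <cstddef>

struct CandidateSummary {
	std::size_t varchar_cols;    // columns that stayed VARCHAR
	std::size_t num_cols;        // columns detected by this dialect
	std::size_t num_errors;      // errors produced while sniffing
	bool used_unstrictness;      // dialect had to relax strict parsing
};

bool IsAcceptableCandidate(const CandidateSummary &c, std::size_t min_varchar_cols, std::size_t max_columns_found,
                           std::size_t min_errors, bool best_is_strict, bool ignore_errors) {
	// it's good if the dialect creates more non-varchar columns
	const bool has_less_varchar_cols = c.varchar_cols < min_varchar_cols;
	// but only if we sacrifice < 30% of the best column count seen so far
	const bool acceptable_num_cols =
	    static_cast<double>(c.num_cols) > static_cast<double>(max_columns_found) * 0.7;
	// never trade a strict candidate for one that needed unstrict parsing
	const bool better_strictness = best_is_strict ? !c.used_unstrictness : true;
	const bool acceptable = has_less_varchar_cols && acceptable_num_cols && better_strictness;
	return acceptable && (!ignore_errors || c.num_errors < min_errors);
}

int main() {
	CandidateSummary candidate {1, 10, 0, false};
	return IsAcceptableCandidate(candidate, 3, 10, 5, true, false) ? 0 : 1;
}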
@@ -67,10 +67,14 @@ public:

  InterruptState interrupt_state;
  OperatorSourceInput source_input {global_state, *local_state, interrupt_state};
- auto source_result = table.GetData(context, source, source_input);
- if (source_result == SourceResultType::BLOCKED) {
- throw NotImplementedException(
- "Unexpected interrupt from table Source in PositionalTableScanner refill");
+ auto source_result = SourceResultType::HAVE_MORE_OUTPUT;
+ while (source_result == SourceResultType::HAVE_MORE_OUTPUT && source.size() == 0) {
+ // TODO: this could as well just be propagated further, but for now iterating it is
+ source_result = table.GetData(context, source, source_input);
+ if (source_result == SourceResultType::BLOCKED) {
+ throw NotImplementedException(
+ "Unexpected interrupt from table Source in PositionalTableScanner refill");
+ }
  }
  }
  source_offset = 0;
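
Illustrative sketch (not part of the package diff): the change above keeps calling GetData until the source either produces rows or stops reporting HAVE_MORE_OUTPUT, instead of giving up after one empty chunk. The standalone C++ below models that refill loop with hypothetical types (Source, SourceResultType); it is not the DuckDB implementation.

// Sketch only: a generic "pull until non-empty or exhausted" refill loop.
#include <stdexcept>
#include <vector>

enum class SourceResultType { HAVE_MORE_OUTPUT, FINISHED, BLOCKED };

struct Source {
	int remaining = 3;
	// Emits an empty chunk twice, then one row, to mimic a source that can return empty chunks.
	SourceResultType GetData(std::vector<int> &chunk) {
		chunk.clear();
		if (remaining == 0) {
			return SourceResultType::FINISHED;
		}
		--remaining;
		if (remaining == 0) {
			chunk.push_back(42);
		}
		return SourceResultType::HAVE_MORE_OUTPUT;
	}
};

int main() {
	Source source;
	std::vector<int> chunk;
	auto result = SourceResultType::HAVE_MORE_OUTPUT;
	// Loop instead of calling GetData once: empty chunks no longer end the scan early.
	while (result == SourceResultType::HAVE_MORE_OUTPUT && chunk.empty()) {
		result = source.GetData(chunk);
		if (result == SourceResultType::BLOCKED) {
			throw std::runtime_error("unexpected interrupt during refill");
		}
	}
	return chunk.empty() ? 1 : 0;
}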
@@ -259,7 +259,7 @@ bool PhysicalTableScan::Equals(const PhysicalOperator &other_p) const {
  return false;
  }
  auto &other = other_p.Cast<PhysicalTableScan>();
- if (function.function != other.function.function) {
+ if (function != other.function) {
  return false;
  }
  if (column_ids != other.column_ids) {
@@ -236,7 +236,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalAggregate &op) {
  D_ASSERT(op.children.size() == 1);

  reference<PhysicalOperator> plan = CreatePlan(*op.children[0]);
- plan = ExtractAggregateExpressions(plan, op.expressions, op.groups);
+ plan = ExtractAggregateExpressions(plan, op.expressions, op.groups, op.grouping_sets);

  bool can_use_simple_aggregation = true;
  for (auto &expression : op.expressions) {
@@ -305,7 +305,8 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalAggregate &op) {

  PhysicalOperator &PhysicalPlanGenerator::ExtractAggregateExpressions(PhysicalOperator &child,
  vector<unique_ptr<Expression>> &aggregates,
- vector<unique_ptr<Expression>> &groups) {
+ vector<unique_ptr<Expression>> &groups,
+ optional_ptr<vector<GroupingSet>> grouping_sets) {
  vector<unique_ptr<Expression>> expressions;
  vector<LogicalType> types;

@@ -314,7 +315,7 @@ PhysicalOperator &PhysicalPlanGenerator::ExtractAggregateExpressions(PhysicalOpe
  auto &bound_aggr = aggr->Cast<BoundAggregateExpression>();
  if (bound_aggr.order_bys) {
  // sorted aggregate!
- FunctionBinder::BindSortedAggregate(context, bound_aggr, groups);
+ FunctionBinder::BindSortedAggregate(context, bound_aggr, groups, grouping_sets);
  }
  }
  for (auto &group : groups) {
@@ -65,7 +65,8 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalDistinct &op) {

  if (ClientConfig::GetConfig(context).enable_optimizer) {
  bool changes_made = false;
- auto new_expr = OrderedAggregateOptimizer::Apply(context, *first_aggregate, groups, changes_made);
+ auto new_expr =
+ OrderedAggregateOptimizer::Apply(context, *first_aggregate, groups, nullptr, changes_made);
  if (new_expr) {
  D_ASSERT(new_expr->return_type == first_aggregate->return_type);
  D_ASSERT(new_expr->GetExpressionType() == ExpressionType::BOUND_AGGREGATE);
@@ -81,7 +82,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalDistinct &op) {
  }
  }

- child = ExtractAggregateExpressions(child, aggregates, groups);
+ child = ExtractAggregateExpressions(child, aggregates, groups, nullptr);

  // we add a physical hash aggregation in the plan to select the distinct groups
  auto &group_by = Make<PhysicalHashAggregate>(context, aggregate_types, std::move(aggregates), std::move(groups),
@@ -14,7 +14,6 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalFilter &op) {
  D_ASSERT(op.children.size() == 1);
  reference<PhysicalOperator> plan = CreatePlan(*op.children[0]);
  if (!op.expressions.empty()) {
- D_ASSERT(!plan.get().GetTypes().empty());
  // create a filter if there is anything to filter
  auto &filter = Make<PhysicalFilter>(plan.get().GetTypes(), std::move(op.expressions), op.estimated_cardinality);
  filter.children.push_back(plan);
@@ -2,13 +2,11 @@
  #include "duckdb/execution/operator/aggregate/physical_window.hpp"
  #include "duckdb/execution/operator/projection/physical_projection.hpp"
  #include "duckdb/execution/physical_plan_generator.hpp"
- #include "duckdb/main/client_context.hpp"
+ #include "duckdb/main/client_config.hpp"
  #include "duckdb/planner/expression/bound_reference_expression.hpp"
  #include "duckdb/planner/expression/bound_window_expression.hpp"
  #include "duckdb/planner/operator/logical_window.hpp"

- #include <numeric>
-
  namespace duckdb {

  PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalWindow &op) {
@@ -44,12 +42,12 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalWindow &op) {
  // Process the window functions by sharing the partition/order definitions
  unordered_map<idx_t, idx_t> projection_map;
  vector<vector<idx_t>> window_expressions;
- idx_t blocking_count = 0;
+ idx_t streaming_count = 0;
  auto output_pos = input_width;
  while (!blocking_windows.empty() || !streaming_windows.empty()) {
- const bool process_streaming = blocking_windows.empty();
- auto &remaining = process_streaming ? streaming_windows : blocking_windows;
- blocking_count += process_streaming ? 0 : 1;
+ const bool process_blocking = streaming_windows.empty();
+ auto &remaining = process_blocking ? blocking_windows : streaming_windows;
+ streaming_count += process_blocking ? 0 : 1;

  // Find all functions that share the partitioning of the first remaining expression
  auto over_idx = remaining[0];
@@ -122,7 +120,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalWindow &op) {
  }

  // Chain the new window operator on top of the plan
- if (i < blocking_count) {
+ if (i >= streaming_count) {
  auto &window = Make<PhysicalWindow>(types, std::move(select_list), op.estimated_cardinality);
  window.children.push_back(plan);
  plan = window;
@@ -677,14 +677,15 @@ struct SortedAggregateFunction {
  } // namespace

  void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundAggregateExpression &expr,
- const vector<unique_ptr<Expression>> &groups) {
+ const vector<unique_ptr<Expression>> &groups,
+ optional_ptr<vector<GroupingSet>> grouping_sets) {
  if (!expr.order_bys || expr.order_bys->orders.empty() || expr.children.empty()) {
  // not a sorted aggregate: return
  return;
  }
  // Remove unnecessary ORDER BY clauses and return if nothing remains
  if (context.config.enable_optimizer) {
- if (expr.order_bys->Simplify(groups)) {
+ if (expr.order_bys->Simplify(groups, grouping_sets)) {
  expr.order_bys.reset();
  return;
  }
@@ -741,7 +742,7 @@ void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundWindowExpr
  }
  // Remove unnecessary ORDER BY clauses and return if nothing remains
  if (context.config.enable_optimizer) {
- if (BoundOrderModifier::Simplify(expr.arg_orders, expr.partitions)) {
+ if (BoundOrderModifier::Simplify(expr.arg_orders, expr.partitions, nullptr)) {
  expr.arg_orders.clear();
  return;
  }
@@ -48,13 +48,31 @@ MacroBindResult MacroFunction::BindMacroFunction(

  ExpressionBinder expr_binder(binder, binder.context);
  expr_binder.lambda_bindings = binder.lambda_bindings;
+
+ // Figure out whether we even need to bind arguments
+ bool requires_bind = false;
+ for (auto &function : functions) {
+ for (const auto &type : function->types) {
+ if (type.id() != LogicalTypeId::UNKNOWN) {
+ requires_bind = true;
+ break;
+ }
+ }
+ if (requires_bind) {
+ break;
+ }
+ }
+
  // Find argument types and separate positional and default arguments
  vector<LogicalType> positional_arg_types;
  InsertionOrderPreservingMap<LogicalType> named_arg_types;
  for (auto &arg : function_expr.children) {
  auto arg_copy = arg->Copy();
- const auto arg_bind_result = expr_binder.BindExpression(arg_copy, depth + 1);
- auto arg_type = arg_bind_result.HasError() ? LogicalType::UNKNOWN : arg_bind_result.expression->return_type;
+ LogicalType arg_type = LogicalType::UNKNOWN;
+ if (requires_bind) {
+ const auto arg_bind_result = expr_binder.BindExpression(arg_copy, depth + 1);
+ arg_type = arg_bind_result.HasError() ? LogicalType::UNKNOWN : arg_bind_result.expression->return_type;
+ }
  if (!arg->GetAlias().empty()) {
  // Default argument
  if (named_arguments.find(arg->GetAlias()) != named_arguments.end()) {
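
Illustrative sketch (not part of the package diff): the hunk above skips binding the macro's call arguments entirely when every overload declares only UNKNOWN (untyped) parameters, since the argument types would be unused. The standalone C++ below models that short-circuit with hypothetical types (MacroOverload, RequiresBind); it is not DuckDB code.

// Sketch only: skip expensive argument binding when no overload declares a concrete parameter type.
#include <string>
#include <vector>

struct MacroOverload {
	std::vector<std::string> parameter_types; // "UNKNOWN" means untyped
};

bool RequiresBind(const std::vector<MacroOverload> &overloads) {
	// Only bind the call arguments if at least one overload can use their types.
	for (const auto &overload : overloads) {
		for (const auto &type : overload.parameter_types) {
			if (type != "UNKNOWN") {
				return true;
			}
		}
	}
	return false;
}

int main() {
	std::vector<MacroOverload> untyped {{{"UNKNOWN", "UNKNOWN"}}};
	std::vector<MacroOverload> typed {{{"UNKNOWN"}}, {{"INTEGER"}}};
	return (!RequiresBind(untyped) && RequiresBind(typed)) ? 0 : 1;
}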
@@ -62,6 +62,9 @@ unique_ptr<TableRef> DuckDBLogBindReplace(ClientContext &context, TableFunctionB
  bool denormalized_table = false;
  auto denormalized_table_setting = input.named_parameters.find("denormalized_table");
  if (denormalized_table_setting != input.named_parameters.end()) {
+ if (denormalized_table_setting->second.IsNull()) {
+ throw InvalidInputException("denormalized_table cannot be NULL");
+ }
  denormalized_table = denormalized_table_setting->second.GetValue<bool>();
  }

@@ -19,9 +19,10 @@ struct TestAllTypesData : public GlobalTableFunctionState {
  idx_t offset;
  };

- vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_large_bignum) {
+ vector<TestType> TestAllTypesFun::GetTestTypes(const bool use_large_enum, const bool use_large_bignum) {
  vector<TestType> result;
- // scalar types/numerics
+
+ // Numeric types.
  result.emplace_back(LogicalType::BOOLEAN, "bool");
  result.emplace_back(LogicalType::TINYINT, "tinyint");
  result.emplace_back(LogicalType::SMALLINT, "smallint");
@@ -33,24 +34,31 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_lar
  result.emplace_back(LogicalType::USMALLINT, "usmallint");
  result.emplace_back(LogicalType::UINTEGER, "uint");
  result.emplace_back(LogicalType::UBIGINT, "ubigint");
+
+ // BIGNUM.
  if (use_large_bignum) {
  string data;
- idx_t total_data_size = Bignum::BIGNUM_HEADER_SIZE + Bignum::MAX_DATA_SIZE;
+ constexpr idx_t total_data_size = Bignum::BIGNUM_HEADER_SIZE + Bignum::MAX_DATA_SIZE;
  data.resize(total_data_size);
- // Let's set our header
+
+ // Let's set the max header.
  Bignum::SetHeader(&data[0], Bignum::MAX_DATA_SIZE, false);
- // Set all our other bits
+ // Set all other max bits.
  memset(&data[Bignum::BIGNUM_HEADER_SIZE], 0xFF, Bignum::MAX_DATA_SIZE);
  auto max = Value::BIGNUM(data);
- // Let's set our header
+
+ // Let's set the min header.
  Bignum::SetHeader(&data[0], Bignum::MAX_DATA_SIZE, true);
- // Set all our other bits
+ // Set all other min bits.
  memset(&data[Bignum::BIGNUM_HEADER_SIZE], 0x00, Bignum::MAX_DATA_SIZE);
  auto min = Value::BIGNUM(data);
  result.emplace_back(LogicalType::BIGNUM, "bignum", min, max);
+
  } else {
  result.emplace_back(LogicalType::BIGNUM, "bignum");
  }
+
+ // Time-types.
  result.emplace_back(LogicalType::DATE, "date");
  result.emplace_back(LogicalType::TIME, "time");
  result.emplace_back(LogicalType::TIMESTAMP, "timestamp");
@@ -59,15 +67,19 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_lar
  result.emplace_back(LogicalType::TIMESTAMP_NS, "timestamp_ns");
  result.emplace_back(LogicalType::TIME_TZ, "time_tz");
  result.emplace_back(LogicalType::TIMESTAMP_TZ, "timestamp_tz");
- result.emplace_back(LogicalType::FLOAT, "float");
- result.emplace_back(LogicalType::DOUBLE, "double");
+
+ // More complex numeric types.
+ result.emplace_back(LogicalType::FLOAT, "float", Value::FLOAT(std::numeric_limits<float>::lowest()),
+ Value::FLOAT(std::numeric_limits<float>::max()));
+ result.emplace_back(LogicalType::DOUBLE, "double", Value::DOUBLE(std::numeric_limits<double>::lowest()),
+ Value::DOUBLE(std::numeric_limits<double>::max()));
  result.emplace_back(LogicalType::DECIMAL(4, 1), "dec_4_1");
  result.emplace_back(LogicalType::DECIMAL(9, 4), "dec_9_4");
  result.emplace_back(LogicalType::DECIMAL(18, 6), "dec_18_6");
  result.emplace_back(LogicalType::DECIMAL(38, 10), "dec38_10");
  result.emplace_back(LogicalType::UUID, "uuid");

- // interval
+ // Interval.
  interval_t min_interval;
  min_interval.months = 0;
  min_interval.days = 0;
@@ -79,14 +91,15 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_lar
  max_interval.micros = 999999999;
  result.emplace_back(LogicalType::INTERVAL, "interval", Value::INTERVAL(min_interval),
  Value::INTERVAL(max_interval));
- // strings/blobs/bitstrings
+
+ // VARCHAR / BLOB / Bitstrings.
  result.emplace_back(LogicalType::VARCHAR, "varchar", Value("🦆🦆🦆🦆🦆🦆"),
  Value(string("goo\x00se", 6)));
  result.emplace_back(LogicalType::BLOB, "blob", Value::BLOB("thisisalongblob\\x00withnullbytes"),
  Value::BLOB("\\x00\\x00\\x00a"));
  result.emplace_back(LogicalType::BIT, "bit", Value::BIT("0010001001011100010101011010111"), Value::BIT("10101"));

- // enums
+ // ENUMs.
  Vector small_enum(LogicalType::VARCHAR, 2);
  auto small_enum_ptr = FlatVector::GetData<string_t>(small_enum);
  small_enum_ptr[0] = StringVector::AddStringOrBlob(small_enum, "DUCK_DUCK_ENUM");
@@ -116,7 +129,7 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_lar
  result.emplace_back(LogicalType::ENUM(large_enum, 2), "large_enum");
  }

- // arrays
+ // ARRAYs.
  auto int_list_type = LogicalType::LIST(LogicalType::INTEGER);
  auto empty_int_list = Value::LIST(LogicalType::INTEGER, vector<Value>());
  auto int_list =
@@ -54,6 +54,7 @@ struct IndexScanLocalState : public LocalTableFunctionState {
  TableScanState scan_state;
  //! The column IDs of the local storage scan.
  vector<StorageIndex> column_ids;
+ bool in_charge_of_final_stretch {false};
  };

  static StorageIndex TransformStorageIndex(const ColumnIndex &column_id) {
@@ -114,7 +115,7 @@ class DuckIndexScanState : public TableScanGlobalState {
  public:
  DuckIndexScanState(ClientContext &context, const FunctionData *bind_data_p)
  : TableScanGlobalState(context, bind_data_p), next_batch_index(0), arena(Allocator::Get(context)),
- row_ids(nullptr), row_id_count(0), finished(false) {
+ row_ids(nullptr), row_id_count(0), finished_first_phase(false), started_last_phase(false) {
  }

  //! The batch index of the next Sink.
@@ -129,7 +130,8 @@ public:
  //! The column IDs of the to-be-scanned columns.
  vector<StorageIndex> column_ids;
  //! True, if no more row IDs must be scanned.
- bool finished;
+ bool finished_first_phase;
+ bool started_last_phase;
  //! Synchronize changes to the global index scan state.
  mutex index_scan_lock;

@@ -163,44 +165,75 @@ public:
  auto &storage = duck_table.GetStorage();
  auto &l_state = data_p.local_state->Cast<IndexScanLocalState>();

- idx_t scan_count = 0;
- idx_t offset = 0;
-
- {
- // Synchronize changes to the shared global state.
- lock_guard<mutex> l(index_scan_lock);
- if (!finished) {
- l_state.batch_index = next_batch_index;
- next_batch_index++;
-
- offset = l_state.batch_index * STANDARD_VECTOR_SIZE;
- auto remaining = row_id_count - offset;
- scan_count = remaining < STANDARD_VECTOR_SIZE ? remaining : STANDARD_VECTOR_SIZE;
- finished = remaining < STANDARD_VECTOR_SIZE ? true : false;
+ enum class ExecutionPhase { NONE = 0, STORAGE = 1, LOCAL_STORAGE = 2 };
+
+ // We might need to loop back, so while (true)
+ while (true) {
+ idx_t scan_count = 0;
+ idx_t offset = 0;
+
+ // Phase selection
+ auto phase_to_be_performed = ExecutionPhase::NONE;
+ {
+ // Synchronize changes to the shared global state.
+ lock_guard<mutex> l(index_scan_lock);
+ if (!finished_first_phase) {
+ l_state.batch_index = next_batch_index;
+ next_batch_index++;
+
+ offset = l_state.batch_index * STANDARD_VECTOR_SIZE;
+ auto remaining = row_id_count - offset;
+ scan_count = remaining <= STANDARD_VECTOR_SIZE ? remaining : STANDARD_VECTOR_SIZE;
+ finished_first_phase = remaining <= STANDARD_VECTOR_SIZE ? true : false;
+ phase_to_be_performed = ExecutionPhase::STORAGE;
+ } else if (!started_last_phase) {
+ // First thread to get last phase, great, set l_state's in_charge_of_final_stretch, so same thread
+ // will be on again
+ started_last_phase = true;
+ l_state.in_charge_of_final_stretch = true;
+ phase_to_be_performed = ExecutionPhase::LOCAL_STORAGE;
+ } else if (l_state.in_charge_of_final_stretch) {
+ phase_to_be_performed = ExecutionPhase::LOCAL_STORAGE;
+ }
  }
- }

- if (scan_count != 0) {
- auto row_id_data = reinterpret_cast<data_ptr_t>(row_ids + offset);
- Vector local_vector(LogicalType::ROW_TYPE, row_id_data);
-
- if (CanRemoveFilterColumns()) {
- l_state.all_columns.Reset();
- storage.Fetch(tx, l_state.all_columns, column_ids, local_vector, scan_count, l_state.fetch_state);
- output.ReferenceColumns(l_state.all_columns, projection_ids);
- } else {
- storage.Fetch(tx, output, column_ids, local_vector, scan_count, l_state.fetch_state);
+ switch (phase_to_be_performed) {
+ case ExecutionPhase::NONE: {
+ // No work to be picked up
+ return;
+ }
+ case ExecutionPhase::STORAGE: {
+ // Scan (in parallel) storage
+ auto row_id_data = reinterpret_cast<data_ptr_t>(row_ids + offset);
+ Vector local_vector(LogicalType::ROW_TYPE, row_id_data);
+
+ if (CanRemoveFilterColumns()) {
+ l_state.all_columns.Reset();
+ storage.Fetch(tx, l_state.all_columns, column_ids, local_vector, scan_count, l_state.fetch_state);
+ output.ReferenceColumns(l_state.all_columns, projection_ids);
+ } else {
+ storage.Fetch(tx, output, column_ids, local_vector, scan_count, l_state.fetch_state);
+ }
+ if (output.size() == 0) {
+ // output is empty, loop back, since there might be results to be picked up from LOCAL_STORAGE phase
+ continue;
+ }
+ return;
+ }
+ case ExecutionPhase::LOCAL_STORAGE: {
+ // Scan (sequentially, always same logical thread) local_storage
+ auto &local_storage = LocalStorage::Get(tx);
+ {
+ if (CanRemoveFilterColumns()) {
+ l_state.all_columns.Reset();
+ local_storage.Scan(l_state.scan_state.local_state, column_ids, l_state.all_columns);
+ output.ReferenceColumns(l_state.all_columns, projection_ids);
+ } else {
+ local_storage.Scan(l_state.scan_state.local_state, column_ids, output);
+ }
+ }
+ return;
  }
- }
-
- if (output.size() == 0) {
- auto &local_storage = LocalStorage::Get(tx);
- if (CanRemoveFilterColumns()) {
- l_state.all_columns.Reset();
- local_storage.Scan(l_state.scan_state.local_state, column_ids, l_state.all_columns);
- output.ReferenceColumns(l_state.all_columns, projection_ids);
- } else {
- local_storage.Scan(l_state.scan_state.local_state, column_ids, output);
  }
  }
  }
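
Illustrative sketch (not part of the package diff): the rewrite above turns the index scan into a small state machine in which many threads drain row-ID batches (STORAGE) and exactly one thread then finishes with the transaction-local data (LOCAL_STORAGE). The standalone C++ below models only the mutex-guarded phase selection, with hypothetical names (GlobalScanState, PickPhase); it is not the DuckDB implementation.

// Sketch only: phase selection approximating the DuckIndexScanState change above.
#include <cstddef>
#include <mutex>

enum class ExecutionPhase { NONE, STORAGE, LOCAL_STORAGE };

struct GlobalScanState {
	std::mutex lock;
	std::size_t next_batch = 0;
	std::size_t total_batches = 0;
	bool finished_first_phase = false;
	bool started_last_phase = false;
};

struct LocalScanState {
	std::size_t batch_index = 0;
	bool in_charge_of_final_stretch = false; // the same thread keeps the local-storage scan
};

ExecutionPhase PickPhase(GlobalScanState &g, LocalScanState &l) {
	std::lock_guard<std::mutex> guard(g.lock);
	if (!g.finished_first_phase) {
		// Hand out the next row-ID batch; the last batch ends the first phase.
		l.batch_index = g.next_batch++;
		g.finished_first_phase = g.next_batch >= g.total_batches;
		return ExecutionPhase::STORAGE;
	}
	if (!g.started_last_phase) {
		// First thread to arrive here owns the sequential local-storage scan.
		g.started_last_phase = true;
		l.in_charge_of_final_stretch = true;
		return ExecutionPhase::LOCAL_STORAGE;
	}
	// Only the owning thread may continue the local-storage scan; everyone else is done.
	return l.in_charge_of_final_stretch ? ExecutionPhase::LOCAL_STORAGE : ExecutionPhase::NONE;
}

int main() {
	GlobalScanState g;
	g.total_batches = 2;
	LocalScanState a, b;
	bool ok = PickPhase(g, a) == ExecutionPhase::STORAGE && PickPhase(g, b) == ExecutionPhase::STORAGE &&
	          PickPhase(g, a) == ExecutionPhase::LOCAL_STORAGE && PickPhase(g, b) == ExecutionPhase::NONE;
	return ok ? 0 : 1;
}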
@@ -350,7 +383,8 @@ unique_ptr<GlobalTableFunctionState> DuckTableScanInitGlobal(ClientContext &cont
  unique_ptr<GlobalTableFunctionState> DuckIndexScanInitGlobal(ClientContext &context, TableFunctionInitInput &input,
  const TableScanBindData &bind_data, set<row_t> &row_ids) {
  auto g_state = make_uniq<DuckIndexScanState>(context, input.bind_data.get());
- g_state->finished = row_ids.empty() ? true : false;
+ g_state->finished_first_phase = row_ids.empty() ? true : false;
+ g_state->started_last_phase = false;

  if (!row_ids.empty()) {
  auto row_id_ptr = g_state->arena.AllocateAligned(row_ids.size() * sizeof(row_t));
@@ -1,5 +1,5 @@
  #ifndef DUCKDB_PATCH_VERSION
- #define DUCKDB_PATCH_VERSION "2"
+ #define DUCKDB_PATCH_VERSION "3"
  #endif
  #ifndef DUCKDB_MINOR_VERSION
  #define DUCKDB_MINOR_VERSION 4
@@ -8,10 +8,10 @@
  #define DUCKDB_MAJOR_VERSION 1
  #endif
  #ifndef DUCKDB_VERSION
- #define DUCKDB_VERSION "v1.4.2"
+ #define DUCKDB_VERSION "v1.4.3"
  #endif
  #ifndef DUCKDB_SOURCE_ID
- #define DUCKDB_SOURCE_ID "68d7555f68"
+ #define DUCKDB_SOURCE_ID "d1dc88f950"
  #endif
  #include "duckdb/function/table/system_functions.hpp"
  #include "duckdb/main/database.hpp"
@@ -37,6 +37,30 @@ TableFunction::TableFunction(const vector<LogicalType> &arguments, table_functio
  TableFunction::TableFunction() : TableFunction("", {}, nullptr, nullptr, nullptr, nullptr) {
  }

+ bool TableFunction::operator==(const TableFunction &rhs) const {
+ return name == rhs.name && arguments == rhs.arguments && varargs == rhs.varargs && bind == rhs.bind &&
+ bind_replace == rhs.bind_replace && bind_operator == rhs.bind_operator && init_global == rhs.init_global &&
+ init_local == rhs.init_local && function == rhs.function && in_out_function == rhs.in_out_function &&
+ in_out_function_final == rhs.in_out_function_final && statistics == rhs.statistics &&
+ dependency == rhs.dependency && cardinality == rhs.cardinality &&
+ pushdown_complex_filter == rhs.pushdown_complex_filter && pushdown_expression == rhs.pushdown_expression &&
+ to_string == rhs.to_string && dynamic_to_string == rhs.dynamic_to_string &&
+ table_scan_progress == rhs.table_scan_progress && get_partition_data == rhs.get_partition_data &&
+ get_bind_info == rhs.get_bind_info && type_pushdown == rhs.type_pushdown &&
+ get_multi_file_reader == rhs.get_multi_file_reader && supports_pushdown_type == rhs.supports_pushdown_type &&
+ get_partition_info == rhs.get_partition_info && get_partition_stats == rhs.get_partition_stats &&
+ get_virtual_columns == rhs.get_virtual_columns && get_row_id_columns == rhs.get_row_id_columns &&
+ serialize == rhs.serialize && deserialize == rhs.deserialize &&
+ verify_serialization == rhs.verify_serialization && projection_pushdown == rhs.projection_pushdown &&
+ filter_pushdown == rhs.filter_pushdown && filter_prune == rhs.filter_prune &&
+ sampling_pushdown == rhs.sampling_pushdown && late_materialization == rhs.late_materialization &&
+ global_initialization == rhs.global_initialization;
+ }
+
+ bool TableFunction::operator!=(const TableFunction &rhs) const {
+ return !(*this == rhs);
+ }
+
  bool TableFunction::Equal(const TableFunction &rhs) const {
  // number of types
  if (this->arguments.size() != rhs.arguments.size()) {
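
Illustrative sketch (not part of the package diff): the new operator== above compares every callback member-wise and operator!= simply negates it, which is what lets PhysicalTableScan::Equals (earlier hunk) compare whole TableFunction objects with `function != other.function`. The standalone C++ below shows that pattern on a hypothetical struct (ScanFunction); it is not DuckDB code.

// Sketch only: member-wise equality with operator!= delegating to operator==.
#include <string>

using scan_callback_t = void (*)();

struct ScanFunction {
	std::string name;
	scan_callback_t function = nullptr;
	scan_callback_t init_global = nullptr;

	bool operator==(const ScanFunction &rhs) const {
		// Function pointers compare equal only if they point at the same callback.
		return name == rhs.name && function == rhs.function && init_global == rhs.init_global;
	}
	bool operator!=(const ScanFunction &rhs) const {
		return !(*this == rhs);
	}
};

void ScanA() {}
void ScanB() {}

int main() {
	ScanFunction a {"scan", &ScanA, nullptr};
	ScanFunction b {"scan", &ScanB, nullptr};
	return (a != b && a == a) ? 0 : 1;
}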
@@ -76,6 +76,7 @@ public:
  static constexpr idx_t DERIVED_KEY_LENGTH = 32;

  private:
+ mutable mutex lock;
  std::unordered_map<std::string, EncryptionKey> derived_keys;
  };

@@ -24,10 +24,12 @@ namespace duckdb {
  template <class T>
  struct NumericLimits {
  static constexpr T Minimum() {
- return std::numeric_limits<T>::lowest();
+ return std::numeric_limits<T>::has_infinity ? -std::numeric_limits<T>::infinity()
+ : std::numeric_limits<T>::lowest();
  }
  static constexpr T Maximum() {
- return std::numeric_limits<T>::max();
+ return std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity()
+ : std::numeric_limits<T>::max();
  }
  static constexpr bool IsSigned() {
  return std::is_signed<T>::value;
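
Illustrative sketch (not part of the package diff): with the change above, NumericLimits reports ±infinity for types where std::numeric_limits<T>::has_infinity is true (float, double) and keeps lowest()/max() for integer types. The standalone C++ below reproduces the same constexpr dispatch on a local template (Limits) purely for demonstration.

// Sketch only: the has_infinity dispatch used by the NumericLimits change above.
#include <cassert>
#include <cmath>
#include <limits>

template <class T>
struct Limits {
	static constexpr T Minimum() {
		return std::numeric_limits<T>::has_infinity ? -std::numeric_limits<T>::infinity()
		                                            : std::numeric_limits<T>::lowest();
	}
	static constexpr T Maximum() {
		return std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity()
		                                            : std::numeric_limits<T>::max();
	}
};

int main() {
	// Floating-point types now report infinities as their extreme values ...
	assert(std::isinf(Limits<double>::Maximum()) && Limits<double>::Maximum() > 0);
	assert(std::isinf(Limits<float>::Minimum()) && Limits<float>::Minimum() < 0);
	// ... while integer types are unchanged.
	static_assert(Limits<int>::Maximum() == std::numeric_limits<int>::max(), "unchanged for integers");
	return 0;
}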
@@ -38,6 +38,8 @@ public:
  int64_t GetFileSize(FileHandle &handle) override;
  //! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error
  timestamp_t GetLastModifiedTime(FileHandle &handle) override;
+ //! Returns a tag that uniquely identifies the version of the file
+ string GetVersionTag(FileHandle &handle) override;
  //! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error
  FileType GetFileType(FileHandle &handle) override;
  //! Truncate a file to a maximum size of new_size, new_size should be smaller than or equal to the current size of
@@ -31,6 +31,8 @@ template <class BLOCK_ITERATOR_STATE>
  class BlockIteratorStateBase {
  protected:
  friend BLOCK_ITERATOR_STATE;
+
+ private:
  explicit BlockIteratorStateBase(const idx_t tuple_count_p) : tuple_count(tuple_count_p) {
  }

@@ -256,6 +256,8 @@ public:
  if (parent.get().GetType() == NType::PREFIX) {
  // We might have to compress:
  // PREFIX (greatgrandparent) - Node4 (grandparent) - PREFIX - INLINED_LEAF.
+ // The parent does not have to be passed in, as it is a child of the possibly being compressed N4.
+ // Then, when we delete that child, we also free it.
  Node::DeleteChild(art, grandparent, greatgrandparent, current_key.get()[grandparent_depth], status,
  row_id);
  return;
@@ -48,7 +48,7 @@ public:

  //! Concatenates parent -> prev_node4 -> child.
  static void Concat(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte,
- const GateStatus node4_status);
+ const GateStatus node4_status, const GateStatus status);

  //! Removes up to pos bytes from the prefix.
  //! Shifts all subsequent bytes by pos. Frees empty nodes.
@@ -72,7 +72,7 @@ private:
  static Prefix GetTail(ART &art, const Node &node);

  static void ConcatInternal(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte,
- const bool inside_gate);
+ const GateStatus status);
  static void ConcatNode4WasGate(ART &art, Node &node4, const Node child, uint8_t byte);
  static void ConcatChildIsGate(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte);
  static void ConcatOutsideGate(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte);
@@ -169,7 +169,7 @@ public:
  //! Replay index insert and delete operations buffered during WAL replay.
  //! table_types has the physical types of the table in the order they appear, not logical (no generated columns).
  //! mapped_column_ids contains the sorted order of Indexed physical column ID's (see unbound_index.hpp comments).
- void ApplyBufferedReplays(const vector<LogicalType> &table_types, vector<BufferedIndexData> &buffered_replays,
+ void ApplyBufferedReplays(const vector<LogicalType> &table_types, BufferedIndexReplays &buffered_replays,
  const vector<StorageIndex> &mapped_column_ids);

  protected: