npm - duckdb - Versions diffs - 1.3.1-dev6.0 → 1.3.2-dev0.0 - Mend

duckdb 1.3.1-dev6.0 → 1.3.2-dev0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (179)

package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp CHANGED Viewed

@@ -173,13 +173,13 @@ void CSVSniffer::GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountSc
 }
 // Returns true if a comment is acceptable
-bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, bool comment_set_by_user) {
-	if (comment_set_by_user) {
+bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, const CSVReaderOptions &options) {
+	if (options.dialect_options.state_machine_options.comment.IsSetByUser()) {
 		return true;
 	}
 	// For a comment to be acceptable, we want 3/5th's the majority of unmatched in the columns
 	constexpr double min_majority = 0.6;
-	// detected comments, are all lines that started with a comment character.
+	// detected comments are all lines that started with a comment character.
 	double detected_comments = 0;
 	// If at least one comment is a full line comment
 	bool has_full_line_comment = false;
@@ -192,7 +192,9 @@ bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, bool
 				has_full_line_comment = true;
 				valid_comments++;
 			}
-			if (result.column_counts[i].number_of_columns == num_cols && result.column_counts[i].is_mid_comment) {
+			if ((result.column_counts[i].number_of_columns == num_cols ||
+			     (result.column_counts[i].number_of_columns <= num_cols && options.null_padding)) &&
+			    result.column_counts[i].is_mid_comment) {
 				valid_comments++;
 			}
 		}
@@ -212,13 +214,13 @@ bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, bool
 	return valid_comments / detected_comments >= min_majority;
 }
-void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner, idx_t &rows_read,
-                                         idx_t &best_consistent_rows, idx_t &prev_padding_count,
-                                         idx_t &min_ignored_rows) {
+void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner, CandidateStats &stats,
+                                         vector<unique_ptr<ColumnCountScanner>> &successful_candidates) {
 	// The sniffed_column_counts variable keeps track of the number of columns found for each row
 	auto &sniffed_column_counts = scanner->ParseChunk();
 	idx_t dirty_notes = 0;
 	idx_t dirty_notes_minus_comments = 0;
+	idx_t empty_lines = 0;
 	if (sniffed_column_counts.error) {
 		if (!scanner->error_handler->HasError(MAXIMUM_LINE_SIZE)) {
 			all_fail_max_line_size = false;
@@ -232,7 +234,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
 	idx_t consistent_rows = 0;
 	idx_t num_cols = sniffed_column_counts.result_position == 0 ? 1 : sniffed_column_counts[0].number_of_columns;
 	const bool ignore_errors = options.ignore_errors.GetValue();
-	// If we are ignoring errors and not null_padding , we pick the most frequent number of columns as the right one
+	// If we are ignoring errors and not null_padding, we pick the most frequent number of columns as the right one
 	const bool use_most_frequent_columns = ignore_errors && !options.null_padding;
 	if (use_most_frequent_columns) {
 		num_cols = sniffed_column_counts.GetMostFrequentColumnCount();
@@ -242,8 +244,8 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
 	idx_t ignored_rows = 0;
 	const bool allow_padding = options.null_padding;
 	bool first_valid = false;
-	if (sniffed_column_counts.result_position > rows_read) {
-		rows_read = sniffed_column_counts.result_position;
+	if (sniffed_column_counts.result_position > stats.rows_read) {
+		stats.rows_read = sniffed_column_counts.result_position;
 	}
 	if (set_columns.IsCandidateUnacceptable(num_cols, options.null_padding, ignore_errors,
 	                                        sniffed_column_counts[0].last_value_always_empty)) {
@@ -279,9 +281,10 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
 				sniffed_column_counts.state_machine.dialect_options.rows_until_header = row;
 			}
 			padding_count = 0;
-			// we use the maximum amount of num_cols that we find
+			// we use the maximum number of num_cols that we find
 			num_cols = sniffed_column_counts[row].number_of_columns;
-			dirty_notes = row;
+			dirty_notes = row + sniffed_column_counts[row].empty_lines;
+			empty_lines = sniffed_column_counts[row].empty_lines;
 			dirty_notes_minus_comments = dirty_notes - comment_rows;
 			header_idx = row;
 			consistent_rows = 1;
@@ -289,7 +292,8 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
 			if (!first_valid) {
 				first_valid = true;
 				sniffed_column_counts.state_machine.dialect_options.rows_until_header = row;
-				dirty_notes = row;
+				dirty_notes = row + sniffed_column_counts[row].empty_lines;
+				empty_lines = sniffed_column_counts[row].empty_lines;
 				dirty_notes_minus_comments = dirty_notes - comment_rows;
 				num_cols = sniffed_column_counts[row].number_of_columns;
 			}
@@ -311,24 +315,26 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
 	consistent_rows += padding_count;
 	// Whether there are more values (rows) available that are consistent, exceeding the current best.
-	const bool more_values = consistent_rows > best_consistent_rows && num_cols >= max_columns_found;
+	const bool more_values = consistent_rows > stats.best_consistent_rows && num_cols >= max_columns_found;
-	const bool more_columns = consistent_rows == best_consistent_rows && num_cols > max_columns_found;
+	const bool more_columns = consistent_rows == stats.best_consistent_rows && num_cols > max_columns_found;
 	// If additional padding is required when compared to the previous padding count.
-	const bool require_more_padding = padding_count > prev_padding_count;
+	const bool require_more_padding = padding_count > stats.prev_padding_count;
 	// If less padding is now required when compared to the previous padding count.
-	const bool require_less_padding = padding_count < prev_padding_count;
+	const bool require_less_padding = padding_count < stats.prev_padding_count;
 	// If there was only a single column before, and the new number of columns exceeds that.
-	const bool single_column_before = max_columns_found < 2 && num_cols > max_columns_found * candidates.size();
+	const bool single_column_before =
+	    max_columns_found < 2 && num_cols > max_columns_found * successful_candidates.size();
 	// If the number of rows is consistent with the calculated value after accounting for skipped rows and the
 	// start row.
-	const bool rows_consistent =
-	    consistent_rows + (dirty_notes_minus_comments - options.dialect_options.skip_rows.GetValue()) + comment_rows ==
-	    sniffed_column_counts.result_position - options.dialect_options.skip_rows.GetValue();
+	const bool rows_consistent = consistent_rows +
+	                                 (dirty_notes_minus_comments - options.dialect_options.skip_rows.GetValue()) +
+	                                 comment_rows - empty_lines ==
+	                             sniffed_column_counts.result_position - options.dialect_options.skip_rows.GetValue();
 	// If there are more than one consistent row.
 	const bool more_than_one_row = consistent_rows > 1;
@@ -336,14 +342,14 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
 	const bool more_than_one_column = num_cols > 1;
 	// If the start position is valid.
-	const bool start_good = !candidates.empty() &&
-	                        dirty_notes <= candidates.front()->GetStateMachine().dialect_options.skip_rows.GetValue();
+	const bool start_good =
+	    !successful_candidates.empty() &&
+	    dirty_notes <= successful_candidates.front()->GetStateMachine().dialect_options.skip_rows.GetValue();
 	// If padding happened but it is not allowed.
 	const bool invalid_padding = !allow_padding && padding_count > 0;
-	const bool comments_are_acceptable = AreCommentsAcceptable(
-	    sniffed_column_counts, num_cols, options.dialect_options.state_machine_options.comment.IsSetByUser());
+	const bool comments_are_acceptable = AreCommentsAcceptable(sniffed_column_counts, num_cols, options);
 	const bool quoted =
 	    scanner->ever_quoted &&
@@ -360,44 +366,44 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
 	// If rows are consistent and no invalid padding happens, this is the best suitable candidate if one of the
 	// following is valid:
 	// - There's a single column before.
-	// - There are more values and no additional padding is required.
+	// - There are more values, and no additional padding is required.
 	// - There's more than one column and less padding is required.
 	if (columns_match_set && (rows_consistent || (set_columns.IsSet() && ignore_errors)) &&
 	    (single_column_before || ((more_values || more_columns) && !require_more_padding) ||
-	     (more_than_one_column && require_less_padding) || quoted) &&
+	     (more_than_one_column && require_less_padding) || (quoted && comment_rows == 0)) &&
 	    !invalid_padding && comments_are_acceptable) {
-		if (!candidates.empty() && set_columns.IsSet() && max_columns_found == set_columns.Size() &&
-		    consistent_rows <= best_consistent_rows) {
+		if (!successful_candidates.empty() && set_columns.IsSet() && max_columns_found == set_columns.Size() &&
+		    consistent_rows <= stats.best_consistent_rows) {
 			// We have a candidate that fits our requirements better
-			if (candidates.front()->ever_quoted || !scanner->ever_quoted) {
+			if (successful_candidates.front()->ever_quoted || !scanner->ever_quoted) {
 				return;
 			}
 		}
 		auto &sniffing_state_machine = scanner->GetStateMachine();
-		if (!candidates.empty() && candidates.front()->ever_quoted) {
+		if (!successful_candidates.empty() && successful_candidates.front()->ever_quoted) {
 			// Give preference to quoted boys.
 			if (!scanner->ever_quoted) {
 				return;
 			} else {
 				// Give preference to one that got escaped
-				if (!scanner->ever_escaped && candidates.front()->ever_escaped &&
+				if (!scanner->ever_escaped && successful_candidates.front()->ever_escaped &&
 				    sniffing_state_machine.dialect_options.state_machine_options.strict_mode.GetValue()) {
 					return;
 				}
-				if (best_consistent_rows == consistent_rows && num_cols >= max_columns_found) {
+				if (stats.best_consistent_rows == consistent_rows && num_cols >= max_columns_found) {
 					// If both have not been escaped, this might get solved later on.
 					sniffing_state_machine.dialect_options.num_cols = num_cols;
-					candidates.emplace_back(std::move(scanner));
+					successful_candidates.emplace_back(std::move(scanner));
 					max_columns_found = num_cols;
 					return;
 				}
 			}
 		}
-		if (max_columns_found == num_cols && (ignored_rows > min_ignored_rows)) {
+		if (max_columns_found == num_cols && (ignored_rows > stats.min_ignored_rows)) {
 			return;
 		}
-		if (max_columns_found > 1 && num_cols > max_columns_found && consistent_rows < best_consistent_rows / 2 &&
+		if (max_columns_found > 1 && num_cols > max_columns_found && consistent_rows < stats.best_consistent_rows / 2 &&
 		    (options.null_padding || ignore_errors)) {
 			// When null_padding is true, we only give preference to a max number of columns if null padding is at least
 			// 50% as consistent as the best case scenario
@@ -406,39 +412,40 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
 		if (quoted && num_cols < max_columns_found) {
 			if (scanner->ever_escaped &&
 			    sniffing_state_machine.dialect_options.state_machine_options.strict_mode.GetValue()) {
-				for (auto &candidate : candidates) {
+				for (auto &candidate : successful_candidates) {
 					if (candidate->ever_quoted && candidate->ever_escaped) {
 						return;
 					}
 				}
 			} else {
-				for (auto &candidate : candidates) {
+				for (auto &candidate : successful_candidates) {
 					if (candidate->ever_quoted) {
 						return;
 					}
 				}
 			}
 		}
-		best_consistent_rows = consistent_rows;
+		stats.best_consistent_rows = consistent_rows;
 		max_columns_found = num_cols;
-		prev_padding_count = padding_count;
-		min_ignored_rows = ignored_rows;
+		stats.prev_padding_count = padding_count;
+		stats.min_ignored_rows = ignored_rows;
 		if (options.dialect_options.skip_rows.IsSetByUser()) {
-			// If skip rows is set by user, and we found dirty notes, we only accept it if either null_padding or
+			// If skip rows are set by the user, and we found dirty notes, we only accept it if either null_padding or
 			// ignore_errors is set we have comments
-			if (dirty_notes != 0 && !options.null_padding && !options.ignore_errors.GetValue() && comment_rows == 0) {
+			if (dirty_notes - empty_lines != 0 && !options.null_padding && !options.ignore_errors.GetValue() &&
+			    comment_rows == 0) {
 				return;
 			}
 			sniffing_state_machine.dialect_options.skip_rows = options.dialect_options.skip_rows.GetValue();
 		} else if (!options.null_padding) {
 			sniffing_state_machine.dialect_options.skip_rows = dirty_notes_minus_comments;
 		}
-		candidates.clear();
+		successful_candidates.clear();
 		sniffing_state_machine.dialect_options.num_cols = num_cols;
 		lines_sniffed = sniffed_column_counts.result_position;
-		candidates.emplace_back(std::move(scanner));
+		successful_candidates.emplace_back(std::move(scanner));
 		return;
 	}
 	// If there's more than one row and column, the start is good, rows are consistent,
@@ -449,7 +456,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
 		auto &sniffing_state_machine = scanner->GetStateMachine();
 		if (options.dialect_options.skip_rows.IsSetByUser()) {
-			// If skip rows is set by user, and we found dirty notes, we only accept it if either null_padding or
+			// If skip rows are set by the user, and we found dirty notes, we only accept it if either null_padding or
 			// ignore_errors is set
 			if (dirty_notes != 0 && !options.null_padding && !options.ignore_errors.GetValue()) {
 				return;
@@ -460,7 +467,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
 		}
 		sniffing_state_machine.dialect_options.num_cols = num_cols;
 		lines_sniffed = sniffed_column_counts.result_position;
-		candidates.emplace_back(std::move(scanner));
+		successful_candidates.emplace_back(std::move(scanner));
 	}
 }
@@ -481,8 +488,8 @@ bool CSVSniffer::RefineCandidateNextChunk(ColumnCountScanner &candidate) const {
 }
 void CSVSniffer::RefineCandidates() {
-	// It's very frequent that more than one dialect can parse a csv file, hence here we run one state machine
-	// fully on the whole sample dataset, when/if it fails we go to the next one.
+	// It's very frequent that more than one dialect can parse a csv file; hence here we run one state machine
+	// fully on the whole sample dataset, when/if it fails, we go to the next one.
 	if (candidates.empty()) {
 		// No candidates to refine
 		return;
@@ -587,22 +594,14 @@ NewLineIdentifier CSVSniffer::DetectNewLineDelimiter(CSVBufferManager &buffer_ma
 void CSVSniffer::DetectDialect() {
 	// Variables for Dialect Detection
 	DialectCandidates dialect_candidates(options.dialect_options.state_machine_options);
-	// Number of rows read
-	idx_t rows_read = 0;
-	// Best Number of consistent rows (i.e., presenting all columns)
-	idx_t best_consistent_rows = 0;
-	// If padding was necessary (i.e., rows are missing some columns, how many)
-	idx_t prev_padding_count = 0;
-	// Min number of ignores rows
-	idx_t best_ignored_rows = 0;
+	CandidateStats stats;
 	// Vector of CSV State Machines
 	vector<unique_ptr<ColumnCountScanner>> csv_state_machines;
 	// Step 1: Generate state machines
 	GenerateStateMachineSearchSpace(csv_state_machines, dialect_candidates);
 	// Step 2: Analyze all candidates on the first chunk
 	for (auto &state_machine : csv_state_machines) {
-		AnalyzeDialectCandidate(std::move(state_machine), rows_read, best_consistent_rows, prev_padding_count,
-		                        best_ignored_rows);
+		AnalyzeDialectCandidate(std::move(state_machine), stats, candidates);
 	}
 	// Step 3: Loop over candidates and find if they can still produce good results for the remaining chunks
 	RefineCandidates();

package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp CHANGED Viewed

@@ -17,14 +17,11 @@
 #include "duckdb/parallel/thread_context.hpp"
 #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
 #include "duckdb/planner/expression/bound_reference_expression.hpp"
-#include "duckdb/planner/filter/conjunction_filter.hpp"
 #include "duckdb/planner/filter/constant_filter.hpp"
 #include "duckdb/planner/filter/in_filter.hpp"
-#include "duckdb/planner/filter/null_filter.hpp"
 #include "duckdb/planner/filter/optional_filter.hpp"
 #include "duckdb/planner/table_filter.hpp"
 #include "duckdb/storage/buffer_manager.hpp"
-#include "duckdb/storage/storage_manager.hpp"
 #include "duckdb/storage/temporary_memory_manager.hpp"
 namespace duckdb {
@@ -249,7 +246,7 @@ public:
 };
 unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &context) const {
-	auto result = make_uniq<JoinHashTable>(context, conditions, payload_columns.col_types, join_type,
+	auto result = make_uniq<JoinHashTable>(context, *this, conditions, payload_columns.col_types, join_type,
 	                                       rhs_output_columns.col_idxs);
 	if (!delim_types.empty() && join_type == JoinType::MARK) {
 		// correlated MARK join
@@ -779,7 +776,9 @@ unique_ptr<DataChunk> JoinFilterPushdownInfo::Finalize(ClientContext &context, o
 				continue;
 			}
 			// if the HT is small we can generate a complete "OR" filter
-			if (ht && ht->Count() > 1 && ht->Count() <= dynamic_or_filter_threshold) {
+			// but only if the join condition is equality.
+			if (ht && ht->Count() > 1 && ht->Count() <= dynamic_or_filter_threshold &&
+			    cmp == ExpressionType::COMPARE_EQUAL) {
 				PushInFilter(info, *ht, op, filter_idx, filter_col_idx);
 			}
@@ -852,6 +851,8 @@ SinkFinalizeType PhysicalHashJoin::Finalize(Pipeline &pipeline, Event &event, Cl
 			sink.external = false;
 		}
 	}
+	DUCKDB_LOG(context, PhysicalOperatorLogType, *this, "PhysicalHashJoin", "Finalize",
+	           {{"external", to_string(sink.external)}});
 	if (sink.external) {
 		// External Hash Join
 		sink.perfect_join_executor.reset();
@@ -862,8 +863,12 @@ SinkFinalizeType PhysicalHashJoin::Finalize(Pipeline &pipeline, Event &event, Cl
 		if (!very_very_skewed &&
 		    (max_partition_ht_size + sink.probe_side_requirement) > sink.temporary_memory_state->GetReservation()) {
 			// We have to repartition
+			const auto radix_bits_before = ht.GetRadixBits();
 			ht.SetRepartitionRadixBits(sink.temporary_memory_state->GetReservation(), sink.max_partition_size,
 			                           sink.max_partition_count);
+			DUCKDB_LOG(context, PhysicalOperatorLogType, *this, "PhysicalHashJoin", "Repartition",
+			           {{"partitions_before", to_string(RadixPartitioning::NumberOfPartitions(radix_bits_before))},
+			            {"partitions_after", to_string(RadixPartitioning::NumberOfPartitions(ht.GetRadixBits()))}});
 			auto new_event = make_shared_ptr<HashJoinRepartitionEvent>(pipeline, *this, sink, sink.local_hash_tables);
 			event.InsertEvent(std::move(new_event));
 		} else {

package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp CHANGED Viewed

@@ -112,12 +112,16 @@ public:
 		}
 		// initialize writing to the file
 		global_state = op.function.copy_to_initialize_global(context, *op.bind_data, op.file_path);
+		if (op.function.initialize_operator) {
+			op.function.initialize_operator(*global_state, op);
+		}
 		if (op.return_type == CopyFunctionReturnType::WRITTEN_FILE_STATISTICS) {
 			written_file_info = make_uniq<CopyToFileInfo>(op.file_path);
 			written_file_info->file_stats = make_uniq<CopyFunctionFileStatistics>();
 			op.function.copy_to_get_written_statistics(context, *op.bind_data, *global_state,
 			                                           *written_file_info->file_stats);
 		}
+		initialized = true;
 	}
 	void AddBatchData(idx_t batch_index, unique_ptr<PreparedBatchData> new_batch, idx_t memory_usage) {

package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp CHANGED Viewed

@@ -52,6 +52,7 @@ public:
 	      file_write_lock_if_rotating(make_uniq<StorageLock>()) {
 		max_open_files = ClientConfig::GetConfig(context).partitioned_write_max_open_files;
 	}
 	StorageLock lock;
 	atomic<bool> initialized;
 	atomic<idx_t> rows_copied;
@@ -78,6 +79,9 @@ public:
 		}
 		// initialize writing to the file
 		global_state = op.function.copy_to_initialize_global(context, *op.bind_data, op.file_path);
+		if (op.function.initialize_operator) {
+			op.function.initialize_operator(*global_state, op);
+		}
 		auto written_file_info = AddFile(*write_lock, op.file_path, op.return_type);
 		if (written_file_info) {
 			op.function.copy_to_get_written_statistics(context, *op.bind_data, *global_state,
@@ -217,6 +221,9 @@ public:
 			written_file_info->partition_keys = Value::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR,
 			                                               std::move(partition_keys), std::move(partition_values));
 		}
+		if (op.function.initialize_operator) {
+			op.function.initialize_operator(*info->global_state, op);
+		}
 		auto &result = *info;
 		info->active_writes = 1;
 		// store in active write map
@@ -353,6 +360,9 @@ unique_ptr<GlobalFunctionData> PhysicalCopyToFile::CreateFileState(ClientContext
 	if (written_file_info) {
 		function.copy_to_get_written_statistics(context, *bind_data, *result, *written_file_info->file_stats);
 	}
+	if (function.initialize_operator) {
+		function.initialize_operator(*result, *this);
+	}
 	return result;
 }
@@ -408,12 +418,9 @@ void CheckDirectory(FileSystem &fs, const string &file_path, CopyOverwriteMode o
 unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext &context) const {
 	if (partition_output || per_thread_output || rotate) {
 		auto &fs = FileSystem::GetFileSystem(context);
-		if (fs.FileExists(file_path)) {
-			// the target file exists AND is a file (not a directory)
-			if (fs.IsRemoteFile(file_path)) {
-				// for remote files we cannot do anything - as we cannot delete the file
-				throw IOException("Cannot write to \"%s\" - it exists and is a file, not a directory!", file_path);
-			} else {
+		if (!fs.IsRemoteFile(file_path)) {
+			if (fs.FileExists(file_path)) {
+				// the target file exists AND is a file (not a directory)
 				// for local files we can remove the file if OVERWRITE_OR_IGNORE is enabled
 				if (overwrite_mode == CopyOverwriteMode::COPY_OVERWRITE) {
 					fs.RemoveFile(file_path);
@@ -432,7 +439,7 @@ unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext
 		}
 		auto state = make_uniq<CopyToFunctionGlobalState>(context);
-		if (!per_thread_output && rotate) {
+		if (!per_thread_output && rotate && write_empty_file) {
 			auto global_lock = state->lock.GetExclusiveLock();
 			state->global_state = CreateFileState(context, *state, *global_lock);
 		}
@@ -490,6 +497,9 @@ void PhysicalCopyToFile::WriteRotateInternal(ExecutionContext &context, GlobalSi
 	while (true) {
 		// Grab global lock and dereference the current file state (and corresponding lock)
 		auto global_guard = g.lock.GetExclusiveLock();
+		if (!g.global_state) {
+			g.global_state = CreateFileState(context.client, *sink_state, *global_guard);
+		}
 		auto &file_state = *g.global_state;
 		auto &file_lock = *g.file_write_lock_if_rotating;
 		if (rotate && function.rotate_next_file(file_state, *bind_data, file_size_bytes)) {
@@ -523,7 +533,7 @@ SinkResultType PhysicalCopyToFile::Sink(ExecutionContext &context, DataChunk &ch
 	auto &g = input.global_state.Cast<CopyToFunctionGlobalState>();
 	auto &l = input.local_state.Cast<CopyToFunctionLocalState>();
-	if (!write_empty_file) {
+	if (!write_empty_file && !rotate) {
 		// if we are only writing the file when there are rows to write we need to initialize here
 		g.Initialize(context.client, *this);
 	}

package/src/duckdb/src/execution/operator/persistent/physical_export.cpp CHANGED Viewed

@@ -19,7 +19,7 @@ namespace duckdb {
 void ReorderTableEntries(catalog_entry_vector_t &tables);
-using std::stringstream;
+using duckdb::stringstream;
 PhysicalExport::PhysicalExport(vector<LogicalType> types, CopyFunction function, unique_ptr<CopyInfo> info,
                                idx_t estimated_cardinality, unique_ptr<BoundExportData> exported_tables)

package/src/duckdb/src/execution/operator/schema/physical_attach.cpp CHANGED Viewed

@@ -72,6 +72,7 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c
 	if (!options.default_table.name.empty()) {
 		attached_db->GetCatalog().SetDefaultTable(options.default_table.schema, options.default_table.name);
 	}
+	attached_db->FinalizeLoad(context.client);
 	return SourceResultType::FINISHED;
 }

package/src/duckdb/src/execution/physical_plan_generator.cpp CHANGED Viewed

@@ -28,17 +28,17 @@ unique_ptr<PhysicalPlan> PhysicalPlanGenerator::Plan(unique_ptr<LogicalOperator>
 PhysicalOperator &PhysicalPlanGenerator::ResolveAndPlan(unique_ptr<LogicalOperator> op) {
 	auto &profiler = QueryProfiler::Get(context);
+	// Resolve the types of each operator.
+	profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_RESOLVE_TYPES);
+	op->ResolveOperatorTypes();
+	profiler.EndPhase();
 	// Resolve the column references.
 	profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_COLUMN_BINDING);
 	ColumnBindingResolver resolver;
 	resolver.VisitOperator(*op);
 	profiler.EndPhase();
-	// Resolve the types of each operator.
-	profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_RESOLVE_TYPES);
-	op->ResolveOperatorTypes();
-	profiler.EndPhase();
 	// Create the main physical plan.
 	profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_CREATE_PLAN);
 	physical_plan = PlanInternal(*op);

package/src/duckdb/src/function/cast/vector_cast_helpers.cpp CHANGED Viewed

@@ -124,7 +124,8 @@ static string_t HandleString(Vector &vec, const char *buf, idx_t start, idx_t en
 	bool escaped = false;
 	bool quoted = false;
-	char quote_char;
+	// Satisfy GCC warning about uninitialized variable
+	char quote_char = '\0';
 	stack<char> scopes;
 	for (idx_t i = 0; i < length; i++) {
 		auto current_char = buf[start + i];

package/src/duckdb/src/function/function.cpp CHANGED Viewed

@@ -36,6 +36,10 @@ bool TableFunctionData::Equals(const FunctionData &other) const {
 	return false;
 }
+bool FunctionData::SupportStatementCache() const {
+	return true;
+}
 Function::Function(string name_p) : name(std::move(name_p)) {
 }
 Function::~Function() {

package/src/duckdb/src/function/scalar/operator/arithmetic.cpp CHANGED Viewed

@@ -882,9 +882,15 @@ ScalarFunctionSet OperatorMultiplyFun::GetFunctions() {
 	multiply.AddFunction(
 	    ScalarFunction({LogicalType::INTERVAL, LogicalType::DOUBLE}, LogicalType::INTERVAL,
 	                   ScalarFunction::BinaryFunction<interval_t, double, interval_t, MultiplyOperator>));
+	multiply.AddFunction(
+	    ScalarFunction({LogicalType::DOUBLE, LogicalType::INTERVAL}, LogicalType::INTERVAL,
+	                   ScalarFunction::BinaryFunction<double, interval_t, interval_t, MultiplyOperator>));
 	multiply.AddFunction(
 	    ScalarFunction({LogicalType::BIGINT, LogicalType::INTERVAL}, LogicalType::INTERVAL,
 	                   ScalarFunction::BinaryFunction<int64_t, interval_t, interval_t, MultiplyOperator>));
+	multiply.AddFunction(
+	    ScalarFunction({LogicalType::INTERVAL, LogicalType::BIGINT}, LogicalType::INTERVAL,
+	                   ScalarFunction::BinaryFunction<interval_t, int64_t, interval_t, MultiplyOperator>));
 	for (auto &func : multiply.functions) {
 		ScalarFunction::SetReturnsError(func);
 	}

package/src/duckdb/src/function/scalar/struct/remap_struct.cpp CHANGED Viewed

@@ -84,6 +84,7 @@ static void RemapMap(Vector &input, Vector &default_vector, Vector &result, idx_
 	auto &result_key_vector = MapVector::GetKeys(result);
 	auto &result_value_vector = MapVector::GetValues(result);
 	auto list_size = ListVector::GetListSize(input);
+	ListVector::Reserve(result, list_size);
 	ListVector::SetListSize(result, list_size);
 	bool has_top_level_null = false;
@@ -136,6 +137,7 @@ static void RemapList(Vector &input, Vector &default_vector, Vector &result, idx
 	auto &input_vector = ListVector::GetEntry(input);
 	auto &result_vector = ListVector::GetEntry(result);
 	auto list_size = ListVector::GetListSize(input);
+	ListVector::Reserve(result, list_size);
 	ListVector::SetListSize(result, list_size);
 	bool has_top_level_null = false;
@@ -401,6 +403,9 @@ struct RemapEntry {
 			auto &child_types = StructType::GetChildTypes(default_type);
 			for (idx_t child_idx = 0; child_idx < child_types.size(); child_idx++) {
 				auto &child_default = child_types[child_idx];
+				if (!result_entry->second.child_remaps || !entry->second.child_map) {
+					throw BinderException("No child remaps found");
+				}
 				HandleDefault(child_idx, child_default.first, child_default.second, *entry->second.child_map,
 				              *result_entry->second.child_remaps);
 			}
@@ -542,6 +547,10 @@ static unique_ptr<FunctionData> RemapStructBind(ClientContext &context, ScalarFu
 		if (arg->return_type.id() == LogicalTypeId::UNKNOWN) {
 			throw ParameterNotResolvedException();
 		}
+		if (arg->return_type.id() == LogicalTypeId::SQLNULL && arg_idx == 2) {
+			// remap target can be NULL
+			continue;
+		}
 		if (!arg->return_type.IsNested()) {
 			throw BinderException("Struct remap can only remap nested types, not '%s'", arg->return_type.ToString());
 		} else if (arg->return_type.id() == LogicalTypeId::STRUCT && StructType::IsUnnamed(arg->return_type)) {
@@ -571,11 +580,11 @@ static unique_ptr<FunctionData> RemapStructBind(ClientContext &context, ScalarFu
 	auto target_map = RemapIndex::GetMap(to_type);
 	Value remap_val = ExpressionExecutor::EvaluateScalar(context, *arguments[2]);
-	auto &remap_types = StructType::GetChildTypes(arguments[2]->return_type);
 	// (recursively) generate the remap entries
 	case_insensitive_map_t<RemapEntry> remap_map;
 	if (!remap_val.IsNull()) {
+		auto &remap_types = StructType::GetChildTypes(arguments[2]->return_type);
 		auto &remap_values = StructValue::GetChildren(remap_val);
 		for (idx_t remap_idx = 0; remap_idx < remap_values.size(); remap_idx++) {
 			auto &remap_val = remap_values[remap_idx];

package/src/duckdb/src/function/table/copy_csv.cpp CHANGED Viewed

@@ -221,6 +221,7 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyFunctio
 	memset(bind_data->requires_quotes.get(), 0, sizeof(bool) * 256);
 	bind_data->requires_quotes['\n'] = true;
 	bind_data->requires_quotes['\r'] = true;
+	bind_data->requires_quotes['#'] = true;
 	bind_data->requires_quotes[NumericCast<idx_t>(
 	    bind_data->options.dialect_options.state_machine_options.delimiter.GetValue()[0])] = true;
 	bind_data->requires_quotes[NumericCast<idx_t>(

package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED Viewed

@@ -1,5 +1,5 @@
 #ifndef DUCKDB_PATCH_VERSION
-#define DUCKDB_PATCH_VERSION "0"
+#define DUCKDB_PATCH_VERSION "1"
 #endif
 #ifndef DUCKDB_MINOR_VERSION
 #define DUCKDB_MINOR_VERSION 3
@@ -8,10 +8,10 @@
 #define DUCKDB_MAJOR_VERSION 1
 #endif
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v1.3.0"
+#define DUCKDB_VERSION "v1.3.1"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "71c5c07cdd"
+#define DUCKDB_SOURCE_ID "2063dda3e6"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"

package/src/duckdb/src/include/duckdb/catalog/catalog.hpp CHANGED Viewed

@@ -109,6 +109,7 @@ public:
 	}
 	virtual void Initialize(bool load_builtin) = 0;
 	virtual void Initialize(optional_ptr<ClientContext> context, bool load_builtin);
+	virtual void FinalizeLoad(optional_ptr<ClientContext> context);
 	bool IsSystemCatalog() const;
 	bool IsTemporaryCatalog() const;

package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp CHANGED Viewed

@@ -66,7 +66,7 @@ private:
 	unique_ptr<CatalogEntry> ChangeColumnType(ClientContext &context, ChangeColumnTypeInfo &info);
 	unique_ptr<CatalogEntry> SetNotNull(ClientContext &context, SetNotNullInfo &info);
 	unique_ptr<CatalogEntry> DropNotNull(ClientContext &context, DropNotNullInfo &info);
-	unique_ptr<CatalogEntry> AddForeignKeyConstraint(optional_ptr<ClientContext> context, AlterForeignKeyInfo &info);
+	unique_ptr<CatalogEntry> AddForeignKeyConstraint(AlterForeignKeyInfo &info);
 	unique_ptr<CatalogEntry> DropForeignKeyConstraint(ClientContext &context, AlterForeignKeyInfo &info);
 	unique_ptr<CatalogEntry> SetColumnComment(ClientContext &context, SetColumnCommentInfo &info);
 	unique_ptr<CatalogEntry> AddConstraint(ClientContext &context, AddConstraintInfo &info);

package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp CHANGED Viewed

@@ -35,7 +35,7 @@ private:
 	static string WriteOptionallyQuoted(const string &input);
 };
-enum class CatalogSetPathType { SET_SCHEMA, SET_SCHEMAS };
+enum class CatalogSetPathType { SET_SCHEMA, SET_SCHEMAS, SET_DIRECTLY };
 //! The schema search path, in order by which entries are searched if no schema entry is provided
 class CatalogSearchPath {