npm - duckdb - Versions diffs - 0.7.2-dev3294.0 → 0.7.2-dev3353.0 - Mend

duckdb 0.7.2-dev3294.0 → 0.7.2-dev3353.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/binding.gyp CHANGED Viewed

@@ -237,18 +237,18 @@
                 "src/duckdb/third_party/zstd/compress/zstd_lazy.cpp",
                 "src/duckdb/third_party/zstd/compress/zstd_ldm.cpp",
                 "src/duckdb/third_party/zstd/compress/zstd_opt.cpp",
-                "src/duckdb/extension/icu/./icu-makedate.cpp",
-                "src/duckdb/extension/icu/./icu-datepart.cpp",
-                "src/duckdb/extension/icu/./icu-timebucket.cpp",
-                "src/duckdb/extension/icu/./icu-list-range.cpp",
-                "src/duckdb/extension/icu/./icu-table-range.cpp",
                 "src/duckdb/extension/icu/./icu-dateadd.cpp",
                 "src/duckdb/extension/icu/./icu-datetrunc.cpp",
-                "src/duckdb/extension/icu/./icu-datefunc.cpp",
+                "src/duckdb/extension/icu/./icu-datesub.cpp",
+                "src/duckdb/extension/icu/./icu-table-range.cpp",
+                "src/duckdb/extension/icu/./icu-timebucket.cpp",
+                "src/duckdb/extension/icu/./icu-list-range.cpp",
+                "src/duckdb/extension/icu/./icu-datepart.cpp",
                 "src/duckdb/extension/icu/./icu-timezone.cpp",
-                "src/duckdb/extension/icu/./icu-strptime.cpp",
                 "src/duckdb/extension/icu/./icu-extension.cpp",
-                "src/duckdb/extension/icu/./icu-datesub.cpp",
+                "src/duckdb/extension/icu/./icu-makedate.cpp",
+                "src/duckdb/extension/icu/./icu-datefunc.cpp",
+                "src/duckdb/extension/icu/./icu-strptime.cpp",
                 "src/duckdb/ub_extension_icu_third_party_icu_common.cpp",
                 "src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp",
                 "src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp",

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.7.2-dev3294.0",
+  "version": "0.7.2-dev3353.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb/src/execution/index/art/art.cpp CHANGED Viewed

@@ -1039,6 +1039,9 @@ void ART::InitializeMerge(ARTFlags &flags) {
 bool ART::MergeIndexes(IndexLock &state, Index &other_index) {
 	auto &other_art = other_index.Cast<ART>();
+	if (!other_art.tree->IsSet()) {
+		return true;
+	}
 	if (tree->IsSet()) {
 		//  fully deserialize other_index, and traverse it to increment its buffer IDs

package/src/duckdb/src/execution/index/art/prefix.cpp CHANGED Viewed

@@ -159,23 +159,15 @@ void Prefix::Concatenate(ART &art, const uint8_t byte, const Prefix &other) {
 		return;
 	}
+	auto this_inlined = IsInlined();
 	auto this_count = count;
 	auto this_data = data;
 	Initialize();
-	// append the other prefix
+	// append the other prefix and possibly move the data to a segment
 	Append(art, other);
 	if (IsInlined()) {
-		// move to a segment
-		reference<PrefixSegment> segment(MoveInlinedToSegment(art));
-		// append the byte
-		segment = segment.get().Append(art, count, byte);
-		// append this prefix
-		for (idx_t i = 0; i < this_count; i++) {
-			segment = segment.get().Append(art, count, this_data.inlined[i]);
-		}
-		return;
+		MoveInlinedToSegment(art);
 	}
 	// get the tail
@@ -183,6 +175,14 @@ void Prefix::Concatenate(ART &art, const uint8_t byte, const Prefix &other) {
 	// append the byte
 	segment = segment.get().Append(art, count, byte);
+	if (this_inlined) {
+		// append this prefix
+		for (idx_t i = 0; i < this_count; i++) {
+			segment = segment.get().Append(art, count, this_data.inlined[i]);
+		}
+		return;
+	}
 	// iterate all segments of this prefix, copy their data, and free them
 	auto this_ptr = this_data.ptr;
 	auto remaining = this_count;

package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp CHANGED Viewed

@@ -17,6 +17,7 @@
 #include "utf8proc.hpp"
 #include "duckdb/parser/keyword_helper.hpp"
 #include "duckdb/main/error_manager.hpp"
+#include "duckdb/execution/operator/persistent/parallel_csv_reader.hpp"
 #include <algorithm>
 #include <cctype>
@@ -25,9 +26,10 @@
 namespace duckdb {
-string BaseCSVReader::GetLineNumberStr(idx_t linenr, bool linenr_estimated) {
-	string estimated = (linenr_estimated ? string(" (estimated)") : string(""));
-	return to_string(linenr + 1) + estimated;
+string BaseCSVReader::GetLineNumberStr(idx_t line_error, bool is_line_estimated, idx_t buffer_idx) {
+	// If an error happens during auto-detect it is an estimated line
+	string estimated = (is_line_estimated ? string(" (estimated)") : string(""));
+	return to_string(GetLineError(line_error, buffer_idx)) + estimated;
 }
 BaseCSVReader::BaseCSVReader(ClientContext &context_p, BufferedCSVReaderOptions options_p,
@@ -165,40 +167,48 @@ struct TryCastTimestampOperator {
 template <class OP, class T>
 static bool TemplatedTryCastDateVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector,
-                                       idx_t count, string &error_message) {
+                                       idx_t count, string &error_message, idx_t &line_error) {
 	D_ASSERT(input_vector.GetType().id() == LogicalTypeId::VARCHAR);
 	bool all_converted = true;
+	idx_t cur_line = 0;
 	UnaryExecutor::Execute<string_t, T>(input_vector, result_vector, count, [&](string_t input) {
 		T result;
 		if (!OP::Operation(options, input, result, error_message)) {
+			line_error = cur_line;
 			all_converted = false;
 		}
+		cur_line++;
 		return result;
 	});
 	return all_converted;
 }
 bool TryCastDateVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector, idx_t count,
-                       string &error_message) {
+                       string &error_message, idx_t &line_error) {
 	return TemplatedTryCastDateVector<TryCastDateOperator, date_t>(options, input_vector, result_vector, count,
-	                                                               error_message);
+	                                                               error_message, line_error);
 }
 bool TryCastTimestampVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector, idx_t count,
                             string &error_message) {
+	idx_t line_error;
 	return TemplatedTryCastDateVector<TryCastTimestampOperator, timestamp_t>(options, input_vector, result_vector,
-	                                                                         count, error_message);
+	                                                                         count, error_message, line_error);
 }
 template <class OP, class T>
 bool TemplatedTryCastFloatingVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector,
-                                    idx_t count, string &error_message) {
+                                    idx_t count, string &error_message, idx_t &line_error) {
 	D_ASSERT(input_vector.GetType().id() == LogicalTypeId::VARCHAR);
 	bool all_converted = true;
+	idx_t row = 0;
 	UnaryExecutor::Execute<string_t, T>(input_vector, result_vector, count, [&](string_t input) {
 		T result;
 		if (!OP::Operation(input, result, &error_message)) {
+			line_error = row;
 			all_converted = false;
+		} else {
+			row++;
 		}
 		return result;
 	});
@@ -226,7 +236,8 @@ bool BaseCSVReader::TryCastVector(Vector &parse_chunk_col, idx_t size, const Log
 	if (options.has_format[LogicalTypeId::DATE] && sql_type == LogicalTypeId::DATE) {
 		// use the date format to cast the chunk
 		string error_message;
-		return TryCastDateVector(options, parse_chunk_col, dummy_result, size, error_message);
+		idx_t line_error;
+		return TryCastDateVector(options, parse_chunk_col, dummy_result, size, error_message, line_error);
 	} else if (options.has_format[LogicalTypeId::TIMESTAMP] && sql_type == LogicalTypeId::TIMESTAMP) {
 		// use the timestamp format to cast the chunk
 		string error_message;
@@ -238,7 +249,8 @@ bool BaseCSVReader::TryCastVector(Vector &parse_chunk_col, idx_t size, const Log
 	}
 }
-void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &escape_positions, bool has_quotes) {
+void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &escape_positions, bool has_quotes,
+                             idx_t buffer_idx) {
 	auto length = str_val.GetSize();
 	if (length == 0 && column == 0) {
 		row_empty = true;
@@ -260,7 +272,8 @@ void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &esc
 		} else {
 			throw InvalidInputException(
 			    "Error in file \"%s\", on line %s: expected %lld values per row, but got more. (%s)", options.file_path,
-			    GetLineNumberStr(linenr, linenr_estimated).c_str(), return_types.size(), options.ToString());
+			    GetLineNumberStr(linenr, linenr_estimated, buffer_idx).c_str(), return_types.size(),
+			    options.ToString());
 		}
 	}
@@ -301,7 +314,7 @@ void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &esc
 	column++;
 }
-bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column, string &error_message) {
+bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column, string &error_message, idx_t buffer_idx) {
 	linenr++;
 	if (row_empty) {
@@ -338,8 +351,8 @@ bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column, string &error
 			} else {
 				throw InvalidInputException(
 				    "Error in file \"%s\" on line %s: expected %lld values per row, but got %d.\nParser options:\n%s",
-				    options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), return_types.size(), column,
-				    options.ToString());
+				    options.file_path, GetLineNumberStr(linenr, linenr_estimated, buffer_idx).c_str(),
+				    return_types.size(), column, options.ToString());
 			}
 		}
 	}
@@ -363,7 +376,7 @@ bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column, string &error
 	}
 	if (mode == ParserMode::PARSING && parse_chunk.size() == STANDARD_VECTOR_SIZE) {
-		Flush(insert_chunk);
+		Flush(insert_chunk, buffer_idx);
 		return true;
 	}
@@ -426,20 +439,21 @@ bool TryCastDecimalVectorCommaSeparated(BufferedCSVReaderOptions &options, Vecto
 }
 bool TryCastFloatingVectorCommaSeparated(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector,
-                                         idx_t count, string &error_message, const LogicalType &result_type) {
+                                         idx_t count, string &error_message, const LogicalType &result_type,
+                                         idx_t &line_error) {
 	switch (result_type.InternalType()) {
 	case PhysicalType::DOUBLE:
 		return TemplatedTryCastFloatingVector<TryCastErrorMessageCommaSeparated, double>(
-		    options, input_vector, result_vector, count, error_message);
+		    options, input_vector, result_vector, count, error_message, line_error);
 	case PhysicalType::FLOAT:
 		return TemplatedTryCastFloatingVector<TryCastErrorMessageCommaSeparated, float>(
-		    options, input_vector, result_vector, count, error_message);
+		    options, input_vector, result_vector, count, error_message, line_error);
 	default:
 		throw InternalException("Unimplemented physical type for floating");
 	}
 }
-bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
+bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_add_line) {
 	if (parse_chunk.size() == 0) {
 		return true;
 	}
@@ -468,9 +482,12 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
 		} else {
 			string error_message;
 			bool success;
+			idx_t line_error = 0;
+			bool target_type_not_varchar = false;
 			if (options.has_format[LogicalTypeId::DATE] && type.id() == LogicalTypeId::DATE) {
 				// use the date format to cast the chunk
-				success = TryCastDateVector(options, parse_vector, result_vector, parse_chunk.size(), error_message);
+				success = TryCastDateVector(options, parse_vector, result_vector, parse_chunk.size(), error_message,
+				                            line_error);
 			} else if (options.has_format[LogicalTypeId::TIMESTAMP] && type.id() == LogicalTypeId::TIMESTAMP) {
 				// use the date format to cast the chunk
 				success =
@@ -478,12 +495,13 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
 			} else if (options.decimal_separator != "." &&
 			           (type.id() == LogicalTypeId::FLOAT || type.id() == LogicalTypeId::DOUBLE)) {
 				success = TryCastFloatingVectorCommaSeparated(options, parse_vector, result_vector, parse_chunk.size(),
-				                                              error_message, type);
+				                                              error_message, type, line_error);
 			} else if (options.decimal_separator != "." && type.id() == LogicalTypeId::DECIMAL) {
 				success = TryCastDecimalVectorCommaSeparated(options, parse_vector, result_vector, parse_chunk.size(),
 				                                             error_message, type);
 			} else {
 				// target type is not varchar: perform a cast
+				target_type_not_varchar = true;
 				success =
 				    VectorOperations::TryCast(context, parse_vector, result_vector, parse_chunk.size(), &error_message);
 			}
@@ -503,15 +521,25 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
 			}
 			// figure out the exact line number
-			UnifiedVectorFormat inserted_column_data;
-			result_vector.ToUnifiedFormat(parse_chunk.size(), inserted_column_data);
-			idx_t row_idx;
-			for (row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
-				if (!inserted_column_data.validity.RowIsValid(row_idx) && !FlatVector::IsNull(parse_vector, row_idx)) {
-					break;
+			if (target_type_not_varchar) {
+				UnifiedVectorFormat inserted_column_data;
+				result_vector.ToUnifiedFormat(parse_chunk.size(), inserted_column_data);
+				for (; line_error < parse_chunk.size(); line_error++) {
+					if (!inserted_column_data.validity.RowIsValid(line_error) &&
+					    !FlatVector::IsNull(parse_vector, line_error)) {
+						break;
+					}
 				}
 			}
-			auto error_line = linenr - (parse_chunk.size() - row_idx) + 1;
+			idx_t error_line;
+			// The line_error must be summed with linenr (all lines emitted from this batch)
+			// But subtracted from the parse_chunk
+			D_ASSERT(line_error + linenr >= parse_chunk.size());
+			line_error += linenr;
+			line_error -= parse_chunk.size();
+			error_line = GetLineError(line_error, buffer_idx);
 			if (options.auto_detect) {
 				throw InvalidInputException("%s in column %s, at line %llu.\n\nParser "

package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp CHANGED Viewed

@@ -39,9 +39,6 @@ BufferedCSVReader::BufferedCSVReader(ClientContext &context, string filename, Bu
 	Initialize(requested_types);
 }
-BufferedCSVReader::~BufferedCSVReader() {
-}
 enum class QuoteRule : uint8_t { QUOTES_RFC = 0, QUOTES_OTHER = 1, NO_QUOTES = 2 };
 static bool StartsWithNumericDate(string &separator, const string &value) {

package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp CHANGED Viewed

@@ -15,19 +15,20 @@
 #include "utf8proc.hpp"
 #include "duckdb/parser/keyword_helper.hpp"
 #include "duckdb/function/table/read_csv.hpp"
+#include "duckdb/execution/operator/persistent/csv_line_info.hpp"
 #include <algorithm>
 #include <cctype>
 #include <cstring>
 #include <fstream>
-#include <utility>
 namespace duckdb {
 ParallelCSVReader::ParallelCSVReader(ClientContext &context, BufferedCSVReaderOptions options_p,
                                      unique_ptr<CSVBufferRead> buffer_p, idx_t first_pos_first_buffer_p,
-                                     const vector<LogicalType> &requested_types)
-    : BaseCSVReader(context, std::move(options_p), requested_types), first_pos_first_buffer(first_pos_first_buffer_p) {
+                                     const vector<LogicalType> &requested_types, idx_t file_idx_p)
+    : BaseCSVReader(context, std::move(options_p), requested_types), file_idx(file_idx_p),
+      first_pos_first_buffer(first_pos_first_buffer_p) {
 	Initialize(requested_types);
 	SetBufferRead(std::move(buffer_p));
 	if (options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1) {
@@ -35,9 +36,6 @@ ParallelCSVReader::ParallelCSVReader(ClientContext &context, BufferedCSVReaderOp
 	}
 }
-ParallelCSVReader::~ParallelCSVReader() {
-}
 void ParallelCSVReader::Initialize(const vector<LogicalType> &requested_types) {
 	return_types = requested_types;
 	InitParseChunk(return_types.size());
@@ -76,7 +74,7 @@ void ParallelCSVReader::SkipEmptyLines() {
 	}
 }
-bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
+bool ParallelCSVReader::SetPosition() {
 	if (buffer->buffer->IsCSVFileFirstBuffer() && start_buffer == position_buffer &&
 	    start_buffer == first_pos_first_buffer) {
 		start_buffer = buffer->buffer->GetStart();
@@ -84,7 +82,7 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
 		verification_positions.beginning_of_first_line = position_buffer;
 		verification_positions.end_of_last_line = position_buffer;
 		// First buffer doesn't need any setting
-		// Unless we have a header
 		if (options.header) {
 			for (; position_buffer < end_buffer; position_buffer++) {
 				if (StringUtil::CharacterIsNewline((*buffer)[position_buffer])) {
@@ -205,10 +203,8 @@ void ParallelCSVReader::SetBufferRead(unique_ptr<CSVBufferRead> buffer_read_p) {
 	} else {
 		buffer_size = buffer_read_p->buffer->GetBufferSize();
 	}
-	linenr = buffer_read_p->estimated_linenr;
 	buffer = std::move(buffer_read_p);
-	linenr_estimated = true;
 	reached_remainder_state = false;
 	verification_positions.beginning_of_first_line = 0;
 	verification_positions.end_of_last_line = 0;
@@ -239,10 +235,12 @@ bool ParallelCSVReader::BufferRemainder() {
 	return true;
 }
-void VerifyLineLength(idx_t line_size, idx_t max_line_size) {
-	if (line_size > max_line_size) {
-		// FIXME: this should also output the correct estimated linenumber where it broke
-		throw InvalidInputException("Maximum line size of %llu bytes exceeded!", max_line_size);
+void ParallelCSVReader::VerifyLineLength(idx_t line_size) {
+	if (line_size > options.maximum_line_size) {
+		throw InvalidInputException("Error in file \"%s\" on line %s: Maximum line size of %llu bytes exceeded!",
+		                            options.file_path,
+		                            GetLineNumberStr(parse_chunk.size(), linenr_estimated, buffer->batch_index).c_str(),
+		                            options.maximum_line_size);
 	}
 }
@@ -261,6 +259,33 @@ bool AllNewLine(string_t value, idx_t column_amount) {
 }
 bool ParallelCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message, bool try_add_line) {
+	// If the newline identifier is not set, we have to figure it out: we assume whatever terminates the first line
+	if (options.new_line == NewLineIdentifier::NOT_SET) {
+		idx_t cur_pos = position_buffer;
+		// we can start in the middle of a new line, so move a bit forward.
+		while (cur_pos < end_buffer) {
+			if (StringUtil::CharacterIsNewline((*buffer)[cur_pos])) {
+				cur_pos++;
+			} else {
+				break;
+			}
+		}
+		for (; cur_pos < end_buffer; cur_pos++) {
+			if (StringUtil::CharacterIsNewline((*buffer)[cur_pos])) {
+				bool carriage_return = (*buffer)[cur_pos] == '\r';
+				bool carriage_return_followed = false;
+				cur_pos++;
+				if (cur_pos < end_buffer) {
+					if (carriage_return && (*buffer)[cur_pos] == '\n') {
+						carriage_return_followed = true;
+						cur_pos++;
+					}
+				}
+				SetNewLineDelimiter(carriage_return, carriage_return_followed);
+				break;
+			}
+		}
+	}
 	// used for parsing algorithm
 	if (start_buffer == buffer_size) {
 		// Nothing to read
@@ -276,7 +301,7 @@ bool ParallelCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error
 	vector<idx_t> escape_positions;
 	if ((start_buffer == buffer->buffer_start || start_buffer == buffer->buffer_end) && !try_add_line) {
 		// First time reading this buffer piece
-		if (!SetPosition(insert_chunk)) {
+		if (!SetPosition()) {
 			finished = true;
 			return true;
 		}
@@ -340,7 +365,8 @@ normal : {
 add_value : {
 	/* state: Add value to string vector */
-	AddValue(buffer->GetValue(start_buffer, position_buffer, offset), column, escape_positions, has_quotes);
+	AddValue(buffer->GetValue(start_buffer, position_buffer, offset), column, escape_positions, has_quotes,
+	         buffer->local_batch_index);
 	// increase position by 1 and move start to the new position
 	offset = 0;
 	has_quotes = false;
@@ -356,20 +382,23 @@ add_row : {
 	// check type of newline (\r or \n)
 	bool carriage_return = (*buffer)[position_buffer] == '\r';
-	AddValue(buffer->GetValue(start_buffer, position_buffer, offset), column, escape_positions, has_quotes);
+	AddValue(buffer->GetValue(start_buffer, position_buffer, offset), column, escape_positions, has_quotes,
+	         buffer->local_batch_index);
 	if (try_add_line) {
 		bool success = column == insert_chunk.ColumnCount();
 		if (success) {
-			AddRow(insert_chunk, column, error_message);
-			success = Flush(insert_chunk);
+			idx_t cur_linenr = linenr;
+			AddRow(insert_chunk, column, error_message, buffer->local_batch_index);
+			success = Flush(insert_chunk, buffer->local_batch_index, true);
+			linenr = cur_linenr;
 		}
 		reached_remainder_state = false;
 		parse_chunk.Reset();
 		return success;
 	} else {
-		VerifyLineLength(position_buffer - line_start, options.maximum_line_size);
+		VerifyLineLength(position_buffer - line_start);
 		line_start = position_buffer;
-		finished_chunk = AddRow(insert_chunk, column, error_message);
+		finished_chunk = AddRow(insert_chunk, column, error_message, buffer->local_batch_index);
 	}
 	// increase position by 1 and move start to the new position
 	offset = 0;
@@ -377,15 +406,12 @@ add_row : {
 	position_buffer++;
 	start_buffer = position_buffer;
 	verification_positions.end_of_last_line = position_buffer;
-	if (reached_remainder_state) {
-		goto final_state;
-	}
-	if (!BufferRemainder()) {
-		goto final_state;
-	}
 	if (carriage_return) {
 		// \r newline, go to special state that parses an optional \n afterwards
 		// optionally skips a newline (\n) character, which allows \r\n to be interpreted as a single line
+		if (!BufferRemainder()) {
+			goto final_state;
+		}
 		if ((*buffer)[position_buffer] == '\n') {
 			if (options.new_line == NewLineIdentifier::SINGLE) {
 				error_message = "Wrong NewLine Identifier. Expecting \\r\\n";
@@ -419,6 +445,12 @@ add_row : {
 			error_message = "Wrong NewLine Identifier. Expecting \\r or \\n";
 			return false;
 		}
+		if (reached_remainder_state) {
+			goto final_state;
+		}
+		if (!BufferRemainder()) {
+			goto final_state;
+		}
 		SkipEmptyLines();
 		verification_positions.end_of_last_line = position_buffer;
 		start_buffer = position_buffer;
@@ -451,7 +483,8 @@ in_quotes:
 			}
 			// still in quoted state at the end of the file or at the end of a buffer when running multithreaded, error:
 			throw InvalidInputException("Error in file \"%s\" on line %s: unterminated quotes. (%s)", options.file_path,
-			                            GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
+			                            GetLineNumberStr(linenr, linenr_estimated, buffer->local_batch_index).c_str(),
+			                            options.ToString());
 		} else {
 			goto final_state;
 		}
@@ -492,7 +525,8 @@ unquote : {
 		error_message = StringUtil::Format(
 		    "Error in file \"%s\" on line %s: quote should be followed by end of value, end of "
 		    "row or another quote. (%s). ",
-		    options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
+		    options.file_path, GetLineNumberStr(linenr, linenr_estimated, buffer->local_batch_index).c_str(),
+		    options.ToString());
 		return false;
 	}
 }
@@ -506,13 +540,13 @@ handle_escape : {
 	if (position_buffer >= buffer_size && buffer->buffer->IsCSVFileLastBuffer()) {
 		error_message = StringUtil::Format(
 		    "Error in file \"%s\" on line %s: neither QUOTE nor ESCAPE is proceeded by ESCAPE. (%s)", options.file_path,
-		    GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
+		    GetLineNumberStr(linenr, linenr_estimated, buffer->local_batch_index).c_str(), options.ToString());
 		return false;
 	}
 	if ((*buffer)[position_buffer] != options.quote[0] && (*buffer)[position_buffer] != options.escape[0]) {
 		error_message = StringUtil::Format(
 		    "Error in file \"%s\" on line %s: neither QUOTE nor ESCAPE is proceeded by ESCAPE. (%s)", options.file_path,
-		    GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
+		    GetLineNumberStr(linenr, linenr_estimated, buffer->local_batch_index).c_str(), options.ToString());
 		return false;
 	}
 	// escape was followed by quote or escape, go back to quoted state
@@ -535,6 +569,7 @@ final_state : {
 				finished = true;
 			}
 		}
+		buffer->lines_read += insert_chunk.size();
 		return true;
 	}
 	// If this is the last buffer, we have to read the last value
@@ -544,20 +579,22 @@ final_state : {
 			// remaining values to be added to the chunk
 			auto str_value = buffer->GetValue(start_buffer, position_buffer, offset);
 			if (!AllNewLine(str_value, insert_chunk.data.size()) || offset == 0) {
-				AddValue(str_value, column, escape_positions, has_quotes);
+				AddValue(str_value, column, escape_positions, has_quotes, buffer->local_batch_index);
 				if (try_add_line) {
 					bool success = column == return_types.size();
 					if (success) {
-						AddRow(insert_chunk, column, error_message);
-						success = Flush(insert_chunk);
+						auto cur_linenr = linenr;
+						AddRow(insert_chunk, column, error_message, buffer->local_batch_index);
+						success = Flush(insert_chunk, buffer->local_batch_index);
+						linenr = cur_linenr;
 					}
 					parse_chunk.Reset();
 					reached_remainder_state = false;
 					return success;
 				} else {
-					VerifyLineLength(position_buffer - line_start, options.maximum_line_size);
+					VerifyLineLength(position_buffer - line_start);
 					line_start = position_buffer;
-					AddRow(insert_chunk, column, error_message);
+					AddRow(insert_chunk, column, error_message, buffer->local_batch_index);
 					verification_positions.end_of_last_line = position_buffer;
 				}
 			}
@@ -565,7 +602,8 @@ final_state : {
 	}
 	// flush the parsed chunk and finalize parsing
 	if (mode == ParserMode::PARSING) {
-		Flush(insert_chunk);
+		Flush(insert_chunk, buffer->local_batch_index);
+		buffer->lines_read += insert_chunk.size();
 	}
 	if (position_buffer - verification_positions.end_of_last_line > options.buffer_size) {
 		error_message = "Line does not fit in one buffer. Increase the buffer size.";
@@ -597,6 +635,16 @@ void ParallelCSVReader::ParseCSV(DataChunk &insert_chunk) {
 	}
 }
+idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx) {
+	while (true) {
+		if (buffer->line_info->CanItGetLine(file_idx, buffer_idx)) {
+			auto cur_start = verification_positions.beginning_of_first_line + buffer->buffer->GetCSVGlobalStart();
+			return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false);
+		}
+	}
+}
 bool ParallelCSVReader::TryParseCSV(ParserMode mode) {
 	DataChunk dummy_chunk;
 	string error_message;