duckdb 0.5.2-dev2006.0 → 0.5.2-dev2076.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -28769,9 +28769,10 @@ bool TryCast::Operation(string_t input, hugeint_t &result, bool strict) {
28769
28769
  //===--------------------------------------------------------------------===//
28770
28770
  // Decimal String Cast
28771
28771
  //===--------------------------------------------------------------------===//
28772
- template <class T>
28772
+ template <class TYPE>
28773
28773
  struct DecimalCastData {
28774
- T result;
28774
+ typedef TYPE type_t;
28775
+ TYPE result;
28775
28776
  uint8_t width;
28776
28777
  uint8_t scale;
28777
28778
  uint8_t digit_count;
@@ -28794,8 +28795,14 @@ struct DecimalCastOperation {
28794
28795
  }
28795
28796
  state.digit_count++;
28796
28797
  if (NEGATIVE) {
28798
+ if (state.result < (NumericLimits<typename T::type_t>::Minimum() / 10)) {
28799
+ return false;
28800
+ }
28797
28801
  state.result = state.result * 10 - digit;
28798
28802
  } else {
28803
+ if (state.result > (NumericLimits<typename T::type_t>::Maximum() / 10)) {
28804
+ return false;
28805
+ }
28799
28806
  state.result = state.result * 10 + digit;
28800
28807
  }
28801
28808
  return true;
@@ -42650,6 +42657,19 @@ static int8_t TemplatedCompareValue(Vector &left_vec, Vector &right_vec, idx_t l
42650
42657
  return 1;
42651
42658
  }
42652
42659
 
42660
+ template <>
42661
+ int8_t TemplatedCompareValue<Value>(Vector &left_vec, Vector &right_vec, idx_t left_idx, idx_t right_idx) {
42662
+ auto left_val = left_vec.GetValue(left_idx);
42663
+ auto right_val = right_vec.GetValue(right_idx);
42664
+ if (ValueOperations::Equals(left_val, right_val)) {
42665
+ return 0;
42666
+ }
42667
+ if (ValueOperations::LessThan(left_val, right_val)) {
42668
+ return -1;
42669
+ }
42670
+ return 1;
42671
+ }
42672
+
42653
42673
  // return type here is int32 because strcmp() on some platforms returns rather large values
42654
42674
  static int32_t CompareValue(Vector &left_vec, Vector &right_vec, idx_t vector_idx_left, idx_t vector_idx_right,
42655
42675
  OrderByNullType null_order) {
@@ -42693,7 +42713,7 @@ static int32_t CompareValue(Vector &left_vec, Vector &right_vec, idx_t vector_id
42693
42713
  case PhysicalType::INTERVAL:
42694
42714
  return TemplatedCompareValue<interval_t>(left_vec, right_vec, vector_idx_left, vector_idx_right);
42695
42715
  default:
42696
- throw NotImplementedException("Type for comparison");
42716
+ return TemplatedCompareValue<Value>(left_vec, right_vec, vector_idx_left, vector_idx_right);
42697
42717
  }
42698
42718
  }
42699
42719
 
@@ -79319,398 +79339,446 @@ string PhysicalTopN::ParamsToString() const {
79319
79339
 
79320
79340
  namespace duckdb {
79321
79341
 
79322
- static bool ParseBoolean(const Value &value, const string &loption);
79342
+ string BaseCSVReader::GetLineNumberStr(idx_t linenr, bool linenr_estimated) {
79343
+ string estimated = (linenr_estimated ? string(" (estimated)") : string(""));
79344
+ return to_string(linenr + 1) + estimated;
79345
+ }
79323
79346
 
79324
- static bool ParseBoolean(const vector<Value> &set, const string &loption) {
79325
- if (set.empty()) {
79326
- // no option specified: default to true
79327
- return true;
79328
- }
79329
- if (set.size() > 1) {
79330
- throw BinderException("\"%s\" expects a single argument as a boolean value (e.g. TRUE or 1)", loption);
79331
- }
79332
- return ParseBoolean(set[0], loption);
79347
+ BaseCSVReader::BaseCSVReader(FileSystem &fs_p, Allocator &allocator, FileOpener *opener_p,
79348
+ BufferedCSVReaderOptions options_p, const vector<LogicalType> &requested_types)
79349
+ : fs(fs_p), allocator(allocator), opener(opener_p), options(move(options_p)) {
79333
79350
  }
79334
79351
 
79335
- static bool ParseBoolean(const Value &value, const string &loption) {
79352
+ BaseCSVReader::BaseCSVReader(ClientContext &context, BufferedCSVReaderOptions options_p,
79353
+ const vector<LogicalType> &requested_types)
79354
+ : BaseCSVReader(FileSystem::GetFileSystem(context), Allocator::Get(context), FileSystem::GetFileOpener(context),
79355
+ move(options_p), requested_types) {
79356
+ }
79336
79357
 
79337
- if (value.type().id() == LogicalTypeId::LIST) {
79338
- auto &children = ListValue::GetChildren(value);
79339
- return ParseBoolean(children, loption);
79340
- }
79341
- if (value.type() == LogicalType::FLOAT || value.type() == LogicalType::DOUBLE ||
79342
- value.type().id() == LogicalTypeId::DECIMAL) {
79343
- throw BinderException("\"%s\" expects a boolean value (e.g. TRUE or 1)", loption);
79344
- }
79345
- return BooleanValue::Get(value.DefaultCastAs(LogicalType::BOOLEAN));
79358
+ BaseCSVReader::~BaseCSVReader() {
79346
79359
  }
79347
79360
 
79348
- static string ParseString(const Value &value, const string &loption) {
79349
- if (value.type().id() == LogicalTypeId::LIST) {
79350
- auto &children = ListValue::GetChildren(value);
79351
- if (children.size() != 1) {
79352
- throw BinderException("\"%s\" expects a single argument as a string value", loption);
79353
- }
79354
- return ParseString(children[0], loption);
79361
+ unique_ptr<CSVFileHandle> BaseCSVReader::OpenCSV(const BufferedCSVReaderOptions &options_p) {
79362
+ auto file_handle = fs.OpenFile(options_p.file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK,
79363
+ options_p.compression, this->opener);
79364
+ return make_unique<CSVFileHandle>(move(file_handle));
79365
+ }
79366
+
79367
+ void BaseCSVReader::InitParseChunk(idx_t num_cols) {
79368
+ // adapt not null info
79369
+ if (options.force_not_null.size() != num_cols) {
79370
+ options.force_not_null.resize(num_cols, false);
79355
79371
  }
79356
- if (value.type().id() != LogicalTypeId::VARCHAR) {
79357
- throw BinderException("\"%s\" expects a string argument!", loption);
79372
+ if (num_cols == parse_chunk.ColumnCount()) {
79373
+ parse_chunk.Reset();
79374
+ } else {
79375
+ parse_chunk.Destroy();
79376
+
79377
+ // initialize the parse_chunk with a set of VARCHAR types
79378
+ vector<LogicalType> varchar_types(num_cols, LogicalType::VARCHAR);
79379
+ parse_chunk.Initialize(allocator, varchar_types);
79358
79380
  }
79359
- return value.GetValue<string>();
79360
79381
  }
79361
79382
 
79362
- static int64_t ParseInteger(const Value &value, const string &loption) {
79363
- if (value.type().id() == LogicalTypeId::LIST) {
79364
- auto &children = ListValue::GetChildren(value);
79365
- if (children.size() != 1) {
79366
- // no option specified or multiple options specified
79367
- throw BinderException("\"%s\" expects a single argument as an integer value", loption);
79368
- }
79369
- return ParseInteger(children[0], loption);
79383
+ void BaseCSVReader::InitInsertChunkIdx(idx_t num_cols) {
79384
+ for (idx_t col = 0; col < num_cols; ++col) {
79385
+ insert_cols_idx.push_back(col);
79370
79386
  }
79371
- return value.GetValue<int64_t>();
79372
79387
  }
79373
79388
 
79374
- static vector<bool> ParseColumnList(const vector<Value> &set, vector<string> &names, const string &loption) {
79375
- vector<bool> result;
79389
+ void BaseCSVReader::SetDateFormat(const string &format_specifier, const LogicalTypeId &sql_type) {
79390
+ options.has_format[sql_type] = true;
79391
+ auto &date_format = options.date_format[sql_type];
79392
+ date_format.format_specifier = format_specifier;
79393
+ StrTimeFormat::ParseFormatSpecifier(date_format.format_specifier, date_format);
79394
+ }
79376
79395
 
79377
- if (set.empty()) {
79378
- throw BinderException("\"%s\" expects a column list or * as parameter", loption);
79396
+ bool BaseCSVReader::TryCastValue(const Value &value, const LogicalType &sql_type) {
79397
+ if (options.has_format[LogicalTypeId::DATE] && sql_type.id() == LogicalTypeId::DATE) {
79398
+ date_t result;
79399
+ string error_message;
79400
+ return options.date_format[LogicalTypeId::DATE].TryParseDate(string_t(StringValue::Get(value)), result,
79401
+ error_message);
79402
+ } else if (options.has_format[LogicalTypeId::TIMESTAMP] && sql_type.id() == LogicalTypeId::TIMESTAMP) {
79403
+ timestamp_t result;
79404
+ string error_message;
79405
+ return options.date_format[LogicalTypeId::TIMESTAMP].TryParseTimestamp(string_t(StringValue::Get(value)),
79406
+ result, error_message);
79407
+ } else {
79408
+ Value new_value;
79409
+ string error_message;
79410
+ return value.DefaultTryCastAs(sql_type, new_value, &error_message, true);
79379
79411
  }
79380
- // list of options: parse the list
79381
- unordered_map<string, bool> option_map;
79382
- for (idx_t i = 0; i < set.size(); i++) {
79383
- option_map[set[i].ToString()] = false;
79412
+ }
79413
+
79414
+ struct TryCastDateOperator {
79415
+ static bool Operation(BufferedCSVReaderOptions &options, string_t input, date_t &result, string &error_message) {
79416
+ return options.date_format[LogicalTypeId::DATE].TryParseDate(input, result, error_message);
79384
79417
  }
79385
- result.resize(names.size(), false);
79386
- for (idx_t i = 0; i < names.size(); i++) {
79387
- auto entry = option_map.find(names[i]);
79388
- if (entry != option_map.end()) {
79389
- result[i] = true;
79390
- entry->second = true;
79391
- }
79418
+ };
79419
+
79420
+ struct TryCastTimestampOperator {
79421
+ static bool Operation(BufferedCSVReaderOptions &options, string_t input, timestamp_t &result,
79422
+ string &error_message) {
79423
+ return options.date_format[LogicalTypeId::TIMESTAMP].TryParseTimestamp(input, result, error_message);
79392
79424
  }
79393
- for (auto &entry : option_map) {
79394
- if (!entry.second) {
79395
- throw BinderException("\"%s\" expected to find %s, but it was not found in the table", loption,
79396
- entry.first.c_str());
79425
+ };
79426
+
79427
+ template <class OP, class T>
79428
+ static bool TemplatedTryCastDateVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector,
79429
+ idx_t count, string &error_message) {
79430
+ D_ASSERT(input_vector.GetType().id() == LogicalTypeId::VARCHAR);
79431
+ bool all_converted = true;
79432
+ UnaryExecutor::Execute<string_t, T>(input_vector, result_vector, count, [&](string_t input) {
79433
+ T result;
79434
+ if (!OP::Operation(options, input, result, error_message)) {
79435
+ all_converted = false;
79397
79436
  }
79398
- }
79399
- return result;
79437
+ return result;
79438
+ });
79439
+ return all_converted;
79400
79440
  }
79401
79441
 
79402
- static vector<bool> ParseColumnList(const Value &value, vector<string> &names, const string &loption) {
79403
- vector<bool> result;
79442
+ bool TryCastDateVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector, idx_t count,
79443
+ string &error_message) {
79444
+ return TemplatedTryCastDateVector<TryCastDateOperator, date_t>(options, input_vector, result_vector, count,
79445
+ error_message);
79446
+ }
79404
79447
 
79405
- // Only accept a list of arguments
79406
- if (value.type().id() != LogicalTypeId::LIST) {
79407
- // Support a single argument if it's '*'
79408
- if (value.type().id() == LogicalTypeId::VARCHAR && value.GetValue<string>() == "*") {
79409
- result.resize(names.size(), true);
79410
- return result;
79411
- }
79412
- throw BinderException("\"%s\" expects a column list or * as parameter", loption);
79413
- }
79414
- auto &children = ListValue::GetChildren(value);
79415
- // accept '*' as single argument
79416
- if (children.size() == 1 && children[0].type().id() == LogicalTypeId::VARCHAR &&
79417
- children[0].GetValue<string>() == "*") {
79418
- result.resize(names.size(), true);
79419
- return result;
79448
+ bool TryCastTimestampVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector, idx_t count,
79449
+ string &error_message) {
79450
+ return TemplatedTryCastDateVector<TryCastTimestampOperator, timestamp_t>(options, input_vector, result_vector,
79451
+ count, error_message);
79452
+ }
79453
+
79454
+ bool BaseCSVReader::TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type) {
79455
+ // try vector-cast from string to sql_type
79456
+ Vector dummy_result(sql_type);
79457
+ if (options.has_format[LogicalTypeId::DATE] && sql_type == LogicalTypeId::DATE) {
79458
+ // use the date format to cast the chunk
79459
+ string error_message;
79460
+ return TryCastDateVector(options, parse_chunk_col, dummy_result, size, error_message);
79461
+ } else if (options.has_format[LogicalTypeId::TIMESTAMP] && sql_type == LogicalTypeId::TIMESTAMP) {
79462
+ // use the timestamp format to cast the chunk
79463
+ string error_message;
79464
+ return TryCastTimestampVector(options, parse_chunk_col, dummy_result, size, error_message);
79465
+ } else {
79466
+ // target type is not varchar: perform a cast
79467
+ string error_message;
79468
+ return VectorOperations::DefaultTryCast(parse_chunk_col, dummy_result, size, &error_message, true);
79420
79469
  }
79421
- return ParseColumnList(children, names, loption);
79422
79470
  }
79423
79471
 
79424
- struct CSVFileHandle {
79425
- public:
79426
- explicit CSVFileHandle(unique_ptr<FileHandle> file_handle_p) : file_handle(move(file_handle_p)) {
79427
- can_seek = file_handle->CanSeek();
79428
- plain_file_source = file_handle->OnDiskFile() && can_seek;
79429
- file_size = file_handle->GetFileSize();
79472
+ void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &escape_positions, bool has_quotes) {
79473
+ auto length = str_val.GetSize();
79474
+ if (length == 0 && column == 0) {
79475
+ row_empty = true;
79476
+ } else {
79477
+ row_empty = false;
79430
79478
  }
79431
79479
 
79432
- bool CanSeek() {
79433
- return can_seek;
79480
+ if (!sql_types.empty() && column == sql_types.size() && length == 0) {
79481
+ // skip a single trailing delimiter in last column
79482
+ return;
79434
79483
  }
79435
- void Seek(idx_t position) {
79436
- if (!can_seek) {
79437
- throw InternalException("Cannot seek in this file");
79438
- }
79439
- file_handle->Seek(position);
79484
+ if (mode == ParserMode::SNIFFING_DIALECT) {
79485
+ column++;
79486
+ return;
79440
79487
  }
79441
- idx_t SeekPosition() {
79442
- if (!can_seek) {
79443
- throw InternalException("Cannot seek in this file");
79488
+ if (column >= sql_types.size()) {
79489
+ if (options.ignore_errors) {
79490
+ error_column_overflow = true;
79491
+ return;
79492
+ } else {
79493
+ throw InvalidInputException(
79494
+ "Error in file \"%s\", on line %s: expected %lld values per row, but got more. (%s)", options.file_path,
79495
+ GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), options.ToString());
79444
79496
  }
79445
- return file_handle->SeekPosition();
79446
79497
  }
79447
- void Reset() {
79448
- if (plain_file_source) {
79449
- file_handle->Reset();
79450
- } else {
79451
- if (!reset_enabled) {
79452
- throw InternalException("Reset called but reset is not enabled for this CSV Handle");
79498
+
79499
+ // insert the line number into the chunk
79500
+ idx_t row_entry = parse_chunk.size();
79501
+
79502
+ // test against null string, but only if the value was not quoted
79503
+ if ((!has_quotes || sql_types[column].id() != LogicalTypeId::VARCHAR) && !options.force_not_null[column] &&
79504
+ Equals::Operation(str_val, string_t(options.null_str))) {
79505
+ FlatVector::SetNull(parse_chunk.data[column], row_entry, true);
79506
+ } else {
79507
+ auto &v = parse_chunk.data[column];
79508
+ auto parse_data = FlatVector::GetData<string_t>(v);
79509
+ if (!escape_positions.empty()) {
79510
+ // remove escape characters (if any)
79511
+ string old_val = str_val.GetString();
79512
+ string new_val = "";
79513
+ idx_t prev_pos = 0;
79514
+ for (idx_t i = 0; i < escape_positions.size(); i++) {
79515
+ idx_t next_pos = escape_positions[i];
79516
+ new_val += old_val.substr(prev_pos, next_pos - prev_pos);
79517
+
79518
+ if (options.escape.empty() || options.escape == options.quote) {
79519
+ prev_pos = next_pos + options.quote.size();
79520
+ } else {
79521
+ prev_pos = next_pos + options.escape.size();
79522
+ }
79453
79523
  }
79454
- read_position = 0;
79524
+ new_val += old_val.substr(prev_pos, old_val.size() - prev_pos);
79525
+ escape_positions.clear();
79526
+ parse_data[row_entry] = StringVector::AddStringOrBlob(v, string_t(new_val));
79527
+ } else {
79528
+ parse_data[row_entry] = str_val;
79455
79529
  }
79456
79530
  }
79457
- bool PlainFileSource() {
79458
- return plain_file_source;
79459
- }
79460
79531
 
79461
- bool OnDiskFile() {
79462
- return file_handle->OnDiskFile();
79463
- }
79532
+ // move to the next column
79533
+ column++;
79534
+ }
79464
79535
 
79465
- idx_t FileSize() {
79466
- return file_size;
79467
- }
79536
+ bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column) {
79537
+ linenr++;
79468
79538
 
79469
- idx_t Read(void *buffer, idx_t nr_bytes) {
79470
- if (!plain_file_source) {
79471
- // not a plain file source: we need to do some bookkeeping around the reset functionality
79472
- idx_t result_offset = 0;
79473
- if (read_position < buffer_size) {
79474
- // we need to read from our cached buffer
79475
- auto buffer_read_count = MinValue<idx_t>(nr_bytes, buffer_size - read_position);
79476
- memcpy(buffer, cached_buffer.get() + read_position, buffer_read_count);
79477
- result_offset += buffer_read_count;
79478
- read_position += buffer_read_count;
79479
- if (result_offset == nr_bytes) {
79480
- return nr_bytes;
79481
- }
79482
- } else if (!reset_enabled && cached_buffer) {
79483
- // reset is disabled but we still have cached data
79484
- // we can remove any cached data
79485
- cached_buffer.reset();
79486
- buffer_size = 0;
79487
- buffer_capacity = 0;
79488
- read_position = 0;
79489
- }
79490
- // we have data left to read from the file
79491
- // read directly into the buffer
79492
- auto bytes_read = file_handle->Read((char *)buffer + result_offset, nr_bytes - result_offset);
79493
- read_position += bytes_read;
79494
- if (reset_enabled) {
79495
- // if reset caching is enabled, we need to cache the bytes that we have read
79496
- if (buffer_size + bytes_read >= buffer_capacity) {
79497
- // no space; first enlarge the buffer
79498
- buffer_capacity = MaxValue<idx_t>(NextPowerOfTwo(buffer_size + bytes_read), buffer_capacity * 2);
79499
-
79500
- auto new_buffer = unique_ptr<data_t[]>(new data_t[buffer_capacity]);
79501
- if (buffer_size > 0) {
79502
- memcpy(new_buffer.get(), cached_buffer.get(), buffer_size);
79503
- }
79504
- cached_buffer = move(new_buffer);
79505
- }
79506
- memcpy(cached_buffer.get() + buffer_size, (char *)buffer + result_offset, bytes_read);
79507
- buffer_size += bytes_read;
79539
+ if (row_empty) {
79540
+ row_empty = false;
79541
+ if (sql_types.size() != 1) {
79542
+ if (mode == ParserMode::PARSING) {
79543
+ FlatVector::SetNull(parse_chunk.data[0], parse_chunk.size(), false);
79508
79544
  }
79545
+ column = 0;
79546
+ return false;
79547
+ }
79548
+ }
79549
+
79550
+ // Error forwarded by 'ignore_errors' - originally encountered in 'AddValue'
79551
+ if (error_column_overflow) {
79552
+ D_ASSERT(options.ignore_errors);
79553
+ error_column_overflow = false;
79554
+ column = 0;
79555
+ return false;
79556
+ }
79509
79557
 
79510
- return result_offset + bytes_read;
79558
+ if (column < sql_types.size() && mode != ParserMode::SNIFFING_DIALECT) {
79559
+ if (options.ignore_errors) {
79560
+ column = 0;
79561
+ return false;
79511
79562
  } else {
79512
- return file_handle->Read(buffer, nr_bytes);
79563
+ throw InvalidInputException(
79564
+ "Error in file \"%s\" on line %s: expected %lld values per row, but got %d. (%s)", options.file_path,
79565
+ GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column, options.ToString());
79513
79566
  }
79514
79567
  }
79515
79568
 
79516
- string ReadLine() {
79517
- bool carriage_return = false;
79518
- string result;
79519
- char buffer[1];
79520
- while (true) {
79521
- idx_t bytes_read = Read(buffer, 1);
79522
- if (bytes_read == 0) {
79523
- return result;
79524
- }
79525
- if (carriage_return) {
79526
- if (buffer[0] != '\n') {
79527
- if (!file_handle->CanSeek()) {
79528
- throw BinderException(
79529
- "Carriage return newlines not supported when reading CSV files in which we cannot seek");
79530
- }
79531
- file_handle->Seek(file_handle->SeekPosition() - 1);
79532
- return result;
79533
- }
79534
- }
79535
- if (buffer[0] == '\n') {
79536
- return result;
79537
- }
79538
- if (buffer[0] != '\r') {
79539
- result += buffer[0];
79540
- } else {
79541
- carriage_return = true;
79542
- }
79569
+ if (mode == ParserMode::SNIFFING_DIALECT) {
79570
+ sniffed_column_counts.push_back(column);
79571
+
79572
+ if (sniffed_column_counts.size() == options.sample_chunk_size) {
79573
+ return true;
79543
79574
  }
79575
+ } else {
79576
+ parse_chunk.SetCardinality(parse_chunk.size() + 1);
79544
79577
  }
79545
79578
 
79546
- void DisableReset() {
79547
- this->reset_enabled = false;
79579
+ if (mode == ParserMode::PARSING_HEADER) {
79580
+ return true;
79548
79581
  }
79549
79582
 
79550
- private:
79551
- unique_ptr<FileHandle> file_handle;
79552
- bool reset_enabled = true;
79553
- bool can_seek = false;
79554
- bool plain_file_source = false;
79555
- idx_t file_size = 0;
79556
- // reset support
79557
- unique_ptr<data_t[]> cached_buffer;
79558
- idx_t read_position = 0;
79559
- idx_t buffer_size = 0;
79560
- idx_t buffer_capacity = 0;
79561
- };
79583
+ if (mode == ParserMode::SNIFFING_DATATYPES && parse_chunk.size() == options.sample_chunk_size) {
79584
+ return true;
79585
+ }
79562
79586
 
79563
- void BufferedCSVReaderOptions::SetDelimiter(const string &input) {
79564
- this->delimiter = StringUtil::Replace(input, "\\t", "\t");
79565
- this->has_delimiter = true;
79566
- if (input.empty()) {
79567
- this->delimiter = string("\0", 1);
79587
+ if (mode == ParserMode::PARSING && parse_chunk.size() == STANDARD_VECTOR_SIZE) {
79588
+ Flush(insert_chunk);
79589
+ return true;
79568
79590
  }
79591
+
79592
+ column = 0;
79593
+ return false;
79569
79594
  }
79570
79595
 
79571
- void BufferedCSVReaderOptions::SetDateFormat(LogicalTypeId type, const string &format, bool read_format) {
79572
- string error;
79573
- if (read_format) {
79574
- auto &date_format = this->date_format[type];
79575
- error = StrTimeFormat::ParseFormatSpecifier(format, date_format);
79576
- date_format.format_specifier = format;
79577
- } else {
79578
- auto &date_format = this->write_date_format[type];
79579
- error = StrTimeFormat::ParseFormatSpecifier(format, date_format);
79580
- }
79581
- if (!error.empty()) {
79582
- throw InvalidInputException("Could not parse DATEFORMAT: %s", error.c_str());
79596
+ void BaseCSVReader::SetNullUnionCols(DataChunk &insert_chunk) {
79597
+ for (idx_t col = 0; col < insert_nulls_idx.size(); ++col) {
79598
+ insert_chunk.data[insert_nulls_idx[col]].SetVectorType(VectorType::CONSTANT_VECTOR);
79599
+ ConstantVector::SetNull(insert_chunk.data[insert_nulls_idx[col]], true);
79583
79600
  }
79584
- has_format[type] = true;
79585
79601
  }
79586
79602
 
79587
- void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value &value,
79588
- vector<string> &expected_names) {
79589
- if (SetBaseOption(loption, value)) {
79603
+ void BaseCSVReader::VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, int64_t offset) {
79604
+ D_ASSERT(col_idx < chunk.data.size());
79605
+ D_ASSERT(row_idx < chunk.size());
79606
+ auto &v = chunk.data[col_idx];
79607
+ if (FlatVector::IsNull(v, row_idx)) {
79590
79608
  return;
79591
79609
  }
79592
- if (loption == "auto_detect") {
79593
- auto_detect = ParseBoolean(value, loption);
79594
- } else if (loption == "sample_size") {
79595
- int64_t sample_size = ParseInteger(value, loption);
79596
- if (sample_size < 1 && sample_size != -1) {
79597
- throw BinderException("Unsupported parameter for SAMPLE_SIZE: cannot be smaller than 1");
79598
- }
79599
- if (sample_size == -1) {
79600
- sample_chunks = std::numeric_limits<uint64_t>::max();
79601
- sample_chunk_size = STANDARD_VECTOR_SIZE;
79602
- } else if (sample_size <= STANDARD_VECTOR_SIZE) {
79603
- sample_chunk_size = sample_size;
79604
- sample_chunks = 1;
79605
- } else {
79606
- sample_chunk_size = STANDARD_VECTOR_SIZE;
79607
- sample_chunks = sample_size / STANDARD_VECTOR_SIZE;
79608
- }
79609
- } else if (loption == "skip") {
79610
- skip_rows = ParseInteger(value, loption);
79611
- } else if (loption == "max_line_size" || loption == "maximum_line_size") {
79612
- maximum_line_size = ParseInteger(value, loption);
79613
- } else if (loption == "sample_chunk_size") {
79614
- sample_chunk_size = ParseInteger(value, loption);
79615
- if (sample_chunk_size > STANDARD_VECTOR_SIZE) {
79616
- throw BinderException(
79617
- "Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be bigger than STANDARD_VECTOR_SIZE %d",
79618
- STANDARD_VECTOR_SIZE);
79619
- } else if (sample_chunk_size < 1) {
79620
- throw BinderException("Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be smaller than 1");
79621
- }
79622
- } else if (loption == "sample_chunks") {
79623
- sample_chunks = ParseInteger(value, loption);
79624
- if (sample_chunks < 1) {
79625
- throw BinderException("Unsupported parameter for SAMPLE_CHUNKS: cannot be smaller than 1");
79610
+
79611
+ auto parse_data = FlatVector::GetData<string_t>(chunk.data[col_idx]);
79612
+ auto s = parse_data[row_idx];
79613
+ auto utf_type = Utf8Proc::Analyze(s.GetDataUnsafe(), s.GetSize());
79614
+ if (utf_type == UnicodeType::INVALID) {
79615
+ string col_name = to_string(col_idx);
79616
+ if (col_idx < col_names.size()) {
79617
+ col_name = "\"" + col_names[col_idx] + "\"";
79626
79618
  }
79627
- } else if (loption == "force_not_null") {
79628
- force_not_null = ParseColumnList(value, expected_names, loption);
79629
- } else if (loption == "date_format" || loption == "dateformat") {
79630
- string format = ParseString(value, loption);
79631
- SetDateFormat(LogicalTypeId::DATE, format, true);
79632
- } else if (loption == "timestamp_format" || loption == "timestampformat") {
79633
- string format = ParseString(value, loption);
79634
- SetDateFormat(LogicalTypeId::TIMESTAMP, format, true);
79635
- } else if (loption == "escape") {
79636
- escape = ParseString(value, loption);
79637
- has_escape = true;
79638
- } else if (loption == "ignore_errors") {
79639
- ignore_errors = ParseBoolean(value, loption);
79640
- } else if (loption == "union_by_name") {
79641
- union_by_name = ParseBoolean(value, loption);
79642
- } else {
79643
- throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
79619
+ int64_t error_line = linenr - (chunk.size() - row_idx) + 1 + offset;
79620
+ D_ASSERT(error_line >= 0);
79621
+ throw InvalidInputException("Error in file \"%s\" at line %llu in column \"%s\": "
79622
+ "%s. Parser options: %s",
79623
+ options.file_path, error_line, col_name,
79624
+ ErrorManager::InvalidUnicodeError(s.GetString(), "CSV file"), options.ToString());
79644
79625
  }
79645
79626
  }
79646
79627
 
79647
- void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value &value) {
79648
- if (SetBaseOption(loption, value)) {
79649
- return;
79628
+ void BaseCSVReader::VerifyUTF8(idx_t col_idx) {
79629
+ D_ASSERT(col_idx < parse_chunk.data.size());
79630
+ for (idx_t i = 0; i < parse_chunk.size(); i++) {
79631
+ VerifyUTF8(col_idx, i, parse_chunk);
79650
79632
  }
79633
+ }
79651
79634
 
79652
- if (loption == "force_quote") {
79653
- force_quote = ParseColumnList(value, names, loption);
79654
- } else if (loption == "date_format" || loption == "dateformat") {
79655
- string format = ParseString(value, loption);
79656
- SetDateFormat(LogicalTypeId::DATE, format, false);
79657
- } else if (loption == "timestamp_format" || loption == "timestampformat") {
79658
- string format = ParseString(value, loption);
79659
- if (StringUtil::Lower(format) == "iso") {
79660
- format = "%Y-%m-%dT%H:%M:%S.%fZ";
79635
+ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
79636
+ if (parse_chunk.size() == 0) {
79637
+ return true;
79638
+ }
79639
+
79640
+ bool conversion_error_ignored = false;
79641
+
79642
+ // convert the columns in the parsed chunk to the types of the table
79643
+ insert_chunk.SetCardinality(parse_chunk);
79644
+ for (idx_t col_idx = 0; col_idx < sql_types.size(); col_idx++) {
79645
+ if (sql_types[col_idx].id() == LogicalTypeId::VARCHAR) {
79646
+ // target type is varchar: no need to convert
79647
+ // just test that all strings are valid utf-8 strings
79648
+ VerifyUTF8(col_idx);
79649
+ insert_chunk.data[insert_cols_idx[col_idx]].Reference(parse_chunk.data[col_idx]);
79650
+ } else {
79651
+ string error_message;
79652
+ bool success;
79653
+ if (options.has_format[LogicalTypeId::DATE] && sql_types[col_idx].id() == LogicalTypeId::DATE) {
79654
+ // use the date format to cast the chunk
79655
+ success =
79656
+ TryCastDateVector(options, parse_chunk.data[col_idx], insert_chunk.data[insert_cols_idx[col_idx]],
79657
+ parse_chunk.size(), error_message);
79658
+ } else if (options.has_format[LogicalTypeId::TIMESTAMP] &&
79659
+ sql_types[col_idx].id() == LogicalTypeId::TIMESTAMP) {
79660
+ // use the date format to cast the chunk
79661
+ success = TryCastTimestampVector(options, parse_chunk.data[col_idx],
79662
+ insert_chunk.data[insert_cols_idx[col_idx]], parse_chunk.size(),
79663
+ error_message);
79664
+ } else {
79665
+ // target type is not varchar: perform a cast
79666
+ success = VectorOperations::DefaultTryCast(parse_chunk.data[col_idx],
79667
+ insert_chunk.data[insert_cols_idx[col_idx]],
79668
+ parse_chunk.size(), &error_message);
79669
+ }
79670
+ if (success) {
79671
+ continue;
79672
+ }
79673
+ if (try_add_line) {
79674
+ return false;
79675
+ }
79676
+ if (options.ignore_errors) {
79677
+ conversion_error_ignored = true;
79678
+ continue;
79679
+ }
79680
+ string col_name = to_string(col_idx);
79681
+ if (col_idx < col_names.size()) {
79682
+ col_name = "\"" + col_names[col_idx] + "\"";
79683
+ }
79684
+
79685
+ // figure out the exact line number
79686
+ idx_t row_idx;
79687
+ for (row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
79688
+ auto &inserted_column = insert_chunk.data[col_idx];
79689
+ auto &parsed_column = parse_chunk.data[col_idx];
79690
+
79691
+ if (FlatVector::IsNull(inserted_column, row_idx) && !FlatVector::IsNull(parsed_column, row_idx)) {
79692
+ break;
79693
+ }
79694
+ }
79695
+ auto error_line = linenr - (parse_chunk.size() - row_idx) + 1;
79696
+
79697
+ if (options.auto_detect) {
79698
+ throw InvalidInputException("%s in column %s, at line %llu. Parser "
79699
+ "options: %s. Consider either increasing the sample size "
79700
+ "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
79701
+ "or skipping column conversion (ALL_VARCHAR=1)",
79702
+ error_message, col_name, error_line, options.ToString());
79703
+ } else {
79704
+ throw InvalidInputException("%s at line %llu in column %s. Parser options: %s ", error_message,
79705
+ error_line, col_name, options.ToString());
79706
+ }
79661
79707
  }
79662
- SetDateFormat(LogicalTypeId::TIMESTAMP, format, false);
79663
- } else {
79664
- throw BinderException("Unrecognized option CSV writer \"%s\"", loption);
79665
79708
  }
79666
- }
79709
+ if (conversion_error_ignored) {
79710
+ D_ASSERT(options.ignore_errors);
79711
+ SelectionVector succesful_rows;
79712
+ succesful_rows.Initialize(parse_chunk.size());
79713
+ idx_t sel_size = 0;
79667
79714
 
79668
- bool BufferedCSVReaderOptions::SetBaseOption(const string &loption, const Value &value) {
79669
- // Make sure this function was only called after the option was turned into lowercase
79670
- D_ASSERT(!std::any_of(loption.begin(), loption.end(), ::isupper));
79715
+ for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
79716
+ bool failed = false;
79717
+ for (idx_t column_idx = 0; column_idx < sql_types.size(); column_idx++) {
79671
79718
 
79672
- if (StringUtil::StartsWith(loption, "delim") || StringUtil::StartsWith(loption, "sep")) {
79673
- SetDelimiter(ParseString(value, loption));
79674
- } else if (loption == "quote") {
79675
- quote = ParseString(value, loption);
79676
- has_quote = true;
79677
- } else if (loption == "escape") {
79678
- escape = ParseString(value, loption);
79679
- has_escape = true;
79680
- } else if (loption == "header") {
79681
- header = ParseBoolean(value, loption);
79682
- has_header = true;
79683
- } else if (loption == "null" || loption == "nullstr") {
79684
- null_str = ParseString(value, loption);
79685
- } else if (loption == "encoding") {
79686
- auto encoding = StringUtil::Lower(ParseString(value, loption));
79687
- if (encoding != "utf8" && encoding != "utf-8") {
79688
- throw BinderException("Copy is only supported for UTF-8 encoded files, ENCODING 'UTF-8'");
79719
+ auto &inserted_column = insert_chunk.data[column_idx];
79720
+ auto &parsed_column = parse_chunk.data[column_idx];
79721
+
79722
+ bool was_already_null = FlatVector::IsNull(parsed_column, row_idx);
79723
+ if (!was_already_null && FlatVector::IsNull(inserted_column, row_idx)) {
79724
+ failed = true;
79725
+ break;
79726
+ }
79727
+ }
79728
+ if (!failed) {
79729
+ succesful_rows.set_index(sel_size++, row_idx);
79730
+ }
79689
79731
  }
79690
- } else if (loption == "compression") {
79691
- compression = FileCompressionTypeFromString(ParseString(value, loption));
79692
- } else {
79693
- // unrecognized option in base CSV
79694
- return false;
79732
+ insert_chunk.Slice(succesful_rows, sel_size);
79695
79733
  }
79734
+ parse_chunk.Reset();
79696
79735
  return true;
79697
79736
  }
79737
+ } // namespace duckdb
79698
79738
 
79699
- std::string BufferedCSVReaderOptions::ToString() const {
79700
- return "DELIMITER='" + delimiter + (has_delimiter ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
79701
- ", QUOTE='" + quote + (has_quote ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
79702
- ", ESCAPE='" + escape + (has_escape ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
79703
- ", HEADER=" + std::to_string(header) +
79704
- (has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
79705
- ", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) +
79706
- ", IGNORE_ERRORS=" + std::to_string(ignore_errors) + ", ALL_VARCHAR=" + std::to_string(all_varchar);
79739
+
79740
+
79741
+
79742
+
79743
+
79744
+
79745
+
79746
+
79747
+
79748
+
79749
+
79750
+
79751
+
79752
+
79753
+
79754
+
79755
+
79756
+ #include <algorithm>
79757
+ #include <cctype>
79758
+ #include <cstring>
79759
+ #include <fstream>
79760
+
79761
+ namespace duckdb {
79762
+
79763
+ BufferedCSVReader::BufferedCSVReader(FileSystem &fs_p, Allocator &allocator, FileOpener *opener_p,
79764
+ BufferedCSVReaderOptions options_p, const vector<LogicalType> &requested_types)
79765
+ : BaseCSVReader(fs_p, allocator, opener_p, move(options_p), requested_types), buffer_size(0), position(0),
79766
+ start(0) {
79767
+ file_handle = OpenCSV(options);
79768
+ Initialize(requested_types);
79707
79769
  }
79708
79770
 
79709
- static string GetLineNumberStr(idx_t linenr, bool linenr_estimated) {
79710
- string estimated = (linenr_estimated ? string(" (estimated)") : string(""));
79711
- return to_string(linenr + 1) + estimated;
79771
+ BufferedCSVReader::BufferedCSVReader(ClientContext &context, BufferedCSVReaderOptions options_p,
79772
+ const vector<LogicalType> &requested_types)
79773
+ : BufferedCSVReader(FileSystem::GetFileSystem(context), Allocator::Get(context), FileSystem::GetFileOpener(context),
79774
+ move(options_p), requested_types) {
79712
79775
  }
79713
79776
 
79777
+ BufferedCSVReader::~BufferedCSVReader() {
79778
+ }
79779
+
79780
+ enum class QuoteRule : uint8_t { QUOTES_RFC = 0, QUOTES_OTHER = 1, NO_QUOTES = 2 };
79781
+
79714
79782
  static bool StartsWithNumericDate(string &separator, const string &value) {
79715
79783
  auto begin = value.c_str();
79716
79784
  auto end = begin + value.size();
@@ -79813,61 +79881,6 @@ TextSearchShiftArray::TextSearchShiftArray(string search_term) : length(search_t
79813
79881
  }
79814
79882
  }
79815
79883
 
79816
- BufferedCSVReader::BufferedCSVReader(FileSystem &fs_p, Allocator &allocator, FileOpener *opener_p,
79817
- BufferedCSVReaderOptions options_p, const vector<LogicalType> &requested_types)
79818
- : fs(fs_p), allocator(allocator), opener(opener_p), options(move(options_p)), buffer_size(0), position(0),
79819
- start(0) {
79820
- file_handle = OpenCSV(options);
79821
- Initialize(requested_types);
79822
- }
79823
-
79824
- BufferedCSVReader::BufferedCSVReader(ClientContext &context, BufferedCSVReaderOptions options_p,
79825
- const vector<LogicalType> &requested_types)
79826
- : BufferedCSVReader(FileSystem::GetFileSystem(context), Allocator::Get(context), FileSystem::GetFileOpener(context),
79827
- move(options_p), requested_types) {
79828
- }
79829
-
79830
- BufferedCSVReader::~BufferedCSVReader() {
79831
- }
79832
-
79833
- idx_t BufferedCSVReader::GetFileSize() {
79834
- return file_handle ? file_handle->FileSize() : 0;
79835
- }
79836
-
79837
- void BufferedCSVReader::Initialize(const vector<LogicalType> &requested_types) {
79838
- PrepareComplexParser();
79839
- if (options.auto_detect) {
79840
- sql_types = SniffCSV(requested_types);
79841
- if (sql_types.empty()) {
79842
- throw Exception("Failed to detect column types from CSV: is the file a valid CSV file?");
79843
- }
79844
- if (cached_chunks.empty()) {
79845
- JumpToBeginning(options.skip_rows, options.header);
79846
- }
79847
- } else {
79848
- sql_types = requested_types;
79849
- ResetBuffer();
79850
- SkipRowsAndReadHeader(options.skip_rows, options.header);
79851
- }
79852
- InitParseChunk(sql_types.size());
79853
- InitInsertChunkIdx(sql_types.size());
79854
- // we only need reset support during the automatic CSV type detection
79855
- // since reset support might require caching (in the case of streams), we disable it for the remainder
79856
- file_handle->DisableReset();
79857
- }
79858
-
79859
- void BufferedCSVReader::PrepareComplexParser() {
79860
- delimiter_search = TextSearchShiftArray(options.delimiter);
79861
- escape_search = TextSearchShiftArray(options.escape);
79862
- quote_search = TextSearchShiftArray(options.quote);
79863
- }
79864
-
79865
- unique_ptr<CSVFileHandle> BufferedCSVReader::OpenCSV(const BufferedCSVReaderOptions &options) {
79866
- auto file_handle = fs.OpenFile(options.file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK,
79867
- options.compression, this->opener);
79868
- return make_unique<CSVFileHandle>(move(file_handle));
79869
- }
79870
-
79871
79884
  // Helper function to generate column names
79872
79885
  static string GenerateColumnName(const idx_t total_cols, const idx_t col_number, const string &prefix = "column") {
79873
79886
  int max_digits = NumericHelper::UnsignedLength(total_cols - 1);
@@ -79957,6 +79970,28 @@ static string NormalizeColumnName(const string &col_name) {
79957
79970
  return col_name_cleaned;
79958
79971
  }
79959
79972
 
79973
+ void BufferedCSVReader::Initialize(const vector<LogicalType> &requested_types) {
79974
+ PrepareComplexParser();
79975
+ if (options.auto_detect) {
79976
+ sql_types = SniffCSV(requested_types);
79977
+ if (sql_types.empty()) {
79978
+ throw Exception("Failed to detect column types from CSV: is the file a valid CSV file?");
79979
+ }
79980
+ if (cached_chunks.empty()) {
79981
+ JumpToBeginning(options.skip_rows, options.header);
79982
+ }
79983
+ } else {
79984
+ sql_types = requested_types;
79985
+ ResetBuffer();
79986
+ SkipRowsAndReadHeader(options.skip_rows, options.header);
79987
+ }
79988
+ InitParseChunk(sql_types.size());
79989
+ InitInsertChunkIdx(sql_types.size());
79990
+ // we only need reset support during the automatic CSV type detection
79991
+ // since reset support might require caching (in the case of streams), we disable it for the remainder
79992
+ file_handle->DisableReset();
79993
+ }
79994
+
79960
79995
  void BufferedCSVReader::ResetBuffer() {
79961
79996
  buffer.reset();
79962
79997
  buffer_size = 0;
@@ -79980,28 +80015,6 @@ void BufferedCSVReader::ResetStream() {
79980
80015
  jumping_samples = false;
79981
80016
  }
79982
80017
 
79983
- void BufferedCSVReader::InitParseChunk(idx_t num_cols) {
79984
- // adapt not null info
79985
- if (options.force_not_null.size() != num_cols) {
79986
- options.force_not_null.resize(num_cols, false);
79987
- }
79988
- if (num_cols == parse_chunk.ColumnCount()) {
79989
- parse_chunk.Reset();
79990
- } else {
79991
- parse_chunk.Destroy();
79992
-
79993
- // initialize the parse_chunk with a set of VARCHAR types
79994
- vector<LogicalType> varchar_types(num_cols, LogicalType::VARCHAR);
79995
- parse_chunk.Initialize(allocator, varchar_types);
79996
- }
79997
- }
79998
-
79999
- void BufferedCSVReader::InitInsertChunkIdx(idx_t num_cols) {
80000
- for (idx_t col = 0; col < num_cols; ++col) {
80001
- insert_cols_idx.push_back(col);
80002
- }
80003
- }
80004
-
80005
80018
  void BufferedCSVReader::JumpToBeginning(idx_t skip_rows = 0, bool skip_header = false) {
80006
80019
  ResetBuffer();
80007
80020
  ResetStream();
@@ -80026,6 +80039,12 @@ void BufferedCSVReader::SkipRowsAndReadHeader(idx_t skip_rows, bool skip_header)
80026
80039
  }
80027
80040
  }
80028
80041
 
80042
+ void BufferedCSVReader::PrepareComplexParser() {
80043
+ delimiter_search = TextSearchShiftArray(options.delimiter);
80044
+ escape_search = TextSearchShiftArray(options.escape);
80045
+ quote_search = TextSearchShiftArray(options.quote);
80046
+ }
80047
+
80029
80048
  bool BufferedCSVReader::JumpToNextSample() {
80030
80049
  // get bytes contained in the previously read chunk
80031
80050
  idx_t remaining_bytes_in_buffer = buffer_size - start;
@@ -80099,91 +80118,6 @@ bool BufferedCSVReader::JumpToNextSample() {
80099
80118
  return true;
80100
80119
  }
80101
80120
 
80102
- void BufferedCSVReader::SetDateFormat(const string &format_specifier, const LogicalTypeId &sql_type) {
80103
- options.has_format[sql_type] = true;
80104
- auto &date_format = options.date_format[sql_type];
80105
- date_format.format_specifier = format_specifier;
80106
- StrTimeFormat::ParseFormatSpecifier(date_format.format_specifier, date_format);
80107
- }
80108
-
80109
- bool BufferedCSVReader::TryCastValue(const Value &value, const LogicalType &sql_type) {
80110
- if (options.has_format[LogicalTypeId::DATE] && sql_type.id() == LogicalTypeId::DATE) {
80111
- date_t result;
80112
- string error_message;
80113
- return options.date_format[LogicalTypeId::DATE].TryParseDate(string_t(StringValue::Get(value)), result,
80114
- error_message);
80115
- } else if (options.has_format[LogicalTypeId::TIMESTAMP] && sql_type.id() == LogicalTypeId::TIMESTAMP) {
80116
- timestamp_t result;
80117
- string error_message;
80118
- return options.date_format[LogicalTypeId::TIMESTAMP].TryParseTimestamp(string_t(StringValue::Get(value)),
80119
- result, error_message);
80120
- } else {
80121
- Value new_value;
80122
- string error_message;
80123
- return value.DefaultTryCastAs(sql_type, new_value, &error_message, true);
80124
- }
80125
- }
80126
-
80127
- struct TryCastDateOperator {
80128
- static bool Operation(BufferedCSVReaderOptions &options, string_t input, date_t &result, string &error_message) {
80129
- return options.date_format[LogicalTypeId::DATE].TryParseDate(input, result, error_message);
80130
- }
80131
- };
80132
-
80133
- struct TryCastTimestampOperator {
80134
- static bool Operation(BufferedCSVReaderOptions &options, string_t input, timestamp_t &result,
80135
- string &error_message) {
80136
- return options.date_format[LogicalTypeId::TIMESTAMP].TryParseTimestamp(input, result, error_message);
80137
- }
80138
- };
80139
-
80140
- template <class OP, class T>
80141
- static bool TemplatedTryCastDateVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector,
80142
- idx_t count, string &error_message) {
80143
- D_ASSERT(input_vector.GetType().id() == LogicalTypeId::VARCHAR);
80144
- bool all_converted = true;
80145
- UnaryExecutor::Execute<string_t, T>(input_vector, result_vector, count, [&](string_t input) {
80146
- T result;
80147
- if (!OP::Operation(options, input, result, error_message)) {
80148
- all_converted = false;
80149
- }
80150
- return result;
80151
- });
80152
- return all_converted;
80153
- }
80154
-
80155
- bool TryCastDateVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector, idx_t count,
80156
- string &error_message) {
80157
- return TemplatedTryCastDateVector<TryCastDateOperator, date_t>(options, input_vector, result_vector, count,
80158
- error_message);
80159
- }
80160
-
80161
- bool TryCastTimestampVector(BufferedCSVReaderOptions &options, Vector &input_vector, Vector &result_vector, idx_t count,
80162
- string &error_message) {
80163
- return TemplatedTryCastDateVector<TryCastTimestampOperator, timestamp_t>(options, input_vector, result_vector,
80164
- count, error_message);
80165
- }
80166
-
80167
- bool BufferedCSVReader::TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type) {
80168
- // try vector-cast from string to sql_type
80169
- Vector dummy_result(sql_type);
80170
- if (options.has_format[LogicalTypeId::DATE] && sql_type == LogicalTypeId::DATE) {
80171
- // use the date format to cast the chunk
80172
- string error_message;
80173
- return TryCastDateVector(options, parse_chunk_col, dummy_result, size, error_message);
80174
- } else if (options.has_format[LogicalTypeId::TIMESTAMP] && sql_type == LogicalTypeId::TIMESTAMP) {
80175
- // use the timestamp format to cast the chunk
80176
- string error_message;
80177
- return TryCastTimestampVector(options, parse_chunk_col, dummy_result, size, error_message);
80178
- } else {
80179
- // target type is not varchar: perform a cast
80180
- string error_message;
80181
- return VectorOperations::DefaultTryCast(parse_chunk_col, dummy_result, size, &error_message, true);
80182
- }
80183
- }
80184
-
80185
- enum class QuoteRule : uint8_t { QUOTES_RFC = 0, QUOTES_OTHER = 1, NO_QUOTES = 2 };
80186
-
80187
80121
  void BufferedCSVReader::DetectDialect(const vector<LogicalType> &requested_types,
80188
80122
  BufferedCSVReaderOptions &original_options,
80189
80123
  vector<BufferedCSVReaderOptions> &info_candidates, idx_t &best_num_cols) {
@@ -81181,267 +81115,926 @@ bool BufferedCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_ch
81181
81115
  }
81182
81116
  }
81183
81117
 
81184
- void BufferedCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &escape_positions, bool has_quotes) {
81185
- auto length = str_val.GetSize();
81186
- if (length == 0 && column == 0) {
81187
- row_empty = true;
81188
- } else {
81189
- row_empty = false;
81118
+ } // namespace duckdb
81119
+
81120
+
81121
+
81122
+ namespace duckdb {
81123
+
81124
+ CSVBuffer::CSVBuffer(idx_t buffer_size_p, CSVFileHandle &file_handle) : first_buffer(true) {
81125
+ buffer = unique_ptr<char[]>(new char[buffer_size_p]);
81126
+ actual_size = file_handle.Read(buffer.get(), buffer_size_p);
81127
+ if (actual_size >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF') {
81128
+ start_position += 3;
81190
81129
  }
81130
+ last_buffer = file_handle.FinishedReading();
81131
+ }
81191
81132
 
81192
- if (!sql_types.empty() && column == sql_types.size() && length == 0) {
81193
- // skip a single trailing delimiter in last column
81194
- return;
81133
+ CSVBuffer::CSVBuffer(unique_ptr<char[]> buffer_p, idx_t buffer_size_p, idx_t actual_size_p, bool final_buffer)
81134
+ : buffer(move(buffer_p)), actual_size(actual_size_p), last_buffer(final_buffer) {
81135
+ }
81136
+
81137
+ unique_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t set_buffer_size) {
81138
+ if (file_handle.FinishedReading()) {
81139
+ // this was the last buffer
81140
+ return nullptr;
81195
81141
  }
81196
- if (mode == ParserMode::SNIFFING_DIALECT) {
81197
- column++;
81142
+
81143
+ auto next_buffer = unique_ptr<char[]>(new char[set_buffer_size]);
81144
+
81145
+ idx_t next_buffer_actual_size = file_handle.Read(next_buffer.get(), set_buffer_size);
81146
+
81147
+ return make_unique<CSVBuffer>(move(next_buffer), set_buffer_size, next_buffer_actual_size,
81148
+ file_handle.FinishedReading());
81149
+ }
81150
+
81151
+ idx_t CSVBuffer::GetBufferSize() {
81152
+ return actual_size;
81153
+ }
81154
+
81155
+ idx_t CSVBuffer::GetStart() {
81156
+ return start_position;
81157
+ }
81158
+
81159
+ bool CSVBuffer::IsCSVFileLastBuffer() {
81160
+ return last_buffer;
81161
+ }
81162
+
81163
+ bool CSVBuffer::IsCSVFileFirstBuffer() {
81164
+ return first_buffer;
81165
+ }
81166
+
81167
+ } // namespace duckdb
81168
+
81169
+
81170
+
81171
+
81172
+ namespace duckdb {
81173
+
81174
+ static bool ParseBoolean(const Value &value, const string &loption);
81175
+
81176
+ static bool ParseBoolean(const vector<Value> &set, const string &loption) {
81177
+ if (set.empty()) {
81178
+ // no option specified: default to true
81179
+ return true;
81180
+ }
81181
+ if (set.size() > 1) {
81182
+ throw BinderException("\"%s\" expects a single argument as a boolean value (e.g. TRUE or 1)", loption);
81183
+ }
81184
+ return ParseBoolean(set[0], loption);
81185
+ }
81186
+
81187
+ static bool ParseBoolean(const Value &value, const string &loption) {
81188
+
81189
+ if (value.type().id() == LogicalTypeId::LIST) {
81190
+ auto &children = ListValue::GetChildren(value);
81191
+ return ParseBoolean(children, loption);
81192
+ }
81193
+ if (value.type() == LogicalType::FLOAT || value.type() == LogicalType::DOUBLE ||
81194
+ value.type().id() == LogicalTypeId::DECIMAL) {
81195
+ throw BinderException("\"%s\" expects a boolean value (e.g. TRUE or 1)", loption);
81196
+ }
81197
+ return BooleanValue::Get(value.DefaultCastAs(LogicalType::BOOLEAN));
81198
+ }
81199
+
81200
+ static string ParseString(const Value &value, const string &loption) {
81201
+ if (value.type().id() == LogicalTypeId::LIST) {
81202
+ auto &children = ListValue::GetChildren(value);
81203
+ if (children.size() != 1) {
81204
+ throw BinderException("\"%s\" expects a single argument as a string value", loption);
81205
+ }
81206
+ return ParseString(children[0], loption);
81207
+ }
81208
+ if (value.type().id() != LogicalTypeId::VARCHAR) {
81209
+ throw BinderException("\"%s\" expects a string argument!", loption);
81210
+ }
81211
+ return value.GetValue<string>();
81212
+ }
81213
+
81214
+ static int64_t ParseInteger(const Value &value, const string &loption) {
81215
+ if (value.type().id() == LogicalTypeId::LIST) {
81216
+ auto &children = ListValue::GetChildren(value);
81217
+ if (children.size() != 1) {
81218
+ // no option specified or multiple options specified
81219
+ throw BinderException("\"%s\" expects a single argument as an integer value", loption);
81220
+ }
81221
+ return ParseInteger(children[0], loption);
81222
+ }
81223
+ return value.GetValue<int64_t>();
81224
+ }
81225
+
81226
+ static vector<bool> ParseColumnList(const vector<Value> &set, vector<string> &names, const string &loption) {
81227
+ vector<bool> result;
81228
+
81229
+ if (set.empty()) {
81230
+ throw BinderException("\"%s\" expects a column list or * as parameter", loption);
81231
+ }
81232
+ // list of options: parse the list
81233
+ unordered_map<string, bool> option_map;
81234
+ for (idx_t i = 0; i < set.size(); i++) {
81235
+ option_map[set[i].ToString()] = false;
81236
+ }
81237
+ result.resize(names.size(), false);
81238
+ for (idx_t i = 0; i < names.size(); i++) {
81239
+ auto entry = option_map.find(names[i]);
81240
+ if (entry != option_map.end()) {
81241
+ result[i] = true;
81242
+ entry->second = true;
81243
+ }
81244
+ }
81245
+ for (auto &entry : option_map) {
81246
+ if (!entry.second) {
81247
+ throw BinderException("\"%s\" expected to find %s, but it was not found in the table", loption,
81248
+ entry.first.c_str());
81249
+ }
81250
+ }
81251
+ return result;
81252
+ }
81253
+
81254
+ static vector<bool> ParseColumnList(const Value &value, vector<string> &names, const string &loption) {
81255
+ vector<bool> result;
81256
+
81257
+ // Only accept a list of arguments
81258
+ if (value.type().id() != LogicalTypeId::LIST) {
81259
+ // Support a single argument if it's '*'
81260
+ if (value.type().id() == LogicalTypeId::VARCHAR && value.GetValue<string>() == "*") {
81261
+ result.resize(names.size(), true);
81262
+ return result;
81263
+ }
81264
+ throw BinderException("\"%s\" expects a column list or * as parameter", loption);
81265
+ }
81266
+ auto &children = ListValue::GetChildren(value);
81267
+ // accept '*' as single argument
81268
+ if (children.size() == 1 && children[0].type().id() == LogicalTypeId::VARCHAR &&
81269
+ children[0].GetValue<string>() == "*") {
81270
+ result.resize(names.size(), true);
81271
+ return result;
81272
+ }
81273
+ return ParseColumnList(children, names, loption);
81274
+ }
81275
+
81276
+ void BufferedCSVReaderOptions::SetDelimiter(const string &input) {
81277
+ this->delimiter = StringUtil::Replace(input, "\\t", "\t");
81278
+ this->has_delimiter = true;
81279
+ if (input.empty()) {
81280
+ this->delimiter = string("\0", 1);
81281
+ }
81282
+ }
+
+ void BufferedCSVReaderOptions::SetDateFormat(LogicalTypeId type, const string &format, bool read_format) {
+     string error;
+     if (read_format) {
+         auto &date_format = this->date_format[type];
+         error = StrTimeFormat::ParseFormatSpecifier(format, date_format);
+         date_format.format_specifier = format;
+     } else {
+         auto &date_format = this->write_date_format[type];
+         error = StrTimeFormat::ParseFormatSpecifier(format, date_format);
+     }
+     if (!error.empty()) {
+         throw InvalidInputException("Could not parse DATEFORMAT: %s", error.c_str());
+     }
+     has_format[type] = true;
+ }
+
+ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value &value,
+                                              vector<string> &expected_names) {
+     if (SetBaseOption(loption, value)) {
          return;
      }
-     if (column >= sql_types.size()) {
-         if (options.ignore_errors) {
-             error_column_overflow = true;
-             return;
+     if (loption == "auto_detect") {
+         auto_detect = ParseBoolean(value, loption);
+     } else if (loption == "sample_size") {
+         int64_t sample_size = ParseInteger(value, loption);
+         if (sample_size < 1 && sample_size != -1) {
+             throw BinderException("Unsupported parameter for SAMPLE_SIZE: cannot be smaller than 1");
+         }
+         if (sample_size == -1) {
+             sample_chunks = std::numeric_limits<uint64_t>::max();
+             sample_chunk_size = STANDARD_VECTOR_SIZE;
+         } else if (sample_size <= STANDARD_VECTOR_SIZE) {
+             sample_chunk_size = sample_size;
+             sample_chunks = 1;
          } else {
-             throw InvalidInputException(
-                 "Error in file \"%s\", on line %s: expected %lld values per row, but got more. (%s)", options.file_path,
-                 GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), options.ToString());
+             sample_chunk_size = STANDARD_VECTOR_SIZE;
+             sample_chunks = sample_size / STANDARD_VECTOR_SIZE;
+         }
+     } else if (loption == "skip") {
+         skip_rows = ParseInteger(value, loption);
+     } else if (loption == "max_line_size" || loption == "maximum_line_size") {
+         maximum_line_size = ParseInteger(value, loption);
+     } else if (loption == "sample_chunk_size") {
+         sample_chunk_size = ParseInteger(value, loption);
+         if (sample_chunk_size > STANDARD_VECTOR_SIZE) {
+             throw BinderException(
+                 "Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be bigger than STANDARD_VECTOR_SIZE %d",
+                 STANDARD_VECTOR_SIZE);
+         } else if (sample_chunk_size < 1) {
+             throw BinderException("Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be smaller than 1");
+         }
+     } else if (loption == "sample_chunks") {
+         sample_chunks = ParseInteger(value, loption);
+         if (sample_chunks < 1) {
+             throw BinderException("Unsupported parameter for SAMPLE_CHUNKS: cannot be smaller than 1");
          }
+     } else if (loption == "force_not_null") {
+         force_not_null = ParseColumnList(value, expected_names, loption);
+     } else if (loption == "date_format" || loption == "dateformat") {
+         string format = ParseString(value, loption);
+         SetDateFormat(LogicalTypeId::DATE, format, true);
+     } else if (loption == "timestamp_format" || loption == "timestampformat") {
+         string format = ParseString(value, loption);
+         SetDateFormat(LogicalTypeId::TIMESTAMP, format, true);
+     } else if (loption == "escape") {
+         escape = ParseString(value, loption);
+         has_escape = true;
+     } else if (loption == "ignore_errors") {
+         ignore_errors = ParseBoolean(value, loption);
+     } else if (loption == "union_by_name") {
+         union_by_name = ParseBoolean(value, loption);
+     } else {
+         throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
      }
+ }
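The SAMPLE_SIZE branch above distributes the requested row budget over fixed-size chunks. A standalone sketch of the same arithmetic, assuming STANDARD_VECTOR_SIZE is 2048 (DuckDB's usual default); all names here are illustrative only:

#include <cassert>
#include <cstdint>
#include <limits>

static const int64_t kVectorSize = 2048; // stand-in for STANDARD_VECTOR_SIZE

struct SampleConfig {
    uint64_t chunk_size;
    uint64_t chunks;
};

static SampleConfig ConfigureSampleSize(int64_t sample_size) {
    if (sample_size == -1) {
        // -1 means "sample everything"
        return {(uint64_t)kVectorSize, std::numeric_limits<uint64_t>::max()};
    }
    if (sample_size <= kVectorSize) {
        return {(uint64_t)sample_size, 1};
    }
    return {(uint64_t)kVectorSize, (uint64_t)(sample_size / kVectorSize)};
}

int main() {
    assert(ConfigureSampleSize(100).chunks == 1);
    assert(ConfigureSampleSize(8192).chunks == 4); // 8192 / 2048
}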
 
-     // insert the line number into the chunk
-     idx_t row_entry = parse_chunk.size();
+ void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value &value) {
+     if (SetBaseOption(loption, value)) {
+         return;
+     }
 
-     // test against null string, but only if the value was not quoted
-     if ((!has_quotes || sql_types[column].id() != LogicalTypeId::VARCHAR) && !options.force_not_null[column] &&
-         Equals::Operation(str_val, string_t(options.null_str))) {
-         FlatVector::SetNull(parse_chunk.data[column], row_entry, true);
+     if (loption == "force_quote") {
+         force_quote = ParseColumnList(value, names, loption);
+     } else if (loption == "date_format" || loption == "dateformat") {
+         string format = ParseString(value, loption);
+         SetDateFormat(LogicalTypeId::DATE, format, false);
+     } else if (loption == "timestamp_format" || loption == "timestampformat") {
+         string format = ParseString(value, loption);
+         if (StringUtil::Lower(format) == "iso") {
+             format = "%Y-%m-%dT%H:%M:%S.%fZ";
+         }
+         SetDateFormat(LogicalTypeId::TIMESTAMP, format, false);
      } else {
-         auto &v = parse_chunk.data[column];
-         auto parse_data = FlatVector::GetData<string_t>(v);
-         if (!escape_positions.empty()) {
-             // remove escape characters (if any)
-             string old_val = str_val.GetString();
-             string new_val = "";
-             idx_t prev_pos = 0;
-             for (idx_t i = 0; i < escape_positions.size(); i++) {
-                 idx_t next_pos = escape_positions[i];
-                 new_val += old_val.substr(prev_pos, next_pos - prev_pos);
+         throw BinderException("Unrecognized option CSV writer \"%s\"", loption);
+     }
+ }
 
-             if (options.escape.empty() || options.escape == options.quote) {
-                 prev_pos = next_pos + options.quote.size();
-             } else {
-                 prev_pos = next_pos + options.escape.size();
-             }
-         }
-         new_val += old_val.substr(prev_pos, old_val.size() - prev_pos);
-         escape_positions.clear();
-         parse_data[row_entry] = StringVector::AddStringOrBlob(v, string_t(new_val));
-     } else {
-         parse_data[row_entry] = str_val;
+ bool BufferedCSVReaderOptions::SetBaseOption(const string &loption, const Value &value) {
+     // Make sure this function was only called after the option was turned into lowercase
+     D_ASSERT(!std::any_of(loption.begin(), loption.end(), ::isupper));
+
+     if (StringUtil::StartsWith(loption, "delim") || StringUtil::StartsWith(loption, "sep")) {
+         SetDelimiter(ParseString(value, loption));
+     } else if (loption == "quote") {
+         quote = ParseString(value, loption);
+         has_quote = true;
+     } else if (loption == "escape") {
+         escape = ParseString(value, loption);
+         has_escape = true;
+     } else if (loption == "header") {
+         header = ParseBoolean(value, loption);
+         has_header = true;
+     } else if (loption == "null" || loption == "nullstr") {
+         null_str = ParseString(value, loption);
+     } else if (loption == "encoding") {
+         auto encoding = StringUtil::Lower(ParseString(value, loption));
+         if (encoding != "utf8" && encoding != "utf-8") {
+             throw BinderException("Copy is only supported for UTF-8 encoded files, ENCODING 'UTF-8'");
          }
+     } else if (loption == "compression") {
+         compression = FileCompressionTypeFromString(ParseString(value, loption));
+     } else {
+         // unrecognized option in base CSV
+         return false;
      }
+     return true;
+ }
 
-     // move to the next column
-     column++;
+ std::string BufferedCSVReaderOptions::ToString() const {
+     return "DELIMITER='" + delimiter + (has_delimiter ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
+            ", QUOTE='" + quote + (has_quote ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
+            ", ESCAPE='" + escape + (has_escape ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
+            ", HEADER=" + std::to_string(header) +
+            (has_header ? "" : (auto_detect ? " (auto detected)" : " (default)")) +
+            ", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) +
+            ", IGNORE_ERRORS=" + std::to_string(ignore_errors) + ", ALL_VARCHAR=" + std::to_string(all_varchar);
  }
 
- bool BufferedCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column) {
-     linenr++;
+ } // namespace duckdb
+ //===----------------------------------------------------------------------===//
+ // DuckDB
+ //
+ // duckdb/execution/operator/persistent/buffered_csv_reader.hpp
+ //
+ //
+ //===----------------------------------------------------------------------===//
 
-     if (row_empty) {
-         row_empty = false;
-         if (sql_types.size() != 1) {
-             if (mode == ParserMode::PARSING) {
-                 FlatVector::SetNull(parse_chunk.data[0], parse_chunk.size(), false);
+
+
+
+
+
+
+
+ #include <sstream>
+ #include <utility>
+
+ namespace duckdb {
+
+ struct CSVBufferRead {
+     CSVBufferRead(shared_ptr<CSVBuffer> buffer_p, idx_t buffer_start_p, idx_t buffer_end_p, idx_t batch_index,
+                   idx_t estimated_linenr)
+         : buffer(move(buffer_p)), buffer_start(buffer_start_p), buffer_end(buffer_end_p), batch_index(batch_index),
+           estimated_linenr(estimated_linenr) {
+         if (buffer) {
+             if (buffer_end > buffer->GetBufferSize()) {
+                 buffer_end = buffer->GetBufferSize();
              }
-         column = 0;
-         return false;
+         } else {
+             buffer_start = 0;
+             buffer_end = 0;
          }
      }
 
-     // Error forwarded by 'ignore_errors' - originally encountered in 'AddValue'
-     if (error_column_overflow) {
-         D_ASSERT(options.ignore_errors);
-         error_column_overflow = false;
-         column = 0;
-         return false;
+     CSVBufferRead(shared_ptr<CSVBuffer> buffer_p, shared_ptr<CSVBuffer> nxt_buffer_p, idx_t buffer_start_p,
+                   idx_t buffer_end_p, idx_t batch_index, idx_t estimated_linenr)
+         : CSVBufferRead(std::move(buffer_p), buffer_start_p, buffer_end_p, batch_index, estimated_linenr) {
+         next_buffer = std::move(nxt_buffer_p);
      }
 
-     if (column < sql_types.size() && mode != ParserMode::SNIFFING_DIALECT) {
-         if (options.ignore_errors) {
-             column = 0;
-             return false;
+     CSVBufferRead() : buffer_start(0), buffer_end(NumericLimits<idx_t>::Maximum()) {};
+
+     const char &operator[](size_t i) const {
+         if (i < buffer->GetBufferSize()) {
+             return buffer->buffer[i];
+         }
+         return next_buffer->buffer[i - buffer->GetBufferSize()];
+     }
+
+     string_t GetValue(idx_t start_buffer, idx_t position_buffer, idx_t offset) {
+         idx_t length = position_buffer - start_buffer - offset;
+         // 1) It's all in the current buffer
+         if (start_buffer + length <= buffer->GetBufferSize()) {
+             auto buffer_ptr = buffer->buffer.get();
+             return string_t(buffer_ptr + start_buffer, length);
+         } else if (start_buffer >= buffer->GetBufferSize()) {
+             // 2) It's all in the next buffer
+             D_ASSERT(next_buffer);
+             D_ASSERT(next_buffer->GetBufferSize() >= length + (start_buffer - buffer->GetBufferSize()));
+             auto buffer_ptr = next_buffer->buffer.get();
+             return string_t(buffer_ptr + (start_buffer - buffer->GetBufferSize()), length);
          } else {
-             throw InvalidInputException(
-                 "Error in file \"%s\" on line %s: expected %lld values per row, but got %d. (%s)", options.file_path,
-                 GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column, options.ToString());
+             // 3) It starts in the current buffer and ends in the next buffer
+             D_ASSERT(next_buffer);
+             auto intersection = unique_ptr<char[]>(new char[length]);
+             idx_t cur_pos = 0;
+             for (idx_t i = start_buffer; i < buffer->GetBufferSize(); i++) {
+                 intersection[cur_pos++] = buffer->buffer[i];
+             }
+             idx_t nxt_buffer_pos = 0;
+             for (; cur_pos < length; cur_pos++) {
+                 intersection[cur_pos] = next_buffer->buffer[nxt_buffer_pos++];
+             }
+             intersections.emplace_back(move(intersection));
+             return string_t(intersections.back().get(), length);
          }
      }
 
-     if (mode == ParserMode::SNIFFING_DIALECT) {
-         sniffed_column_counts.push_back(column);
+     shared_ptr<CSVBuffer> buffer;
+     shared_ptr<CSVBuffer> next_buffer;
+     vector<unique_ptr<char[]>> intersections;
 
-         if (sniffed_column_counts.size() == options.sample_chunk_size) {
-             return true;
-         }
-     } else {
-         parse_chunk.SetCardinality(parse_chunk.size() + 1);
+     idx_t buffer_start;
+     idx_t buffer_end;
+     idx_t batch_index;
+     idx_t estimated_linenr;
+ };
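GetValue above returns a string_t that points into the current buffer, into the next buffer, or into a freshly allocated "intersection" when the value straddles the boundary. A simplified standalone model of that three-way split, using std::string in place of string_t and CSVBuffer:

#include <cassert>
#include <string>

// cur and nxt emulate the current and next CSV buffer contents
static std::string GetValueAcrossBuffers(const std::string &cur, const std::string &nxt, size_t start, size_t len) {
    if (start + len <= cur.size()) {
        // 1) entirely in the current buffer
        return cur.substr(start, len);
    }
    if (start >= cur.size()) {
        // 2) entirely in the next buffer
        return nxt.substr(start - cur.size(), len);
    }
    // 3) starts in the current buffer, ends in the next: stitch the two pieces
    std::string result = cur.substr(start);
    result += nxt.substr(0, len - result.size());
    return result;
}

int main() {
    std::string cur = "aaa,bb";
    std::string nxt = "b,ccc";
    assert(GetValueAcrossBuffers(cur, nxt, 0, 3) == "aaa"); // case 1
    assert(GetValueAcrossBuffers(cur, nxt, 4, 3) == "bbb"); // case 3 spans the boundary
}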
+
+ //! Parallel CSV reader: reads values from a piece of a CSV buffer and parses them as a CSV file
+ class ParallelCSVReader : public BaseCSVReader {
+ public:
+     ParallelCSVReader(ClientContext &context, BufferedCSVReaderOptions options, unique_ptr<CSVBufferRead> buffer,
+                       const vector<LogicalType> &requested_types);
+     ~ParallelCSVReader();
+
+     //! Current Position (Relative to the Buffer)
+     idx_t position_buffer = 0;
+
+     //! Start of the piece of the buffer this thread should read
+     idx_t start_buffer = 0;
+     //! End of the piece of this buffer this thread should read
+     idx_t end_buffer = NumericLimits<idx_t>::Maximum();
+     //! The actual buffer size
+     idx_t buffer_size = 0;
+
+     //! If this flag is set, it means we are about to try to read our last row.
+     bool reached_remainder_state = false;
+
+     unique_ptr<CSVBufferRead> buffer;
+
+ public:
+     void SetBufferRead(unique_ptr<CSVBufferRead> buffer);
+     //! Extracts a single DataChunk from the CSV file and stores it in insert_chunk
+     void ParseCSV(DataChunk &insert_chunk);
+
+ private:
+     //! Initialize Parser
+     void Initialize(const vector<LogicalType> &requested_types);
+     //! Try to parse a single datachunk from the file. Throws an exception if anything goes wrong.
+     void ParseCSV(ParserMode mode);
+     //! Try to parse a single datachunk from the file. Returns whether or not the parsing is successful
+     bool TryParseCSV(ParserMode mode);
+     //! Extracts a single DataChunk from the CSV file and stores it in insert_chunk
+     bool TryParseCSV(ParserMode mode, DataChunk &insert_chunk, string &error_message);
+     //! Sets Position depending on the byte_start of this thread
+     bool SetPosition(DataChunk &insert_chunk);
+     //! When a thread finishes reading its piece of the buffer, it can still scan up to the real end of the buffer
+     //! until it finds a newline. This function extends buffer_end accordingly, marking a boolean variable
+     //! when changing the buffer end the first time.
+     //! It returns FALSE if the parser should jump to the final state of parsing.
+     bool BufferRemainder();
+     //! Parses a CSV file with a one-byte delimiter, escape and quote character
+     bool TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message, bool try_add_line = false);
+ };
+
+ } // namespace duckdb
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ //===----------------------------------------------------------------------===//
+ // DuckDB
+ //
+ // duckdb/function/table/read_csv.hpp
+ //
+ //
+ //===----------------------------------------------------------------------===//
+
+
+
+
+
+
+
+
+
+
+
+ namespace duckdb {
+
+ class ReadCSV {
+ public:
+     static unique_ptr<CSVFileHandle> OpenCSV(const BufferedCSVReaderOptions &options, ClientContext &context);
+ };
+
+ struct BaseCSVData : public TableFunctionData {
+     virtual ~BaseCSVData() {
      }
+     //! The file path of the CSV file to read or write
+     vector<string> files;
+     //! The CSV reader options
+     BufferedCSVReaderOptions options;
+     //! Offsets for generated columns
+     idx_t filename_col_idx;
+     idx_t hive_partition_col_idx;
 
-     if (mode == ParserMode::PARSING_HEADER) {
-         return true;
+     void Finalize();
+ };
+
+ struct WriteCSVData : public BaseCSVData {
+     WriteCSVData(string file_path, vector<LogicalType> sql_types, vector<string> names) : sql_types(move(sql_types)) {
+         files.push_back(move(file_path));
+         options.names = move(names);
      }
 
-     if (mode == ParserMode::SNIFFING_DATATYPES && parse_chunk.size() == options.sample_chunk_size) {
-         return true;
+     //! The SQL types to write
+     vector<LogicalType> sql_types;
+     //! The newline string to write
+     string newline = "\n";
+     //! Whether or not we are writing a simple CSV (delimiter, quote and escape are all 1 byte in length)
+     bool is_simple;
+     //! The size of the CSV file (in bytes) that we buffer before we flush it to disk
+     idx_t flush_size = 4096 * 8;
+ };
+
+ struct ReadCSVData : public BaseCSVData {
+     //! The expected SQL types to read
+     vector<LogicalType> sql_types;
+     //! The initial reader (if any): this is used when automatic detection is used during binding.
+     //! In this case, the CSV reader is already created and might as well be re-used.
+     unique_ptr<BufferedCSVReader> initial_reader;
+     //! The union readers are created (when csv union_by_name option is on) during binding
+     //! Those readers can be re-used during ReadCSVFunction
+     vector<unique_ptr<BufferedCSVReader>> union_readers;
+     //! Whether or not the single-threaded reader should be used
+     bool single_threaded = false;
+
+     void InitializeFiles(ClientContext &context, const vector<string> &patterns);
+     void FinalizeRead(ClientContext &context);
+ };
+
+ struct CSVCopyFunction {
+     static void RegisterFunction(BuiltinFunctions &set);
+ };
+
+ struct ReadCSVTableFunction {
+     static TableFunction GetFunction(bool list_parameter = false);
+     static TableFunction GetAutoFunction(bool list_parameter = false);
+     static void RegisterFunction(BuiltinFunctions &set);
+ };
+
+ } // namespace duckdb
+
+
+ #include <algorithm>
+ #include <cctype>
+ #include <cstring>
+ #include <fstream>
+ #include <utility>
+
+ namespace duckdb {
+
+ ParallelCSVReader::ParallelCSVReader(ClientContext &context, BufferedCSVReaderOptions options_p,
+                                      unique_ptr<CSVBufferRead> buffer_p, const vector<LogicalType> &requested_types)
+     : BaseCSVReader(context, move(options_p), requested_types) {
+     Initialize(requested_types);
+     SetBufferRead(move(buffer_p));
+     if (options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1) {
+         throw InternalException("Parallel CSV reader cannot handle CSVs with multi-byte delimiters/escapes/quotes");
      }
+ }
 
-     if (mode == ParserMode::PARSING && parse_chunk.size() == STANDARD_VECTOR_SIZE) {
-         Flush(insert_chunk);
+ ParallelCSVReader::~ParallelCSVReader() {
+ }
+
+ void ParallelCSVReader::Initialize(const vector<LogicalType> &requested_types) {
+     sql_types = requested_types;
+     InitParseChunk(sql_types.size());
+     InitInsertChunkIdx(sql_types.size());
+ }
+
+ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
+     if (buffer->buffer->IsCSVFileFirstBuffer() && start_buffer == position_buffer &&
+         start_buffer == buffer->buffer->GetStart()) {
+         // First buffer doesn't need any setting
          return true;
      }
 
-     column = 0;
-     return false;
+     // We have to move position up to next new line
+     idx_t end_buffer_real = end_buffer;
+     // Check if we already start in a valid line
+     string error_message;
+     bool successfully_read_first_line = false;
+     while (!successfully_read_first_line) {
+         DataChunk first_line_chunk;
+         first_line_chunk.Initialize(allocator, insert_chunk.GetTypes());
+         for (; position_buffer < end_buffer; position_buffer++) {
+             if (StringUtil::CharacterIsNewline((*buffer)[position_buffer])) {
+                 position_buffer++;
+                 break;
+             }
+         }
+         D_ASSERT(position_buffer <= end_buffer);
+         if (position_buffer == end_buffer && !StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1])) {
+             break;
+         }
+         idx_t position_set = position_buffer;
+         start_buffer = position_buffer;
+         // We check if we can add this line
+         successfully_read_first_line = TryParseSimpleCSV(first_line_chunk, error_message, true);
+         start_buffer = position_set;
+         end_buffer = end_buffer_real;
+         position_buffer = position_set;
+         if (end_buffer == position_buffer) {
+             break;
+         }
+     }
+
+     return successfully_read_first_line;
  }
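SetPosition above makes each thread skip forward to the first newline at or after its assigned byte offset, then test-parses one line (try_add_line) to confirm it landed on a real row boundary and not, say, a newline inside a quoted field. A standalone sketch of the skip-to-newline step:

#include <cassert>
#include <cstddef>
#include <string>

// returns the position just after the first newline character in [start, end), or end if there is none
static size_t AlignToNextLine(const std::string &buf, size_t start, size_t end) {
    for (size_t pos = start; pos < end; pos++) {
        if (buf[pos] == '\n' || buf[pos] == '\r') {
            return pos + 1;
        }
    }
    return end;
}

int main() {
    std::string buf = "1,hello\n2,world\n3,!";
    // a thread assigned byte 3 must not start mid-row; it starts at byte 8 instead
    assert(AlignToNextLine(buf, 3, buf.size()) == 8);
}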
 
- void BufferedCSVReader::SetNullUnionCols(DataChunk &insert_chunk) {
-     for (idx_t col = 0; col < insert_nulls_idx.size(); ++col) {
-         insert_chunk.data[insert_nulls_idx[col]].SetVectorType(VectorType::CONSTANT_VECTOR);
-         ConstantVector::SetNull(insert_chunk.data[insert_nulls_idx[col]], true);
+ void ParallelCSVReader::SetBufferRead(unique_ptr<CSVBufferRead> buffer_read_p) {
+     if (!buffer_read_p->buffer) {
+         throw InternalException("ParallelCSVReader::SetBufferRead - CSVBufferRead does not have a buffer to read");
      }
+     position_buffer = buffer_read_p->buffer_start;
+     start_buffer = buffer_read_p->buffer_start;
+     end_buffer = buffer_read_p->buffer_end;
+     if (buffer_read_p->next_buffer) {
+         buffer_size = buffer_read_p->buffer->GetBufferSize() + buffer_read_p->next_buffer->GetBufferSize();
+     } else {
+         buffer_size = buffer_read_p->buffer->GetBufferSize();
+     }
+     linenr = buffer_read_p->estimated_linenr;
+     buffer = move(buffer_read_p);
+
+     linenr_estimated = true;
+     reached_remainder_state = false;
+     D_ASSERT(end_buffer <= buffer_size);
  }
 
- void BufferedCSVReader::VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, int64_t offset) {
-     D_ASSERT(col_idx < chunk.data.size());
-     D_ASSERT(row_idx < chunk.size());
-     auto &v = chunk.data[col_idx];
-     if (FlatVector::IsNull(v, row_idx)) {
-         return;
+ // If BufferRemainder returns false, it means we are done scanning this buffer and should go to the end_state
+ bool ParallelCSVReader::BufferRemainder() {
+     if (position_buffer >= end_buffer && !reached_remainder_state) {
+         // First time we finish the buffer piece we should scan here, we set the variables
+         // to allow this piece to be scanned up to the end of the buffer or the next new line
+         reached_remainder_state = true;
+         // end_buffer is allowed to go to buffer size to finish its last line
+         end_buffer = buffer_size;
+     }
+     if (position_buffer >= end_buffer) {
+         // buffer ends, return false
+         return false;
      }
+     // we can still scan stuff, return true
+     return true;
+ }
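BufferRemainder widens end_buffer to the full buffer exactly once, so a thread that hits the end of its assigned piece mid-row can still finish that row. A condensed standalone model of the same two-step check:

#include <cassert>
#include <cstddef>

struct RemainderState {
    size_t position = 0;
    size_t end = 0;          // end of the assigned piece
    size_t buffer_size = 0;  // real end of the buffer
    bool reached_remainder = false;

    // returns false once the scan must stop for good
    bool MayContinue() {
        if (position >= end && !reached_remainder) {
            reached_remainder = true; // extend once, to finish the current row
            end = buffer_size;
        }
        return position < end;
    }
};

int main() {
    RemainderState s{10, 10, 16};
    assert(s.MayContinue());  // first overrun: extended to the buffer end
    s.position = 16;
    assert(!s.MayContinue()); // second overrun: really done
}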
 
-     auto parse_data = FlatVector::GetData<string_t>(chunk.data[col_idx]);
-     auto s = parse_data[row_idx];
-     auto utf_type = Utf8Proc::Analyze(s.GetDataUnsafe(), s.GetSize());
-     if (utf_type == UnicodeType::INVALID) {
-         string col_name = to_string(col_idx);
-         if (col_idx < col_names.size()) {
-             col_name = "\"" + col_names[col_idx] + "\"";
+ bool ParallelCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message, bool try_add_line) {
+
+     // used for parsing algorithm
+     D_ASSERT(end_buffer <= buffer_size);
+     bool finished_chunk = false;
+     idx_t column = 0;
+     idx_t offset = 0;
+     bool has_quotes = false;
+     vector<idx_t> escape_positions;
+     if (start_buffer == buffer->buffer_start && !try_add_line) {
+         // First time reading this buffer piece
+         if (!SetPosition(insert_chunk)) {
+             // This means the buffer size does not contain a new line
+             return true;
          }
-         int64_t error_line = linenr - (chunk.size() - row_idx) + 1 + offset;
-         D_ASSERT(error_line >= 0);
-         throw InvalidInputException("Error in file \"%s\" at line %llu in column \"%s\": "
-                                     "%s. Parser options: %s",
-                                     options.file_path, error_line, col_name,
-                                     ErrorManager::InvalidUnicodeError(s.GetString(), "CSV file"), options.ToString());
      }
- }
 
- void BufferedCSVReader::VerifyUTF8(idx_t col_idx) {
-     D_ASSERT(col_idx < parse_chunk.data.size());
-     for (idx_t i = 0; i < parse_chunk.size(); i++) {
-         VerifyUTF8(col_idx, i, parse_chunk);
+     // start parsing the first value
+     goto value_start;
+
+ value_start : {
+     /* state: value_start */
+     if (!BufferRemainder()) {
+         goto final_state;
      }
- }
+     offset = 0;
 
- void BufferedCSVReader::Flush(DataChunk &insert_chunk) {
-     if (parse_chunk.size() == 0) {
-         return;
+     // this state parses the first character of a value
+     if ((*buffer)[position_buffer] == options.quote[0]) {
+         // quote: actual value starts in the next position
+         // move to in_quotes state
+         start_buffer = position_buffer + 1;
+         goto in_quotes;
+     } else {
+         // no quote, move to normal parsing state
+         start_buffer = position_buffer;
+         goto normal;
      }
+ };
 
-     bool conversion_error_ignored = false;
+ normal : {
+     /* state: normal parsing state */
+     // this state parses the remainder of a non-quoted value until we reach a delimiter or newline
+     for (; position_buffer < end_buffer; position_buffer++) {
+         if ((*buffer)[position_buffer] == options.delimiter[0]) {
+             // delimiter: end the value and add it to the chunk
+             goto add_value;
+         } else if (StringUtil::CharacterIsNewline((*buffer)[position_buffer])) {
+             // newline: add row
+             D_ASSERT(try_add_line || column == insert_chunk.ColumnCount() - 1);
+             goto add_row;
+         }
+     }
+     if (!BufferRemainder()) {
+         goto final_state;
+     } else {
+         goto normal;
+     }
+ };
 
-     // convert the columns in the parsed chunk to the types of the table
-     insert_chunk.SetCardinality(parse_chunk);
-     for (idx_t col_idx = 0; col_idx < sql_types.size(); col_idx++) {
-         if (sql_types[col_idx].id() == LogicalTypeId::VARCHAR) {
-             // target type is varchar: no need to convert
-             // just test that all strings are valid utf-8 strings
-             VerifyUTF8(col_idx);
-             insert_chunk.data[insert_cols_idx[col_idx]].Reference(parse_chunk.data[col_idx]);
-         } else {
-             string error_message;
-             bool success;
-             if (options.has_format[LogicalTypeId::DATE] && sql_types[col_idx].id() == LogicalTypeId::DATE) {
-                 // use the date format to cast the chunk
-                 success =
-                     TryCastDateVector(options, parse_chunk.data[col_idx], insert_chunk.data[insert_cols_idx[col_idx]],
-                                       parse_chunk.size(), error_message);
-             } else if (options.has_format[LogicalTypeId::TIMESTAMP] &&
-                        sql_types[col_idx].id() == LogicalTypeId::TIMESTAMP) {
-                 // use the date format to cast the chunk
-                 success = TryCastTimestampVector(options, parse_chunk.data[col_idx],
-                                                  insert_chunk.data[insert_cols_idx[col_idx]], parse_chunk.size(),
-                                                  error_message);
-             } else {
-                 // target type is not varchar: perform a cast
-                 success = VectorOperations::DefaultTryCast(parse_chunk.data[col_idx],
-                                                            insert_chunk.data[insert_cols_idx[col_idx]],
-                                                            parse_chunk.size(), &error_message);
-             }
-             if (success) {
-                 continue;
-             }
-             if (options.ignore_errors) {
-                 conversion_error_ignored = true;
-                 continue;
-             }
-             string col_name = to_string(col_idx);
-             if (col_idx < col_names.size()) {
-                 col_name = "\"" + col_names[col_idx] + "\"";
-             }
+ add_value : {
+     /* state: Add value to string vector */
+     AddValue(buffer->GetValue(start_buffer, position_buffer, offset), column, escape_positions, has_quotes);
+     // increase position by 1 and move start to the new position
+     offset = 0;
+     has_quotes = false;
+     start_buffer = ++position_buffer;
+     if (!BufferRemainder()) {
+         goto final_state;
+     }
+     goto value_start;
+ };
 
-             // figure out the exact line number
-             idx_t row_idx;
-             for (row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
-                 auto &inserted_column = insert_chunk.data[col_idx];
-                 auto &parsed_column = parse_chunk.data[col_idx];
+ add_row : {
+     /* state: Add Row to Parse chunk */
+     // check type of newline (\r or \n)
+     bool carriage_return = (*buffer)[position_buffer] == '\r';
 
-                 if (FlatVector::IsNull(inserted_column, row_idx) && !FlatVector::IsNull(parsed_column, row_idx)) {
-                     break;
-                 }
+     AddValue(buffer->GetValue(start_buffer, position_buffer, offset), column, escape_positions, has_quotes);
+     if (try_add_line) {
+         bool success = column == insert_chunk.ColumnCount();
+         if (success) {
+             AddRow(insert_chunk, column);
+             success = Flush(insert_chunk);
+         }
+         reached_remainder_state = false;
+         parse_chunk.Reset();
+         return success;
+     } else {
+         finished_chunk = AddRow(insert_chunk, column);
+     }
+     // increase position by 1 and move start to the new position
+     offset = 0;
+     has_quotes = false;
+     start_buffer = ++position_buffer;
+     if (reached_remainder_state || finished_chunk) {
+         goto final_state;
+     }
+     if (!BufferRemainder()) {
+         goto final_state;
+     }
+     if (carriage_return) {
+         // \r newline, go to special state that parses an optional \n afterwards
+         goto carriage_return;
+     } else {
+         // \n newline, move to value start
+         if (finished_chunk) {
+             goto final_state;
+         }
+         goto value_start;
+     }
+ }
+ in_quotes:
+     /* state: in_quotes; parses the remainder of a quoted value */
+     has_quotes = true;
+     position_buffer++;
+     for (; position_buffer < end_buffer; position_buffer++) {
+         if ((*buffer)[position_buffer] == options.quote[0]) {
+             // quote: move to unquoted state
+             goto unquote;
+         } else if ((*buffer)[position_buffer] == options.escape[0]) {
+             // escape: store the escaped position and move to handle_escape state
+             escape_positions.push_back(position_buffer - start_buffer);
+             goto handle_escape;
+         }
+     }
+     if (!BufferRemainder()) {
+         if (buffer->buffer->IsCSVFileLastBuffer()) {
+             if (try_add_line) {
+                 return false;
              }
-             auto error_line = linenr - (parse_chunk.size() - row_idx) + 1;
+             // still in quoted state at the end of the file or at the end of a buffer when running multithreaded, error:
+             throw InvalidInputException("Error in file \"%s\" on line %s: unterminated quotes. (%s)", options.file_path,
+                                         GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
+         } else {
+             goto final_state;
+         }
+     } else {
+         position_buffer--;
+         goto in_quotes;
+     }
 
-             if (options.auto_detect) {
-                 throw InvalidInputException("%s in column %s, at line %llu. Parser "
-                                             "options: %s. Consider either increasing the sample size "
-                                             "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
-                                             "or skipping column conversion (ALL_VARCHAR=1)",
-                                             error_message, col_name, error_line, options.ToString());
+ unquote:
+     /* state: unquote; handles the character directly after a closing quote */
+     //
+     // in this state we expect either another quote (entering the quoted state again, and escaping the quote)
+     // or a delimiter/newline, ending the current value and moving on to the next value
+     position_buffer++;
+     if (!BufferRemainder()) {
+         offset = 1;
+         goto final_state;
+     }
+     if ((*buffer)[position_buffer] == options.quote[0] &&
+         (options.escape.empty() || options.escape[0] == options.quote[0])) {
+         // escaped quote, return to quoted state and store escape position
+         escape_positions.push_back(position_buffer - start_buffer);
+         goto in_quotes;
+     } else if ((*buffer)[position_buffer] == options.delimiter[0]) {
+         // delimiter, add value
+         offset = 1;
+         goto add_value;
+     } else if (StringUtil::CharacterIsNewline((*buffer)[position_buffer])) {
+         offset = 1;
+         D_ASSERT(column == insert_chunk.ColumnCount() - 1);
+         goto add_row;
+     } else if (position_buffer >= end_buffer) {
+         // reached end of buffer
+         offset = 1;
+         goto final_state;
+     } else {
+         error_message = StringUtil::Format(
+             "Error in file \"%s\" on line %s: quote should be followed by end of value, end of "
+             "row or another quote. (%s). ",
+             options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
+         return false;
+     }
+ handle_escape : {
+     /* state: handle_escape */
+     // escape should be followed by a quote or another escape character
+     position_buffer++;
+     if (!BufferRemainder()) {
+         goto final_state;
+     }
+     if (position_buffer >= buffer_size && buffer->buffer->IsCSVFileLastBuffer()) {
+         error_message = StringUtil::Format(
+             "Error in file \"%s\" on line %s: ESCAPE is not followed by QUOTE or ESCAPE. (%s)", options.file_path,
+             GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
+         return false;
+     }
+     if ((*buffer)[position_buffer] != options.quote[0] && (*buffer)[position_buffer] != options.escape[0]) {
+         error_message = StringUtil::Format(
+             "Error in file \"%s\" on line %s: ESCAPE is not followed by QUOTE or ESCAPE. (%s)", options.file_path,
+             GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
+         return false;
+     }
+     // escape was followed by quote or escape, go back to quoted state
+     goto in_quotes;
+ }
+
+ carriage_return : {
+     /* state: carriage_return */
+     // this state optionally skips a newline (\n) character, which allows \r\n to be interpreted as a single line
+     if ((*buffer)[position_buffer] == '\n') {
+         // newline after carriage return: skip
+         // increase position by 1 and move start to the new position
+         start_buffer = ++position_buffer;
+         if (position_buffer >= buffer_size) {
+             // file ends right after delimiter, go to final state
+             goto final_state;
+         }
+     }
+     goto value_start;
+ }
+ final_state : {
+     /* state: final_state; reached after we finished reading the end_buffer of the csv buffer */
+     // reset end buffer
+     end_buffer = buffer->buffer_end;
+     if (finished_chunk) {
+         return true;
+     }
+     // If this is the last buffer, we have to read the last value
+     if (buffer->buffer->IsCSVFileLastBuffer() || (buffer->next_buffer && buffer->next_buffer->IsCSVFileLastBuffer())) {
+         if (column > 0 || position_buffer > start_buffer) {
+             // remaining values to be added to the chunk
+             D_ASSERT(column == insert_chunk.ColumnCount() - 1);
+             AddValue(buffer->GetValue(start_buffer, position_buffer, offset), column, escape_positions, has_quotes);
+             if (try_add_line) {
+                 bool success = column == sql_types.size();
+                 if (success) {
+                     AddRow(insert_chunk, column);
+                     success = Flush(insert_chunk);
+                 }
+                 parse_chunk.Reset();
+                 reached_remainder_state = false;
+                 return success;
              } else {
-                 throw InvalidInputException("%s at line %llu in column %s. Parser options: %s ", error_message,
-                                             error_line, col_name, options.ToString());
+                 AddRow(insert_chunk, column);
              }
          }
      }
-     if (conversion_error_ignored) {
-         D_ASSERT(options.ignore_errors);
-         SelectionVector succesful_rows;
-         succesful_rows.Initialize(parse_chunk.size());
-         idx_t sel_size = 0;
+     // flush the parsed chunk and finalize parsing
+     if (mode == ParserMode::PARSING) {
+         Flush(insert_chunk);
+     }
+     return true;
+ };
+ }
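TryParseSimpleCSV is a goto-based state machine: value_start branches on a quote, normal scans for a delimiter or newline, and the quote states handle escapes. A compact sketch of the unquoted subset of that cycle (value_start, normal, add_value, add_row, final_state), purely for illustration:

#include <cassert>
#include <string>
#include <vector>

// parses unquoted CSV rows; each inner vector is one row of fields
static std::vector<std::vector<std::string>> ParseUnquoted(const std::string &buf, char delim) {
    std::vector<std::vector<std::string>> rows(1);
    std::string field;
    for (char c : buf) {
        if (c == delim) {
            rows.back().push_back(field); // add_value
            field.clear();
        } else if (c == '\n') {
            rows.back().push_back(field); // add_row
            field.clear();
            rows.emplace_back();
        } else {
            field += c; // normal
        }
    }
    rows.back().push_back(field); // final_state: flush the trailing value
    return rows;
}

int main() {
    auto rows = ParseUnquoted("a,b\nc,d", ',');
    assert(rows.size() == 2 && rows[1][1] == "d");
}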
 
-     for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
-         bool failed = false;
-         for (idx_t column_idx = 0; column_idx < sql_types.size(); column_idx++) {
+ void ParallelCSVReader::ParseCSV(DataChunk &insert_chunk) {
+     string error_message;
+     if (!TryParseCSV(ParserMode::PARSING, insert_chunk, error_message)) {
+         throw InvalidInputException(error_message);
+     }
+ }
 
-             auto &inserted_column = insert_chunk.data[column_idx];
-             auto &parsed_column = parse_chunk.data[column_idx];
+ bool ParallelCSVReader::TryParseCSV(ParserMode mode) {
+     DataChunk dummy_chunk;
+     string error_message;
+     return TryParseCSV(mode, dummy_chunk, error_message);
+ }
 
-             bool was_already_null = FlatVector::IsNull(parsed_column, row_idx);
-             if (!was_already_null && FlatVector::IsNull(inserted_column, row_idx)) {
-                 failed = true;
-                 break;
-             }
-         }
-         if (!failed) {
-             succesful_rows.set_index(sel_size++, row_idx);
-         }
-     }
-     insert_chunk.Slice(succesful_rows, sel_size);
+ void ParallelCSVReader::ParseCSV(ParserMode mode) {
+     DataChunk dummy_chunk;
+     string error_message;
+     if (!TryParseCSV(mode, dummy_chunk, error_message)) {
+         throw InvalidInputException(error_message);
      }
-     parse_chunk.Reset();
  }
+
+ bool ParallelCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_chunk, string &error_message) {
+     mode = parser_mode;
+     return TryParseSimpleCSV(insert_chunk, error_message);
+ }
+
  } // namespace duckdb
  //===----------------------------------------------------------------------===//
  // DuckDB
@@ -120938,6 +121531,28 @@ void StripAccentsFun::RegisterFunction(BuiltinFunctions &set) {
 
  namespace duckdb {
 
+ static const int64_t SUPPORTED_UPPER_BOUND = NumericLimits<uint32_t>::Maximum();
+ static const int64_t SUPPORTED_LOWER_BOUND = -SUPPORTED_UPPER_BOUND - 1;
+
+ static inline void AssertInSupportedRange(idx_t input_size, int64_t offset, int64_t length) {
+
+     if (input_size > (uint64_t)SUPPORTED_UPPER_BOUND) {
+         throw OutOfRangeException("Substring input size is too large (> %d)", SUPPORTED_UPPER_BOUND);
+     }
+     if (offset < SUPPORTED_LOWER_BOUND) {
+         throw OutOfRangeException("Substring offset outside of supported range (< %d)", SUPPORTED_LOWER_BOUND);
+     }
+     if (offset > SUPPORTED_UPPER_BOUND) {
+         throw OutOfRangeException("Substring offset outside of supported range (> %d)", SUPPORTED_UPPER_BOUND);
+     }
+     if (length < SUPPORTED_LOWER_BOUND) {
+         throw OutOfRangeException("Substring length outside of supported range (< %d)", SUPPORTED_LOWER_BOUND);
+     }
+     if (length > SUPPORTED_UPPER_BOUND) {
+         throw OutOfRangeException("Substring length outside of supported range (> %d)", SUPPORTED_UPPER_BOUND);
+     }
+ }
+
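AssertInSupportedRange restricts substring arguments to a window of roughly plus or minus 2^32. A quick standalone check of the same guard logic (std::out_of_range stands in for OutOfRangeException):

#include <cstdint>
#include <stdexcept>

static const int64_t kUpper = 4294967295LL; // NumericLimits<uint32_t>::Maximum()
static const int64_t kLower = -kUpper - 1;

static void CheckRange(uint64_t input_size, int64_t offset, int64_t length) {
    if (input_size > (uint64_t)kUpper || offset < kLower || offset > kUpper || length < kLower || length > kUpper) {
        throw std::out_of_range("substring argument outside of supported range");
    }
}

int main() {
    CheckRange(11, 1, 5); // fine
    try {
        CheckRange(11, 5000000000LL, 10); // offset > 2^32 - 1: rejected
        return 1;
    } catch (const std::out_of_range &) {
        return 0;
    }
}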
  string_t SubstringEmptyString(Vector &result) {
      auto result_string = StringVector::EmptyString(result, 0);
      result_string.Finalize();
@@ -120977,7 +121592,7 @@ bool SubstringStartEnd(int64_t input_size, int64_t offset, int64_t length, int64
      } else {
          // negative length: go backwards (i.e. end = start, start = start + length)
          end = start;
-         start = MaxValue<int64_t>(0, end + length);
+         start = MaxValue<int64_t>(0, start + length);
      }
      if (start == end) {
          return false;
@@ -120990,6 +121605,8 @@ string_t SubstringASCII(Vector &result, string_t input, int64_t offset, int64_t
      auto input_data = input.GetDataUnsafe();
      auto input_size = input.GetSize();
 
+     AssertInSupportedRange(input_size, offset, length);
+
      int64_t start, end;
      if (!SubstringStartEnd(input_size, offset, length, start, end)) {
          return SubstringEmptyString(result);
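In the negative-length branch above, end has just been set to start, so the old and new expressions compute the same value; the rewrite simply matches the comment. The behavior being implemented: with SQL's 1-based offsets, a negative length reaches backwards from the offset. A simplified standalone model (not the exact DuckDB routine):

#include <cassert>
#include <algorithm>
#include <cstdint>
#include <string>

// 1-based offset, possibly negative length
static std::string Sub(const std::string &s, int64_t offset, int64_t length) {
    int64_t start, end;
    if (length >= 0) {
        start = std::max<int64_t>(0, offset - 1);
        end = std::min<int64_t>((int64_t)s.size(), start + length);
    } else {
        end = std::max<int64_t>(0, offset - 1);
        start = std::max<int64_t>(0, end + length); // negative length: go backwards
    }
    if (start >= end) {
        return "";
    }
    return s.substr(start, end - start);
}

int main() {
    assert(Sub("hello", 2, 3) == "ell");
    assert(Sub("hello", 3, -2) == "he"); // backwards from position 3
}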
@@ -121001,6 +121618,8 @@ string_t SubstringFun::SubstringUnicode(Vector &result, string_t input, int64_t
      auto input_data = input.GetDataUnsafe();
      auto input_size = input.GetSize();
 
+     AssertInSupportedRange(input_size, offset, length);
+
      if (length == 0) {
          return SubstringEmptyString(result);
      }
@@ -121051,14 +121670,15 @@ string_t SubstringFun::SubstringUnicode(Vector &result, string_t input, int64_t
      int64_t start, end;
 
      // we express start and end as unicode codepoints from the front
+     offset--;
      if (length < 0) {
          // negative length
-         start = MaxValue<int64_t>(0, offset + length - 1);
-         end = offset - 1;
+         start = MaxValue<int64_t>(0, offset + length);
+         end = offset;
      } else {
          // positive length
-         start = MaxValue<int64_t>(0, offset - 1);
-         end = offset + length - 1;
+         start = MaxValue<int64_t>(0, offset);
+         end = offset + length;
      }
 
      int64_t current_character = 0;
@@ -121086,6 +121706,8 @@ string_t SubstringFun::SubstringGrapheme(Vector &result, string_t input, int64_t
      auto input_data = input.GetDataUnsafe();
      auto input_size = input.GetSize();
 
+     AssertInSupportedRange(input_size, offset, length);
+
      // we don't know yet if the substring is ascii, but we assume it is (for now)
      // first get the start and end as if this was an ascii string
      int64_t start, end;
@@ -121170,7 +121792,7 @@ static void SubstringFunction(DataChunk &args, ExpressionState &state, Vector &r
      } else {
          BinaryExecutor::Execute<string_t, int64_t, string_t>(
              input_vector, offset_vector, result, args.size(), [&](string_t input_string, int64_t offset) {
-                 return OP::Substring(result, input_string, offset, NumericLimits<int64_t>::Maximum() - offset);
+                 return OP::Substring(result, input_string, offset, NumericLimits<uint32_t>::Maximum());
              });
      }
  }
@@ -121189,7 +121811,7 @@ static void SubstringFunctionASCII(DataChunk &args, ExpressionState &state, Vect
      } else {
          BinaryExecutor::Execute<string_t, int64_t, string_t>(
              input_vector, offset_vector, result, args.size(), [&](string_t input_string, int64_t offset) {
-                 return SubstringASCII(result, input_string, offset, NumericLimits<int64_t>::Maximum() - offset);
+                 return SubstringASCII(result, input_string, offset, NumericLimits<uint32_t>::Maximum());
              });
      }
  }
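When substring is called without an explicit length, the executor substitutes a cap meaning "the rest of the string". The old cap, int64 max minus offset, is ill-defined for negative offsets (signed overflow); the new uint32 cap is always sufficient because inputs longer than 2^32 - 1 bytes are rejected by AssertInSupportedRange. A two-line illustration:

#include <cassert>
#include <cstdint>
#include <limits>

int main() {
    int64_t offset = -5; // negative offsets are legal in SQL substring
    // old cap: undefined behavior for negative offsets
    // int64_t old_cap = std::numeric_limits<int64_t>::max() - offset; // UB: signed overflow
    int64_t new_cap = std::numeric_limits<uint32_t>::max(); // always a valid "rest of string"
    assert(new_cap == 4294967295LL);
}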
@@ -124179,72 +124801,6 @@ void CheckpointFunction::RegisterFunction(BuiltinFunctions &set) {
  }
 
  } // namespace duckdb
- //===----------------------------------------------------------------------===//
- // DuckDB
- //
- // duckdb/function/table/read_csv.hpp
- //
- //
- //===----------------------------------------------------------------------===//
-
-
-
-
-
-
-
- namespace duckdb {
-
- struct BaseCSVData : public TableFunctionData {
-     //! The file path of the CSV file to read or write
-     vector<string> files;
-     //! The CSV reader options
-     BufferedCSVReaderOptions options;
-     //! Offsets for generated columns
-     idx_t filename_col_idx;
-     idx_t hive_partition_col_idx;
-
-     void Finalize();
- };
-
- struct WriteCSVData : public BaseCSVData {
-     WriteCSVData(string file_path, vector<LogicalType> sql_types, vector<string> names) : sql_types(move(sql_types)) {
-         files.push_back(move(file_path));
-         options.names = move(names);
-     }
-
-     //! The SQL types to write
-     vector<LogicalType> sql_types;
-     //! The newline string to write
-     string newline = "\n";
-     //! Whether or not we are writing a simple CSV (delimiter, quote and escape are all 1 byte in length)
-     bool is_simple;
-     //! The size of the CSV file (in bytes) that we buffer before we flush it to disk
-     idx_t flush_size = 4096 * 8;
- };
-
- struct ReadCSVData : public BaseCSVData {
-     //! The expected SQL types to read
-     vector<LogicalType> sql_types;
-     //! The initial reader (if any): this is used when automatic detection is used during binding.
-     //! In this case, the CSV reader is already created and might as well be re-used.
-     unique_ptr<BufferedCSVReader> initial_reader;
-     //! The union readers is created(when csv union_by_name option is on) during binding
-     //! Those reader can be re-used during ReadCSVFunction
-     vector<unique_ptr<BufferedCSVReader>> union_readers;
- };
-
- struct CSVCopyFunction {
-     static void RegisterFunction(BuiltinFunctions &set);
- };
-
- struct ReadCSVTableFunction {
-     static TableFunction GetFunction(bool list_parameter = false);
-     static TableFunction GetAutoFunction(bool list_parameter = false);
-     static void RegisterFunction(BuiltinFunctions &set);
- };
-
- } // namespace duckdb
 
 
 
@@ -124263,7 +124819,7 @@ void SubstringDetection(string &str_1, string &str_2, const string &name_str_1,
      if (str_1.empty() || str_2.empty()) {
          return;
      }
-     if (str_1.find(str_2) != string::npos || str_2.find(str_1) != std::string::npos) {
+     if ((str_1.find(str_2) != string::npos || str_2.find(str_1) != std::string::npos) && str_1 != "NULL") {
          throw BinderException("%s must not appear in the %s specification and vice versa", name_str_1, name_str_2);
      }
  }
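The added str_1 != "NULL" clause exempts the default null string from the mutual-substring check, so an option value that happens to contain or be contained in "NULL" no longer fails the COPY binding. A simplified standalone model of the check:

#include <stdexcept>
#include <string>

// option values must not contain one another, except the default null string "NULL"
static void SubstringDetection(const std::string &a, const std::string &b) {
    if (a.empty() || b.empty()) {
        return;
    }
    if ((a.find(b) != std::string::npos || b.find(a) != std::string::npos) && a != "NULL") {
        throw std::invalid_argument("options must not contain one another");
    }
}

int main() {
    SubstringDetection("NULL", "L"); // previously threw; now accepted
    try {
        SubstringDetection(";", ";;"); // still rejected
        return 1;
    } catch (const std::invalid_argument &) {
        return 0;
    }
}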
@@ -124338,12 +124894,9 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, CopyInfo &in
      bind_data->sql_types = expected_types;
 
      string file_pattern = info.file_path;
+     vector<string> patterns {file_pattern};
 
-     auto &fs = FileSystem::GetFileSystem(context);
-     bind_data->files = fs.Glob(file_pattern, context);
-     if (bind_data->files.empty()) {
-         throw IOException("No files found that match the pattern \"%s\"", file_pattern);
-     }
+     bind_data->InitializeFiles(context, patterns);
 
      auto &options = bind_data->options;
 
@@ -124358,7 +124911,7 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, CopyInfo &in
      // no FORCE_QUOTE specified: initialize to false
      options.force_not_null.resize(expected_types.size(), false);
      }
-     bind_data->Finalize();
+     bind_data->FinalizeRead(context);
      return move(bind_data);
  }
@@ -125417,11 +125970,39 @@ void BuiltinFunctions::RegisterTableFunctions() {
 
 
 
-
  #include <limits>
 
  namespace duckdb {
 
+ unique_ptr<CSVFileHandle> ReadCSV::OpenCSV(const BufferedCSVReaderOptions &options, ClientContext &context) {
+     auto &fs = FileSystem::GetFileSystem(context);
+     auto opener = FileSystem::GetFileOpener(context);
+     auto file_handle = fs.OpenFile(options.file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK,
+                                    options.compression, opener);
+     return make_unique<CSVFileHandle>(move(file_handle));
+ }
+
+ void ReadCSVData::InitializeFiles(ClientContext &context, const vector<string> &patterns) {
+     auto &fs = FileSystem::GetFileSystem(context);
+     for (auto &file_pattern : patterns) {
+         auto found_files = fs.Glob(file_pattern, context);
+         if (found_files.empty()) {
+             throw IOException("No files found that match the pattern \"%s\"", file_pattern);
+         }
+         files.insert(files.end(), found_files.begin(), found_files.end());
+     }
+ }
+
+ void ReadCSVData::FinalizeRead(ClientContext &context) {
+     BaseCSVData::Finalize();
+     auto &config = DBConfig::GetConfig(context);
+     single_threaded = !config.options.experimental_parallel_csv_reader;
+     if (options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1) {
+         // not supported for parallel CSV reading
+         single_threaded = true;
+     }
+ }
+
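FinalizeRead is where the scan chooses between the parallel and single-threaded readers: the parallel path is opt-in via the experimental_parallel_csv_reader setting and is bypassed for multi-byte delimiters, quotes, or escapes (the same condition the ParallelCSVReader constructor enforces). A condensed standalone model of that decision:

#include <cassert>
#include <string>

struct CsvOptions {
    std::string delimiter = ",";
    std::string quote = "\"";
    std::string escape = "\"";
};

static bool UseSingleThreaded(const CsvOptions &opt, bool experimental_parallel_enabled) {
    if (!experimental_parallel_enabled) {
        return true;
    }
    // multi-byte delimiter/quote/escape are not supported by the parallel state machine
    return opt.delimiter.size() > 1 || opt.quote.size() > 1 || opt.escape.size() > 1;
}

int main() {
    CsvOptions simple;
    assert(!UseSingleThreaded(simple, true)); // parallel path
    CsvOptions fancy;
    fancy.delimiter = "||";
    assert(UseSingleThreaded(fancy, true));   // falls back
    assert(UseSingleThreaded(simple, false)); // flag off: always single-threaded
}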
  static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctionBindInput &input,
                                              vector<LogicalType> &return_types, vector<string> &names) {
      auto &config = DBConfig::GetConfig(context);
@@ -125442,14 +126023,7 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
          patterns.push_back(StringValue::Get(input.inputs[0]));
      }
 
-     auto &fs = FileSystem::GetFileSystem(context);
-     for (auto &file_pattern : patterns) {
-         auto files = fs.Glob(file_pattern, context);
-         if (files.empty()) {
-             throw IOException("No files found that match the pattern \"%s\"", file_pattern);
-         }
-         result->files.insert(result->files.end(), files.begin(), files.end());
-     }
+     result->InitializeFiles(context, patterns);
 
      for (auto &kv : input.named_parameters) {
          auto loption = StringUtil::Lower(kv.first);
@@ -125480,6 +126054,11 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
              options.include_file_name = BooleanValue::Get(kv.second);
          } else if (loption == "hive_partitioning") {
              options.include_parsed_hive_partitions = BooleanValue::Get(kv.second);
+         } else if (loption == "buffer_size") {
+             options.buffer_size = kv.second.GetValue<uint64_t>();
+             if (options.buffer_size == 0) {
+                 throw InvalidInputException("Buffer Size option must be higher than 0");
+             }
          } else {
              options.SetReadOption(loption, kv.second, names);
          }
@@ -125492,13 +126071,14 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
      if (options.auto_detect) {
          options.file_path = result->files[0];
          auto initial_reader = make_unique<BufferedCSVReader>(context, options);
-
          return_types.assign(initial_reader->sql_types.begin(), initial_reader->sql_types.end());
          if (names.empty()) {
              names.assign(initial_reader->col_names.begin(), initial_reader->col_names.end());
          } else {
              D_ASSERT(return_types.size() == names.size());
          }
+         options = result->options;
+         result->sql_types = initial_reader->sql_types;
          result->initial_reader = move(initial_reader);
      } else {
          result->sql_types = return_types;
@@ -125577,10 +126157,233 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
          }
      }
      result->options.names = names;
+     result->FinalizeRead(context);
      return move(result);
  }
 
- struct ReadCSVOperatorData : public GlobalTableFunctionState {
+ static unique_ptr<FunctionData> ReadCSVAutoBind(ClientContext &context, TableFunctionBindInput &input,
+                                                 vector<LogicalType> &return_types, vector<string> &names) {
+     input.named_parameters["auto_detect"] = Value::BOOLEAN(true);
+     return ReadCSVBind(context, input, return_types, names);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Parallel CSV Reader CSV Global State
+ //===--------------------------------------------------------------------===//
+ //===--------------------------------------------------------------------===//
+ // Read CSV Global State
+ //===--------------------------------------------------------------------===//
+ struct ParallelCSVGlobalState : public GlobalTableFunctionState {
+ public:
+     ParallelCSVGlobalState(unique_ptr<CSVFileHandle> file_handle_p, vector<string> &files_path_p,
+                            idx_t system_threads_p, idx_t buffer_size_p, idx_t rows_to_skip)
+         : file_handle(move(file_handle_p)), system_threads(system_threads_p), buffer_size(buffer_size_p) {
+         for (idx_t i = 0; i < rows_to_skip; i++) {
+             file_handle->ReadLine();
+         }
+         estimated_linenr = rows_to_skip;
+         file_size = file_handle->FileSize();
+         first_file_size = file_size;
+         bytes_read = 0;
+         if (buffer_size < file_size) {
+             bytes_per_local_state = buffer_size / MaxThreads();
+         } else {
+             bytes_per_local_state = file_size / MaxThreads();
+         }
+         current_buffer = make_shared<CSVBuffer>(buffer_size, *file_handle);
+         next_buffer = current_buffer->Next(*file_handle, buffer_size);
+     }
+     ParallelCSVGlobalState() {
+     }
+
+     idx_t MaxThreads() const override;
+     //! Returns buffer and index that caller thread should read.
+     unique_ptr<CSVBufferRead> Next(ClientContext &context, ReadCSVData &bind_data);
+     //! If we finished reading all the CSV Files
+     bool Finished();
+     //! How many bytes were read up to this point
+     atomic<idx_t> bytes_read;
+     //! Size of current file
+     idx_t file_size;
+
+ private:
+     //! File Handle for current file
+     unique_ptr<CSVFileHandle> file_handle;
+
+     shared_ptr<CSVBuffer> current_buffer;
+     shared_ptr<CSVBuffer> next_buffer;
+     //! The index of the next file to read (i.e. current file + 1)
+     idx_t file_index = 1;
+
+     //! Mutex to lock when getting next batch of bytes (Parallel Only)
+     mutex main_mutex;
+     //! Next byte offset to hand to a reader thread
+     idx_t next_byte = 0;
+
+     //! The current estimated line number
+     idx_t estimated_linenr;
+
+     //! How many bytes we should execute per local state
+     idx_t bytes_per_local_state;
+
+     //! Size of first file
+     idx_t first_file_size;
+     //! Basically max number of threads in DuckDB
+     idx_t system_threads;
+     //! Size of the buffers
+     idx_t buffer_size;
+     //! Current batch index
+     idx_t batch_index = 0;
+ };
+
+ idx_t ParallelCSVGlobalState::MaxThreads() const {
+     // idx_t one_mb = 1000000;
+     // idx_t threads_per_mb = first_file_size / one_mb + 1;
+     // if (threads_per_mb < system_threads) {
+     //     return threads_per_mb;
+     // }
+     return system_threads;
+ }
+
+ bool ParallelCSVGlobalState::Finished() {
+     lock_guard<mutex> parallel_lock(main_mutex);
+     return !current_buffer;
+ }
+
+ unique_ptr<CSVBufferRead> ParallelCSVGlobalState::Next(ClientContext &context, ReadCSVData &bind_data) {
+     lock_guard<mutex> parallel_lock(main_mutex);
+     if (!current_buffer) {
+         // We are done scanning.
+         return nullptr;
+     }
+     // set up the current buffer
+     auto result = make_unique<CSVBufferRead>(current_buffer, next_buffer, next_byte, next_byte + bytes_per_local_state,
+                                              batch_index++, estimated_linenr);
+     // move the byte index of the CSV reader to the next buffer
+     next_byte += bytes_per_local_state;
+     estimated_linenr += bytes_per_local_state / (bind_data.sql_types.size() * 5); // estimate 5 bytes per column
+     if (next_byte >= current_buffer->GetBufferSize()) {
+         // We replace the current buffer with the next buffer
+         next_byte = 0;
+         bytes_read += current_buffer->GetBufferSize();
+         current_buffer = next_buffer;
+         if (next_buffer) {
+             // Next buffer gets the next-next buffer
+             next_buffer = next_buffer->Next(*file_handle, buffer_size);
+         }
+     }
+     if (current_buffer && !next_buffer) {
+         // This means we are done with the current file, we need to go to the next one (if it exists).
+         if (file_index < bind_data.files.size()) {
+             bind_data.options.file_path = bind_data.files[file_index++];
+             file_handle = ReadCSV::OpenCSV(bind_data.options, context);
+             next_buffer = make_shared<CSVBuffer>(buffer_size, *file_handle);
+         }
+     }
+     return result;
+ }
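Next hands each caller a half-open byte range of size bytes_per_local_state under a lock, rolling over to the next buffer (and, when a file is exhausted, the next file) as the ranges walk off the end. A minimal single-file model of the handout loop (no locking, illustrative names):

#include <cassert>
#include <cstddef>
#include <utility>

struct ByteRangeDispenser {
    size_t buffer_size;
    size_t chunk;
    size_t next_byte = 0;

    // returns {start, end} within the buffer, or {0, 0} once exhausted
    std::pair<size_t, size_t> Next() {
        if (next_byte >= buffer_size) {
            return {0, 0};
        }
        size_t start = next_byte;
        next_byte += chunk;
        return {start, start + chunk};
    }
};

int main() {
    ByteRangeDispenser d{32, 16};
    auto first = d.Next();
    auto second = d.Next();
    auto done = d.Next();
    assert(first.first == 0 && first.second == 16);
    assert(second.first == 16 && second.second == 32);
    assert(done.first == 0 && done.second == 0); // exhausted
}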
126285
+ static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext &context,
126286
+ TableFunctionInitInput &input) {
126287
+ auto &bind_data = (ReadCSVData &)*input.bind_data;
126288
+ if (bind_data.files.empty()) {
126289
+ // This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
126290
+ return make_unique<ParallelCSVGlobalState>();
126291
+ }
126292
+ unique_ptr<CSVFileHandle> file_handle;
126293
+ if (bind_data.initial_reader) {
126294
+ file_handle = move(bind_data.initial_reader->file_handle);
126295
+ bind_data.initial_reader.reset();
126296
+ } else {
126297
+ bind_data.options.file_path = bind_data.files[0];
126298
+ file_handle = ReadCSV::OpenCSV(bind_data.options, context);
126299
+ }
126300
+ idx_t rows_to_skip = bind_data.options.skip_rows + (bind_data.options.has_header ? 1 : 0);
126301
+ return make_unique<ParallelCSVGlobalState>(move(file_handle), bind_data.files, context.db->NumberOfThreads(),
126302
+ bind_data.options.buffer_size, rows_to_skip);
126303
+ }
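Note how the header folds into the skip count here: for example, skip_rows = 2 on a file with a header gives rows_to_skip = 3, the header line plus the two explicitly skipped rows.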
126304
+
126305
+ //===--------------------------------------------------------------------===//
126306
+ // Read CSV Local State
126307
+ //===--------------------------------------------------------------------===//
126308
+ struct ParallelCSVLocalState : public LocalTableFunctionState {
126309
+ public:
126310
+ explicit ParallelCSVLocalState(unique_ptr<ParallelCSVReader> csv_reader_p) : csv_reader(move(csv_reader_p)) {
126311
+ }
126312
+
126313
+ //! The CSV reader
126314
+ unique_ptr<ParallelCSVReader> csv_reader;
126315
+ CSVBufferRead previous_buffer;
126316
+ };
126317
+
126318
+ unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
126319
+ GlobalTableFunctionState *global_state_p) {
126320
+ auto &csv_data = (ReadCSVData &)*input.bind_data;
126321
+ if (csv_data.single_threaded) {
126322
+ return nullptr;
126323
+ }
126324
+ auto &global_state = (ParallelCSVGlobalState &)*global_state_p;
126325
+ auto next_local_buffer = global_state.Next(context.client, csv_data);
126326
+ unique_ptr<ParallelCSVReader> csv_reader;
126327
+ if (next_local_buffer) {
126328
+ csv_reader = make_unique<ParallelCSVReader>(context.client, csv_data.options, move(next_local_buffer),
126329
+ csv_data.sql_types);
126330
+ }
126331
+ auto new_local_state = make_unique<ParallelCSVLocalState>(move(csv_reader));
126332
+ return move(new_local_state);
126333
+ }
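Each worker claims its first byte range already at local-state construction; if the global cursor is exhausted by then, csv_reader stays null and ParallelReadCSVFunction below returns immediately with an empty chunk.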
126334
+
126335
+ static void ParallelReadCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
126336
+ auto &bind_data = (ReadCSVData &)*data_p.bind_data;
126337
+ auto &csv_global_state = (ParallelCSVGlobalState &)*data_p.global_state;
126338
+ auto &csv_local_state = (ParallelCSVLocalState &)*data_p.local_state;
126339
+
126340
+ if (!csv_local_state.csv_reader) {
126341
+ // no csv_reader was set; this can happen when a filename-based filter has filtered out all possible files
126342
+ return;
126343
+ }
126344
+
126345
+ do {
126346
+ if (output.size() != 0 || (csv_global_state.Finished() && csv_local_state.csv_reader->position_buffer >=
126347
+ csv_local_state.csv_reader->end_buffer)) {
126348
+ break;
126349
+ }
126350
+ if (csv_local_state.csv_reader->position_buffer >= csv_local_state.csv_reader->end_buffer) {
126351
+ auto next_chunk = csv_global_state.Next(context, bind_data);
126352
+ if (!next_chunk) {
126353
+ break;
126354
+ }
126355
+ // csv_local_state.previous_buffer = csv_local_state.csv_reader->buffer;
126356
+ csv_local_state.csv_reader->SetBufferRead(move(next_chunk));
126357
+ }
126358
+ csv_local_state.csv_reader->ParseCSV(output);
126359
+
126360
+ } while (true);
126361
+
126362
+ if (bind_data.options.union_by_name) {
126363
+ throw InternalException("FIXME: union by name");
126364
+ }
126365
+ if (bind_data.options.include_file_name) {
126366
+ throw InternalException("FIXME: output file name");
126367
+ }
126368
+ if (bind_data.options.include_parsed_hive_partitions) {
126369
+ throw InternalException("FIXME: hive partitions");
126370
+ }
126371
+ }
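The do/while above deliberately tolerates empty parses: ParseCSV may produce zero rows for a byte range (presumably one holding only a fragment of a line), so the loop keeps fetching ranges until a chunk contains rows or the global state has no further buffer reads to hand out.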
126372
+
126373
+ static idx_t CSVReaderGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
126374
+ LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state) {
126375
+ auto &bind_data = (ReadCSVData &)*bind_data_p;
126376
+ if (bind_data.single_threaded) {
126377
+ return 0;
126378
+ }
126379
+ auto &data = (ParallelCSVLocalState &)*local_state;
126380
+ return data.csv_reader->buffer->batch_index;
126381
+ }
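Reporting the per-buffer batch index gives downstream operators a stable key for ordering chunks produced by concurrent readers; the single-threaded path can always answer 0 because it emits a single ordered stream.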
126382
+
126383
+ //===--------------------------------------------------------------------===//
126384
+ // Single-Threaded CSV Reader
126385
+ //===--------------------------------------------------------------------===//
126386
+ struct SingleThreadedCSVState : public GlobalTableFunctionState {
125584
126387
  //! The CSV reader
125585
126388
  unique_ptr<BufferedCSVReader> csv_reader;
125586
126389
  //! The index of the next file to read (i.e. current file + 1)
@@ -125589,11 +126392,16 @@ struct ReadCSVOperatorData : public GlobalTableFunctionState {
125589
126392
  idx_t file_size;
125590
126393
  //! How many bytes were read up to this point
125591
126394
  atomic<idx_t> bytes_read;
126395
+
126396
+ idx_t MaxThreads() const override {
126397
+ return 1;
126398
+ }
125592
126399
  };
125593
126400
 
125594
- static unique_ptr<GlobalTableFunctionState> ReadCSVInit(ClientContext &context, TableFunctionInitInput &input) {
126401
+ static unique_ptr<GlobalTableFunctionState> SingleThreadedCSVInit(ClientContext &context,
126402
+ TableFunctionInitInput &input) {
125595
126403
  auto &bind_data = (ReadCSVData &)*input.bind_data;
125596
- auto result = make_unique<ReadCSVOperatorData>();
126404
+ auto result = make_unique<SingleThreadedCSVState>();
125597
126405
  if (bind_data.initial_reader) {
125598
126406
  result->csv_reader = move(bind_data.initial_reader);
125599
126407
  } else if (bind_data.files.empty()) {
@@ -125603,20 +126411,14 @@ static unique_ptr<GlobalTableFunctionState> ReadCSVInit(ClientContext &context,
125603
126411
  bind_data.options.file_path = bind_data.files[0];
125604
126412
  result->csv_reader = make_unique<BufferedCSVReader>(context, bind_data.options, bind_data.sql_types);
125605
126413
  }
125606
- result->file_size = result->csv_reader->GetFileSize();
126414
+ result->file_size = result->csv_reader->file_handle->FileSize();
125607
126415
  result->file_index = 1;
125608
126416
  return move(result);
125609
126417
  }
125610
126418
 
125611
- static unique_ptr<FunctionData> ReadCSVAutoBind(ClientContext &context, TableFunctionBindInput &input,
125612
- vector<LogicalType> &return_types, vector<string> &names) {
125613
- input.named_parameters["auto_detect"] = Value::BOOLEAN(true);
125614
- return ReadCSVBind(context, input, return_types, names);
125615
- }
125616
-
125617
- static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
126419
+ static void SingleThreadedCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
125618
126420
  auto &bind_data = (ReadCSVData &)*data_p.bind_data;
125619
- auto &data = (ReadCSVOperatorData &)*data_p.global_state;
126421
+ auto &data = (SingleThreadedCSVState &)*data_p.global_state;
125620
126422
 
125621
126423
  if (!data.csv_reader) {
125622
126424
  // no csv_reader was set, this can happen when a filename-based filter has filtered out all possible files
@@ -125675,6 +126477,27 @@ static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p,
125675
126477
  }
125676
126478
  }
125677
126479
 
126480
+ //===--------------------------------------------------------------------===//
126481
+ // Read CSV Functions
126482
+ //===--------------------------------------------------------------------===//
126483
+ static unique_ptr<GlobalTableFunctionState> ReadCSVInitGlobal(ClientContext &context, TableFunctionInitInput &input) {
126484
+ auto &bind_data = (ReadCSVData &)*input.bind_data;
126485
+ if (bind_data.single_threaded) {
126486
+ return SingleThreadedCSVInit(context, input);
126487
+ } else {
126488
+ return ParallelCSVInitGlobal(context, input);
126489
+ }
126490
+ }
126491
+
126492
+ static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
126493
+ auto &bind_data = (ReadCSVData &)*data_p.bind_data;
126494
+ if (bind_data.single_threaded) {
126495
+ SingleThreadedCSVFunction(context, data_p, output);
126496
+ } else {
126497
+ ParallelReadCSVFunction(context, data_p, output);
126498
+ }
126499
+ }
126500
+
125678
126501
  static void ReadCSVAddNamedParameters(TableFunction &table_function) {
125679
126502
  table_function.named_parameters["sep"] = LogicalType::VARCHAR;
125680
126503
  table_function.named_parameters["delim"] = LogicalType::VARCHAR;
@@ -125699,15 +126522,26 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
125699
126522
  table_function.named_parameters["maximum_line_size"] = LogicalType::VARCHAR;
125700
126523
  table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN;
125701
126524
  table_function.named_parameters["union_by_name"] = LogicalType::BOOLEAN;
126525
+ table_function.named_parameters["buffer_size"] = LogicalType::UBIGINT;
125702
126526
  }
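The new buffer_size argument is an unsigned integer byte count and is passed like any other read_csv named parameter, e.g. read_csv('data.csv', buffer_size=16777216) (an assumed usage of the standard named-parameter syntax; the file name is illustrative).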
125703
126527
 
125704
126528
  double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
125705
126529
  const GlobalTableFunctionState *global_state) {
125706
- auto &data = (const ReadCSVOperatorData &)*global_state;
125707
- if (data.file_size == 0) {
126530
+ auto &bind_data = (ReadCSVData &)*bind_data_p;
126531
+ idx_t file_size, bytes_read;
126532
+ if (bind_data.single_threaded) {
126533
+ auto &data = (const SingleThreadedCSVState &)*global_state;
126534
+ file_size = data.file_size;
126535
+ bytes_read = data.bytes_read;
126536
+ } else {
126537
+ auto &data = (const ParallelCSVGlobalState &)*global_state;
126538
+ file_size = data.file_size;
126539
+ bytes_read = data.bytes_read;
126540
+ }
126541
+ if (file_size == 0) {
125708
126542
  return 100;
125709
126543
  }
125710
- auto percentage = (data.bytes_read * 100.0) / data.file_size;
126544
+ auto percentage = (bytes_read * 100.0) / file_size;
125711
126545
  return percentage;
125712
126546
  }
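As a worked instance of the percentage logic above: 25 of 100 bytes read reports 25.0, while a zero-byte file short-circuits to 100 instead of dividing by zero. A minimal sketch with a hypothetical free function (not DuckDB's API):

#include <cstddef>

// Mirrors the guard-then-percentage shape of CSVReaderProgress above.
double ScanProgress(std::size_t bytes_read, std::size_t file_size) {
	if (file_size == 0) {
		return 100.0; // empty input: report done rather than divide by zero
	}
	return (bytes_read * 100.0) / file_size;
}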
125713
126547
 
@@ -125745,7 +126579,7 @@ void BufferedCSVReaderOptions::Serialize(FieldWriter &writer) const {
125745
126579
  writer.WriteField<bool>(header);
125746
126580
  writer.WriteField<bool>(ignore_errors);
125747
126581
  writer.WriteField<idx_t>(num_cols);
125748
- writer.WriteField<idx_t>(buffer_size);
126582
+ writer.WriteField<idx_t>(buffer_sample_size);
125749
126583
  writer.WriteString(null_str);
125750
126584
  writer.WriteField<FileCompressionType>(compression);
125751
126585
  // read options
@@ -125777,7 +126611,7 @@ void BufferedCSVReaderOptions::Deserialize(FieldReader &reader) {
125777
126611
  header = reader.ReadRequired<bool>();
125778
126612
  ignore_errors = reader.ReadRequired<bool>();
125779
126613
  num_cols = reader.ReadRequired<idx_t>();
125780
- buffer_size = reader.ReadRequired<idx_t>();
126614
+ buffer_sample_size = reader.ReadRequired<idx_t>();
125781
126615
  null_str = reader.ReadRequired<string>();
125782
126616
  compression = reader.ReadRequired<FileCompressionType>();
125783
126617
  // read options
@@ -125804,6 +126638,7 @@ static void CSVReaderSerialize(FieldWriter &writer, const FunctionData *bind_dat
125804
126638
  writer.WriteField<idx_t>(bind_data.filename_col_idx);
125805
126639
  writer.WriteField<idx_t>(bind_data.hive_partition_col_idx);
125806
126640
  bind_data.options.Serialize(writer);
126641
+ writer.WriteField<bool>(bind_data.single_threaded);
125807
126642
  }
125808
126643
 
125809
126644
  static unique_ptr<FunctionData> CSVReaderDeserialize(ClientContext &context, FieldReader &reader,
@@ -125814,27 +126649,31 @@ static unique_ptr<FunctionData> CSVReaderDeserialize(ClientContext &context, Fie
125814
126649
  result_data->filename_col_idx = reader.ReadRequired<idx_t>();
125815
126650
  result_data->hive_partition_col_idx = reader.ReadRequired<idx_t>();
125816
126651
  result_data->options.Deserialize(reader);
126652
+ result_data->single_threaded = reader.ReadField<bool>(true);
125817
126653
  return move(result_data);
125818
126654
  }
125819
126655
 
125820
126656
  TableFunction ReadCSVTableFunction::GetFunction(bool list_parameter) {
125821
126657
  auto parameter = list_parameter ? LogicalType::LIST(LogicalType::VARCHAR) : LogicalType::VARCHAR;
125822
- TableFunction read_csv("read_csv", {parameter}, ReadCSVFunction, ReadCSVBind, ReadCSVInit);
126658
+ TableFunction read_csv("read_csv", {parameter}, ReadCSVFunction, ReadCSVBind, ReadCSVInitGlobal, ReadCSVInitLocal);
125823
126659
  read_csv.table_scan_progress = CSVReaderProgress;
125824
126660
  read_csv.pushdown_complex_filter = CSVComplexFilterPushdown;
125825
126661
  read_csv.serialize = CSVReaderSerialize;
125826
126662
  read_csv.deserialize = CSVReaderDeserialize;
126663
+ read_csv.get_batch_index = CSVReaderGetBatchIndex;
125827
126664
  ReadCSVAddNamedParameters(read_csv);
125828
126665
  return read_csv;
125829
126666
  }
125830
126667
 
125831
126668
  TableFunction ReadCSVTableFunction::GetAutoFunction(bool list_parameter) {
125832
126669
  auto parameter = list_parameter ? LogicalType::LIST(LogicalType::VARCHAR) : LogicalType::VARCHAR;
125833
- TableFunction read_csv_auto("read_csv_auto", {parameter}, ReadCSVFunction, ReadCSVAutoBind, ReadCSVInit);
126670
+ TableFunction read_csv_auto("read_csv_auto", {parameter}, ReadCSVFunction, ReadCSVAutoBind, ReadCSVInitGlobal,
126671
+ ReadCSVInitLocal);
125834
126672
  read_csv_auto.table_scan_progress = CSVReaderProgress;
125835
126673
  read_csv_auto.pushdown_complex_filter = CSVComplexFilterPushdown;
125836
126674
  read_csv_auto.serialize = CSVReaderSerialize;
125837
126675
  read_csv_auto.deserialize = CSVReaderDeserialize;
126676
+ read_csv_auto.get_batch_index = CSVReaderGetBatchIndex;
125838
126677
  ReadCSVAddNamedParameters(read_csv_auto);
125839
126678
  return read_csv_auto;
125840
126679
  }
@@ -136024,6 +136863,14 @@ struct EnableProgressBarSetting {
136024
136863
  static Value GetSetting(ClientContext &context);
136025
136864
  };
136026
136865
 
136866
+ struct ExperimentalParallelCSVSetting {
136867
+ static constexpr const char *Name = "experimental_parallel_csv";
136868
+ static constexpr const char *Description = "Whether or not to use the experimental parallel CSV reader";
136869
+ static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN;
136870
+ static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
136871
+ static Value GetSetting(ClientContext &context);
136872
+ };
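Once this struct is registered in internal_options below, the reader is toggled like any other global option, e.g. SET experimental_parallel_csv=true; (assumed usage of DuckDB's standard SET syntax).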
136873
+
136027
136874
  struct ExplainOutputSetting {
136028
136875
  static constexpr const char *Name = "explain_output";
136029
136876
  static constexpr const char *Description = "Output of EXPLAIN statements (ALL, OPTIMIZED_ONLY, PHYSICAL_ONLY)";
@@ -136224,6 +137071,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
136224
137071
  DUCKDB_GLOBAL(EnableObjectCacheSetting),
136225
137072
  DUCKDB_LOCAL(EnableProfilingSetting),
136226
137073
  DUCKDB_LOCAL(EnableProgressBarSetting),
137074
+ DUCKDB_GLOBAL(ExperimentalParallelCSVSetting),
136227
137075
  DUCKDB_LOCAL(ExplainOutputSetting),
136228
137076
  DUCKDB_GLOBAL(ExternalThreadsSetting),
136229
137077
  DUCKDB_LOCAL(FileSearchPathSetting),
@@ -136668,6 +137516,7 @@ public:
136668
137516
 
136669
137517
 
136670
137518
 
137519
+
136671
137520
  namespace duckdb {
136672
137521
 
136673
137522
  Connection::Connection(DatabaseInstance &database) : context(make_shared<ClientContext>(database.shared_from_this())) {
@@ -150518,6 +151367,18 @@ Value EnableProgressBarSetting::GetSetting(ClientContext &context) {
150518
151367
  return Value::BOOLEAN(ClientConfig::GetConfig(context).enable_progress_bar);
150519
151368
  }
150520
151369
 
151370
+ //===--------------------------------------------------------------------===//
151371
+ // Experimental Parallel CSV
151372
+ //===--------------------------------------------------------------------===//
151373
+ void ExperimentalParallelCSVSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
151374
+ config.options.experimental_parallel_csv_reader = input.GetValue<bool>();
151375
+ }
151376
+
151377
+ Value ExperimentalParallelCSVSetting::GetSetting(ClientContext &context) {
151378
+ auto &config = DBConfig::GetConfig(context);
151379
+ return Value::BOOLEAN(config.options.experimental_parallel_csv_reader);
151380
+ }
151381
+
150521
151382
  //===--------------------------------------------------------------------===//
150522
151383
  // Explain Output
150523
151384
  //===--------------------------------------------------------------------===//
@@ -185277,6 +186138,8 @@ BindResult ExpressionBinder::BindExpression(CollateExpression &expr, idx_t depth
185277
186138
  if (child.expr->return_type.id() != LogicalTypeId::VARCHAR) {
185278
186139
  throw BinderException("collations are only supported for type varchar");
185279
186140
  }
186141
+ // Validate the collation, but don't use it
186142
+ PushCollation(context, child.expr->Copy(), expr.collation, false);
185280
186143
  child.expr->return_type = LogicalType::VARCHAR_COLLATION(expr.collation);
185281
186144
  return BindResult(move(child.expr));
185282
186145
  }
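Binding a Copy() of the child through PushCollation exercises the collation lookup purely for its error side effects, so an unknown collation fails at bind time while the expression itself is returned unmodified, carrying the collation only in its VARCHAR_COLLATION type.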