duckdb 0.7.2-dev3441.0 → 0.7.2-dev3515.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/package.json +2 -2
  2. package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
  3. package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
  4. package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -4
  5. package/src/duckdb/src/common/compressed_file_system.cpp +2 -2
  6. package/src/duckdb/src/common/file_system.cpp +2 -2
  7. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
  8. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
  9. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +1 -1
  10. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
  11. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +4 -3
  12. package/src/duckdb/src/common/serializer.cpp +1 -1
  13. package/src/duckdb/src/common/sort/radix_sort.cpp +5 -5
  14. package/src/duckdb/src/common/string_util.cpp +2 -2
  15. package/src/duckdb/src/common/types/bit.cpp +2 -2
  16. package/src/duckdb/src/common/types/blob.cpp +2 -2
  17. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  18. package/src/duckdb/src/common/types/date.cpp +1 -1
  19. package/src/duckdb/src/common/types/decimal.cpp +2 -2
  20. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +14 -2
  21. package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
  22. package/src/duckdb/src/common/types/time.cpp +1 -1
  23. package/src/duckdb/src/common/types/vector.cpp +7 -7
  24. package/src/duckdb/src/common/virtual_file_system.cpp +4 -0
  25. package/src/duckdb/src/common/windows_util.cpp +2 -2
  26. package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +6 -3
  27. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -5
  28. package/src/duckdb/src/core_functions/scalar/string/printf.cpp +1 -1
  29. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  30. package/src/duckdb/src/execution/join_hashtable.cpp +3 -3
  31. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +2 -2
  32. package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
  33. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
  34. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  35. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -7
  36. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +4 -41
  37. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
  38. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +1 -1
  39. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +2 -2
  40. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +3 -4
  41. package/src/duckdb/src/execution/window_segment_tree.cpp +1 -1
  42. package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
  43. package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -1
  44. package/src/duckdb/src/function/scalar/string/concat.cpp +1 -1
  45. package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
  46. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
  47. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  48. package/src/duckdb/src/function/table/read_csv.cpp +43 -35
  49. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  50. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
  51. package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -3
  52. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -5
  53. package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -9
  54. package/src/duckdb/src/include/duckdb/common/memory_safety.hpp +15 -0
  55. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +1 -0
  56. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
  58. package/src/duckdb/src/include/duckdb/common/serializer/buffered_serializer.hpp +3 -2
  59. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -3
  60. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +11 -6
  61. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -1
  62. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -3
  64. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +4 -4
  65. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +53 -22
  66. package/src/duckdb/src/include/duckdb/common/vector.hpp +5 -2
  67. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +4 -4
  69. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +1 -1
  70. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
  72. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +0 -2
  73. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +2 -2
  74. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +27 -127
  75. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +2 -2
  76. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +2 -2
  77. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
  78. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +2 -4
  79. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +3 -3
  80. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +5 -0
  81. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -2
  82. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +9 -2
  83. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
  85. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +1 -1
  86. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -2
  87. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +5 -5
  88. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +14 -6
  89. package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -1
  90. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +10 -0
  91. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +32 -7
  92. package/src/duckdb/src/storage/arena_allocator.cpp +1 -1
  93. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -11
  94. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
  95. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -2
  96. package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
  97. package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
  98. package/src/duckdb/src/storage/table/row_group.cpp +2 -2
  99. package/src/duckdb/src/storage/table/update_segment.cpp +7 -6
  100. package/src/duckdb/third_party/fsst/libfsst.cpp +1 -2
  101. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +9 -0
  102. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +13 -12
  103. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  104. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12537 -12415
  105. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  106. package/src/statement.cpp +15 -13
package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp

@@ -34,7 +34,7 @@ string BaseCSVReader::GetLineNumberStr(idx_t line_error, bool is_line_estimated,
 
 BaseCSVReader::BaseCSVReader(ClientContext &context_p, BufferedCSVReaderOptions options_p,
                              const vector<LogicalType> &requested_types)
-    : context(context_p), fs(FileSystem::GetFileSystem(context)), allocator(Allocator::Get(context)),
+    : context(context_p), fs(FileSystem::GetFileSystem(context)), allocator(BufferAllocator::Get(context)),
       options(std::move(options_p)) {
 }
 
@@ -42,12 +42,7 @@ BaseCSVReader::~BaseCSVReader() {
 }
 
 unique_ptr<CSVFileHandle> BaseCSVReader::OpenCSV(const BufferedCSVReaderOptions &options_p) {
-	auto file_handle = fs.OpenFile(options_p.file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK,
-	                               options_p.compression);
-	if (file_handle->CanSeek()) {
-		file_handle->Reset();
-	}
-	return make_uniq<CSVFileHandle>(std::move(file_handle));
+	return CSVFileHandle::OpenFile(fs, allocator, options_p.file_path, options_p.compression, true);
 }
 
 void BaseCSVReader::InitParseChunk(idx_t num_cols) {

package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp

@@ -239,18 +239,13 @@ void BufferedCSVReader::Initialize(const vector<LogicalType> &requested_types) {
 		if (return_types.empty()) {
 			throw InvalidInputException("Failed to detect column types from CSV: is the file a valid CSV file?");
 		}
-		if (cached_chunks.empty()) {
-			JumpToBeginning(options.skip_rows, options.header);
-		}
+		JumpToBeginning(options.skip_rows, options.header);
 	} else {
 		return_types = requested_types;
 		ResetBuffer();
 		SkipRowsAndReadHeader(options.skip_rows, options.header);
 	}
 	InitParseChunk(return_types.size());
-	// we only need reset support during the automatic CSV type detection
-	// since reset support might require caching (in the case of streams), we disable it for the remainder
-	file_handle->DisableReset();
 }
 
 void BufferedCSVReader::ResetBuffer() {
@@ -262,13 +257,7 @@ void BufferedCSVReader::ResetBuffer() {
 }
 
 void BufferedCSVReader::ResetStream() {
-	if (!file_handle->CanSeek()) {
-		// seeking to the beginning appears to not be supported in all compiler/os-scenarios,
-		// so we have to create a new stream source here for now
-		file_handle->Reset();
-	} else {
-		file_handle->Seek(0);
-	}
+	file_handle->Reset();
 	linenr = 0;
 	linenr_estimated = false;
 	bytes_per_line_avg = 0;
@@ -332,7 +321,7 @@ bool BufferedCSVReader::JumpToNextSample() {
 
 	// if we deal with any other sources than plaintext files, jumping_samples can be tricky. In that case
 	// we just read x continuous chunks from the stream TODO: make jumps possible for zipfiles.
-	if (!file_handle->PlainFileSource() || !jumping_samples) {
+	if (!file_handle->OnDiskFile() || !jumping_samples) {
 		sample_chunk_idx++;
 		return true;
 	}
@@ -802,21 +791,6 @@ vector<LogicalType> BufferedCSVReader::RefineTypeDetection(const vector<LogicalT
 			}
 		}
 	}
-
-	if (!jumping_samples) {
-		if ((sample_chunk_idx)*options.sample_chunk_size <= options.buffer_size) {
-			// cache parse chunk
-			// create a new chunk and fill it with the remainder
-			auto chunk = make_uniq<DataChunk>();
-			auto parse_chunk_types = parse_chunk.GetTypes();
-			chunk->Move(parse_chunk);
-			cached_chunks.push(std::move(chunk));
-		} else {
-			while (!cached_chunks.empty()) {
-				cached_chunks.pop();
-			}
-		}
-	}
 }
 
 	// set sql types
@@ -1445,7 +1419,7 @@ bool BufferedCSVReader::ReadBuffer(idx_t &start, idx_t &line_start) {
 		                            GetLineNumberStr(linenr, linenr_estimated));
 	}
 
-	buffer = unique_ptr<char[]>(new char[buffer_read_size + remaining + 1]);
+	buffer = make_unsafe_array<char>(buffer_read_size + remaining + 1);
 	buffer_size = remaining + buffer_read_size;
 	if (remaining > 0) {
 		// remaining from last buffer: copy it here
@@ -1474,17 +1448,6 @@ bool BufferedCSVReader::ReadBuffer(idx_t &start, idx_t &line_start) {
 }
 
 void BufferedCSVReader::ParseCSV(DataChunk &insert_chunk) {
-	// if no auto-detect or auto-detect with jumping samples, we have nothing cached and start from the beginning
-	if (cached_chunks.empty()) {
-		cached_buffers.clear();
-	} else {
-		auto &chunk = cached_chunks.front();
-		parse_chunk.Move(*chunk);
-		cached_chunks.pop();
-		Flush(insert_chunk);
-		return;
-	}
-
 	string error_message;
 	if (!TryParseCSV(ParserMode::PARSING, insert_chunk, error_message)) {
 		throw InvalidInputException(error_message);
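
The cached_chunks/cached_buffers machinery removed above is superseded by the caching inside the new CSVFileHandle (next file), and the buffer allocation switches to make_unsafe_array<char>, a pattern that recurs through most of the remaining hunks (see helper.hpp and memory_safety.hpp in the file list). A minimal standalone sketch of what such a helper boils down to, assuming it wraps array new in a unique_ptr-style owner whose checked bounds/null assertions are disabled:

#include <cstddef>
#include <memory>

// Standalone analogue (name hypothetical): allocate T[n] on the heap and
// transfer ownership, with no extra zero-initialization pass for trivial types.
template <class T>
std::unique_ptr<T[]> make_unsafe_array_sketch(std::size_t n) {
	return std::unique_ptr<T[]>(new T[n]);
}
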

package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp

@@ -0,0 +1,158 @@
+#include "duckdb/execution/operator/persistent/csv_file_handle.hpp"
+
+namespace duckdb {
+
+CSVFileHandle::CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<FileHandle> file_handle_p,
+                             const string &path_p, FileCompressionType compression, bool enable_reset)
+    : fs(fs), allocator(allocator), file_handle(std::move(file_handle_p)), path(path_p), compression(compression),
+      reset_enabled(enable_reset) {
+	can_seek = file_handle->CanSeek();
+	on_disk_file = file_handle->OnDiskFile();
+	file_size = file_handle->GetFileSize();
+}
+
+unique_ptr<FileHandle> CSVFileHandle::OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
+                                                     FileCompressionType compression) {
+	auto file_handle = fs.OpenFile(path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
+	if (file_handle->CanSeek()) {
+		file_handle->Reset();
+	}
+	return file_handle;
+}
+
+unique_ptr<CSVFileHandle> CSVFileHandle::OpenFile(FileSystem &fs, Allocator &allocator, const string &path,
+                                                  FileCompressionType compression, bool enable_reset) {
+	auto file_handle = CSVFileHandle::OpenFileHandle(fs, allocator, path, compression);
+	return make_uniq<CSVFileHandle>(fs, allocator, std::move(file_handle), path, compression, enable_reset);
+}
+
+bool CSVFileHandle::CanSeek() {
+	return can_seek;
+}
+
+void CSVFileHandle::Seek(idx_t position) {
+	if (!can_seek) {
+		throw InternalException("Cannot seek in this file");
+	}
+	file_handle->Seek(position);
+}
+
+idx_t CSVFileHandle::SeekPosition() {
+	if (!can_seek) {
+		throw InternalException("Cannot seek in this file");
+	}
+	return file_handle->SeekPosition();
+}
+
+void CSVFileHandle::Reset() {
+	requested_bytes = 0;
+	read_position = 0;
+	if (can_seek) {
+		// we can seek - reset the file handle
+		file_handle->Reset();
+	} else if (on_disk_file) {
+		// we cannot seek but it is an on-disk file - re-open the file
+		file_handle = CSVFileHandle::OpenFileHandle(fs, allocator, path, compression);
+	} else {
+		if (!reset_enabled) {
+			throw InternalException("Reset called but reset is not enabled for this CSV Handle");
+		}
+		read_position = 0;
+	}
+}
+bool CSVFileHandle::OnDiskFile() {
+	return on_disk_file;
+}
+
+idx_t CSVFileHandle::FileSize() {
+	return file_size;
+}
+
+bool CSVFileHandle::FinishedReading() {
+	return requested_bytes >= file_size;
+}
+
+idx_t CSVFileHandle::Read(void *buffer, idx_t nr_bytes) {
+	requested_bytes += nr_bytes;
+	if (on_disk_file || can_seek) {
+		// if this is a plain file source OR we can seek we are not caching anything
+		return file_handle->Read(buffer, nr_bytes);
+	}
+	// not a plain file source: we need to do some bookkeeping around the reset functionality
+	idx_t result_offset = 0;
+	if (read_position < buffer_size) {
+		// we need to read from our cached buffer
+		auto buffer_read_count = MinValue<idx_t>(nr_bytes, buffer_size - read_position);
+		memcpy(buffer, cached_buffer.get() + read_position, buffer_read_count);
+		result_offset += buffer_read_count;
+		read_position += buffer_read_count;
+		if (result_offset == nr_bytes) {
+			return nr_bytes;
+		}
+	} else if (!reset_enabled && cached_buffer.IsSet()) {
+		// reset is disabled, but we still have cached data
+		// we can remove any cached data
+		cached_buffer.Reset();
+		buffer_size = 0;
+		buffer_capacity = 0;
+		read_position = 0;
+	}
+	// we have data left to read from the file
+	// read directly into the buffer
+	auto bytes_read = file_handle->Read((char *)buffer + result_offset, nr_bytes - result_offset);
+	file_size = file_handle->GetFileSize();
+	read_position += bytes_read;
+	if (reset_enabled) {
+		// if reset caching is enabled, we need to cache the bytes that we have read
+		if (buffer_size + bytes_read >= buffer_capacity) {
+			// no space; first enlarge the buffer
+			buffer_capacity = MaxValue<idx_t>(NextPowerOfTwo(buffer_size + bytes_read), buffer_capacity * 2);
+
+			auto new_buffer = allocator.Allocate(buffer_capacity);
+			if (buffer_size > 0) {
+				memcpy(new_buffer.get(), cached_buffer.get(), buffer_size);
+			}
+			cached_buffer = std::move(new_buffer);
+		}
+		memcpy(cached_buffer.get() + buffer_size, (char *)buffer + result_offset, bytes_read);
+		buffer_size += bytes_read;
+	}
+
+	return result_offset + bytes_read;
+}
+
+string CSVFileHandle::ReadLine() {
+	bool carriage_return = false;
+	string result;
+	char buffer[1];
+	while (true) {
+		idx_t bytes_read = Read(buffer, 1);
+		if (bytes_read == 0) {
+			return result;
+		}
+		if (carriage_return) {
+			if (buffer[0] != '\n') {
+				if (!file_handle->CanSeek()) {
+					throw BinderException(
+					    "Carriage return newlines not supported when reading CSV files in which we cannot seek");
+				}
+				file_handle->Seek(file_handle->SeekPosition() - 1);
+				return result;
+			}
+		}
+		if (buffer[0] == '\n') {
+			return result;
+		}
+		if (buffer[0] != '\r') {
+			result += buffer[0];
+		} else {
+			carriage_return = true;
+		}
+	}
+}
+
+void CSVFileHandle::DisableReset() {
+	this->reset_enabled = false;
+}
+
+} // namespace duckdb
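
The new handle unifies rewind behavior: seekable sources seek, non-seekable on-disk files are re-opened, and pure streams replay reads from an in-memory cache that only grows while reset_enabled is true. A sketch of the intended lifecycle using only the methods defined above — the surrounding setup (where fs and allocator come from, and the SniffThenScan wrapper itself) is assumed:

#include "duckdb/execution/operator/persistent/csv_file_handle.hpp"

using namespace duckdb;

// Hypothetical driver: read a prefix for type detection, rewind, then scan.
static void SniffThenScan(FileSystem &fs, Allocator &allocator, const string &path) {
	// enable_reset=true: reads from a non-seekable stream are mirrored into the cache
	auto handle = CSVFileHandle::OpenFile(fs, allocator, path,
	                                      FileCompressionType::AUTO_DETECT, /*enable_reset=*/true);
	char buf[4096];
	handle->Read(buf, sizeof(buf)); // sniffing pass
	handle->Reset();                // seek, re-open, or replay the cache, depending on the source
	handle->DisableReset();         // bulk scan: stop caching from here on
}
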

package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp

@@ -19,7 +19,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
 	for (auto &aggr_expr : bound_pivot.aggregates) {
 		auto &aggr = (BoundAggregateExpression &)*aggr_expr;
 		// for each aggregate, initialize an empty aggregate state and finalize it immediately
-		auto state = unique_ptr<data_t[]>(new data_t[aggr.function.state_size()]);
+		auto state = make_unsafe_array<data_t>(aggr.function.state_size());
 		aggr.function.initialize(state.get());
 		Vector state_vector(Value::POINTER((uintptr_t)state.get()));
 		Vector result_vector(aggr_expr->return_type);

package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp

@@ -23,11 +23,11 @@ PerfectAggregateHashTable::PerfectAggregateHashTable(ClientContext &context, All
 	tuple_size = layout.GetRowWidth();
 
 	// allocate and null initialize the data
-	owned_data = unique_ptr<data_t[]>(new data_t[tuple_size * total_groups]);
+	owned_data = make_unsafe_array<data_t>(tuple_size * total_groups);
 	data = owned_data.get();
 
 	// set up the empty payloads for every tuple, and initialize the "occupied" flag to false
-	group_is_set = unique_ptr<bool[]>(new bool[total_groups]);
+	group_is_set = make_unsafe_array<bool>(total_groups);
 	memset(group_is_set.get(), 0, total_groups * sizeof(bool));
 
 	// initialize the hash table for each entry

package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp

@@ -334,7 +334,7 @@ public:
 	//! The current position to scan the HT for output tuples
 	idx_t ht_index;
 	//! The set of aggregate scan states
-	unique_ptr<TupleDataParallelScanState[]> ht_scan_states;
+	unsafe_array_ptr<TupleDataParallelScanState> ht_scan_states;
 	atomic<bool> initialized;
 	atomic<bool> finished;
 };
@@ -404,7 +404,7 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
 		for (idx_t i = 0; i < op.aggregates.size(); i++) {
 			D_ASSERT(op.aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
 			auto &aggr = op.aggregates[i]->Cast<BoundAggregateExpression>();
-			auto aggr_state = unique_ptr<data_t[]>(new data_t[aggr.function.state_size()]);
+			auto aggr_state = make_unsafe_array<data_t>(aggr.function.state_size());
 			aggr.function.initialize(aggr_state.get());
 
 			AggregateInputData aggr_input_data(aggr.bind_info.get(), Allocator::DefaultAllocator());
@@ -433,8 +433,7 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
 	lock_guard<mutex> l(state.lock);
 	if (!state.initialized) {
 		auto &finalized_hts = gstate.finalized_hts;
-		state.ht_scan_states =
-		    unique_ptr<TupleDataParallelScanState[]>(new TupleDataParallelScanState[finalized_hts.size()]);
+		state.ht_scan_states = make_unsafe_array<TupleDataParallelScanState>(finalized_hts.size());
 
 		const auto &layout = gstate.finalized_hts[0]->GetDataCollection().GetLayout();
 		vector<column_t> column_ids;

package/src/duckdb/src/execution/window_segment_tree.cpp

@@ -309,7 +309,7 @@ void WindowSegmentTree::ConstructTree() {
 		level_nodes = (level_nodes + (TREE_FANOUT - 1)) / TREE_FANOUT;
 		internal_nodes += level_nodes;
 	} while (level_nodes > 1);
-	levels_flat_native = unique_ptr<data_t[]>(new data_t[internal_nodes * state.size()]);
+	levels_flat_native = make_unsafe_array<data_t>(internal_nodes * state.size());
 	levels_flat_start.push_back(0);
 
 	idx_t levels_flat_offset = 0;

package/src/duckdb/src/function/pragma/pragma_queries.cpp

@@ -139,7 +139,7 @@ string PragmaImportDatabase(ClientContext &context, const FunctionParameters &pa
 		auto handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK,
 		                          FileSystem::DEFAULT_COMPRESSION);
 		auto fsize = fs.GetFileSize(*handle);
-		auto buffer = unique_ptr<char[]>(new char[fsize]);
+		auto buffer = make_unsafe_array<char>(fsize);
 		fs.Read(*handle, buffer.get(), fsize);
 		auto query = string(buffer.get(), fsize);
 		// Replace the placeholder with the path provided to IMPORT

package/src/duckdb/src/function/scalar/strftime_format.cpp

@@ -408,7 +408,7 @@ string StrfTimeFormat::Format(timestamp_t timestamp, const string &format_str) {
 	auto time = Timestamp::GetTime(timestamp);
 
 	auto len = format.GetLength(date, time, 0, nullptr);
-	auto result = unique_ptr<char[]>(new char[len]);
+	auto result = make_unsafe_array<char>(len);
 	format.FormatString(date, time, result.get());
 	return string(result.get(), len);
 }

package/src/duckdb/src/function/scalar/string/concat.cpp

@@ -118,7 +118,7 @@ static void TemplatedConcatWS(DataChunk &args, string_t *sep_data, const Selecti
                               const SelectionVector &rsel, idx_t count, Vector &result) {
 	vector<idx_t> result_lengths(args.size(), 0);
 	vector<bool> has_results(args.size(), false);
-	auto orrified_data = unique_ptr<UnifiedVectorFormat[]>(new UnifiedVectorFormat[args.ColumnCount() - 1]);
+	auto orrified_data = make_unsafe_array<UnifiedVectorFormat>(args.ColumnCount() - 1);
 	for (idx_t col_idx = 1; col_idx < args.ColumnCount(); col_idx++) {
 		args.data[col_idx].ToUnifiedFormat(args.size(), orrified_data[col_idx - 1]);
 	}

package/src/duckdb/src/function/scalar/string/like.cpp

@@ -395,11 +395,11 @@ bool ILikeOperatorFunction(string_t &str, string_t &pattern, char escape = '\0')
 
 	// lowercase both the str and the pattern
 	idx_t str_llength = LowerFun::LowerLength(str_data, str_size);
-	auto str_ldata = unique_ptr<char[]>(new char[str_llength]);
+	auto str_ldata = make_unsafe_array<char>(str_llength);
 	LowerFun::LowerCase(str_data, str_size, str_ldata.get());
 
 	idx_t pat_llength = LowerFun::LowerLength(pat_data, pat_size);
-	auto pat_ldata = unique_ptr<char[]>(new char[pat_llength]);
+	auto pat_ldata = make_unsafe_array<char>(pat_llength);
 	LowerFun::LowerCase(pat_data, pat_size, pat_ldata.get());
 	string_t str_lcase(str_ldata.get(), str_llength);
 	string_t pat_lcase(pat_ldata.get(), pat_llength);

package/src/duckdb/src/function/scalar/system/aggregate_export.cpp

@@ -36,12 +36,12 @@ struct ExportAggregateBindData : public FunctionData {
 struct CombineState : public FunctionLocalState {
 	idx_t state_size;
 
-	unique_ptr<data_t[]> state_buffer0, state_buffer1;
+	unsafe_array_ptr<data_t> state_buffer0, state_buffer1;
 	Vector state_vector0, state_vector1;
 
 	explicit CombineState(idx_t state_size_p)
-	    : state_size(state_size_p), state_buffer0(unique_ptr<data_t[]>(new data_t[state_size_p])),
-	      state_buffer1(unique_ptr<data_t[]>(new data_t[state_size_p])),
+	    : state_size(state_size_p), state_buffer0(make_unsafe_array<data_t>(state_size_p)),
+	      state_buffer1(make_unsafe_array<data_t>(state_size_p)),
 	      state_vector0(Value::POINTER((uintptr_t)state_buffer0.get())),
 	      state_vector1(Value::POINTER((uintptr_t)state_buffer1.get())) {
 	}
@@ -55,12 +55,12 @@ static unique_ptr<FunctionLocalState> InitCombineState(ExpressionState &state, c
 
 struct FinalizeState : public FunctionLocalState {
 	idx_t state_size;
-	unique_ptr<data_t[]> state_buffer;
+	unsafe_array_ptr<data_t> state_buffer;
 	Vector addresses;
 
 	explicit FinalizeState(idx_t state_size_p)
 	    : state_size(state_size_p),
-	      state_buffer(unique_ptr<data_t[]>(new data_t[STANDARD_VECTOR_SIZE * AlignValue(state_size_p)])),
+	      state_buffer(make_unsafe_array<data_t>(STANDARD_VECTOR_SIZE * AlignValue(state_size_p))),
 	      addresses(LogicalType::POINTER) {
 	}
 };

package/src/duckdb/src/function/table/copy_csv.cpp

@@ -78,7 +78,7 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &i
 	bind_data->is_simple = bind_data->options.delimiter.size() == 1 && bind_data->options.escape.size() == 1 &&
 	                       bind_data->options.quote.size() == 1;
 	if (bind_data->is_simple) {
-		bind_data->requires_quotes = unique_ptr<bool[]>(new bool[256]);
+		bind_data->requires_quotes = make_unsafe_array<bool>(256);
 		memset(bind_data->requires_quotes.get(), 0, sizeof(bool) * 256);
 		bind_data->requires_quotes['\n'] = true;
 		bind_data->requires_quotes['\r'] = true;
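
The requires_quotes table turns the per-byte quoting decision into a single array lookup. An illustrative sketch of how such a table is consulted when writing a value — the actual quoting logic in copy_csv.cpp is more involved:

#include <cstddef>

// Scan a value and report whether any byte forces quoting.
static bool NeedsQuotes(const char *data, std::size_t len, const bool requires_quotes[256]) {
	for (std::size_t i = 0; i < len; i++) {
		if (requires_quotes[static_cast<unsigned char>(data[i])]) {
			return true; // newline, carriage return, delimiter, or quote byte
		}
	}
	return false;
}
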

package/src/duckdb/src/function/table/read_csv.cpp

@@ -4,7 +4,6 @@
 #include "duckdb/main/database.hpp"
 #include "duckdb/common/string_util.hpp"
 #include "duckdb/common/enum_util.hpp"
-#include "duckdb/common/hive_partitioning.hpp"
 #include "duckdb/common/union_by_name.hpp"
 #include "duckdb/main/config.hpp"
 #include "duckdb/parser/expression/constant_expression.hpp"
@@ -15,7 +14,6 @@
 #include "duckdb/common/multi_file_reader.hpp"
 #include "duckdb/main/client_data.hpp"
 #include "duckdb/execution/operator/persistent/csv_line_info.hpp"
-
 #include <limits>
 
 namespace duckdb {
@@ -23,11 +21,8 @@ namespace duckdb {
 unique_ptr<CSVFileHandle> ReadCSV::OpenCSV(const string &file_path, FileCompressionType compression,
                                            ClientContext &context) {
 	auto &fs = FileSystem::GetFileSystem(context);
-	auto file_handle = fs.OpenFile(file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
-	if (file_handle->CanSeek()) {
-		file_handle->Reset();
-	}
-	return make_uniq<CSVFileHandle>(std::move(file_handle), false);
+	auto &allocator = BufferAllocator::Get(context);
+	return CSVFileHandle::OpenFile(fs, allocator, file_path, compression, false);
 }
 
 void ReadCSVData::FinalizeRead(ClientContext &context) {
@@ -238,14 +233,6 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 	} else {
 		result->reader_bind = MultiFileReader::BindOptions(options.file_options, result->files, return_types, names);
 	}
-	auto &fs = FileSystem::GetFileSystem(context);
-	for (auto &file : result->files) {
-		if (fs.IsPipe(file)) {
-			result->is_pipe = true;
-			result->single_threaded = true;
-			break;
-		}
-	}
 	result->return_types = return_types;
 	result->return_names = names;
 	result->FinalizeRead(context);
@@ -265,7 +252,7 @@ static unique_ptr<FunctionData> ReadCSVAutoBind(ClientContext &context, TableFun
 struct ParallelCSVGlobalState : public GlobalTableFunctionState {
 public:
 	ParallelCSVGlobalState(ClientContext &context, unique_ptr<CSVFileHandle> file_handle_p,
-	                       vector<string> &files_path_p, idx_t system_threads_p, idx_t buffer_size_p,
+	                       const vector<string> &files_path_p, idx_t system_threads_p, idx_t buffer_size_p,
	                       idx_t rows_to_skip, bool force_parallelism_p, vector<column_t> column_ids_p, bool has_header)
 	    : file_handle(std::move(file_handle_p)), system_threads(system_threads_p), buffer_size(buffer_size_p),
 	      force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)),
@@ -278,6 +265,7 @@ public:
 		}
 		file_size = file_handle->FileSize();
 		first_file_size = file_size;
+		on_disk_file = file_handle->OnDiskFile();
 		bytes_read = 0;
 		if (buffer_size < file_size || file_size == 0) {
 			bytes_per_local_state = buffer_size / ParallelCSVGlobalState::MaxThreads();
@@ -335,7 +323,7 @@ public:
 
 	bool Finished();
 
-	double GetProgress(ReadCSVData &bind_data) const {
+	double GetProgress(const ReadCSVData &bind_data) const {
 		idx_t total_files = bind_data.files.size();
 
 		// get the progress WITHIN the current file
@@ -369,6 +357,8 @@ private:
 	idx_t bytes_per_local_state;
 	//! Size of first file
 	idx_t first_file_size;
+	//! Whether or not this is an on-disk file
+	bool on_disk_file = true;
 	//! Basically max number of threads in DuckDB
 	idx_t system_threads;
 	//! Size of the buffers
@@ -402,7 +392,7 @@ private:
 };
 
 idx_t ParallelCSVGlobalState::MaxThreads() const {
-	if (force_parallelism) {
+	if (force_parallelism || !on_disk_file) {
 		return system_threads;
 	}
 	idx_t one_mb = 1000000; // We initialize max one thread per Mb
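
Streams now opt out of the size-based thread cap, since FileSize() is unreliable for them. A standalone sketch of the one-thread-per-MB heuristic this method applies — the exact rounding and clamping in read_csv.cpp may differ:

#include <algorithm>
#include <cstdint>

static uint64_t MaxCsvThreadsSketch(uint64_t first_file_size, uint64_t system_threads,
                                    bool force_parallelism, bool on_disk_file) {
	if (force_parallelism || !on_disk_file) {
		// verify_parallelism is set, or the source reports no meaningful size
		return system_threads;
	}
	const uint64_t one_mb = 1000000; // at most one thread per MB of input
	return std::min(system_threads, std::max<uint64_t>(1, first_file_size / one_mb));
}
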
@@ -628,7 +618,7 @@ idx_t LineInfo::GetLine(idx_t batch_idx, idx_t line_error, idx_t file_idx, idx_t
 
 static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext &context,
                                                                   TableFunctionInitInput &input) {
-	auto &bind_data = (ReadCSVData &)*input.bind_data;
+	auto &bind_data = input.bind_data->CastNoConst<ReadCSVData>();
 	if (bind_data.files.empty()) {
 		// This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
 		return make_uniq<ParallelCSVGlobalState>();
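
From here on, the C-style (ReadCSVData &)* casts are replaced with the Cast<T>() / CastNoConst<T>() helpers on FunctionData. A standalone sketch of what such helpers typically look like — names below are illustrative, and DuckDB's real versions presumably add debug-build type assertions:

// Minimal stand-in for a FunctionData-style base class with checked casts.
struct FunctionDataBaseSketch {
	virtual ~FunctionDataBaseSketch() = default;

	template <class TARGET>
	TARGET &Cast() {
		return reinterpret_cast<TARGET &>(*this); // a debug build would assert the dynamic type first
	}
	template <class TARGET>
	const TARGET &Cast() const {
		return reinterpret_cast<const TARGET &>(*this);
	}
	// Used where the pipeline hands out a const pointer but init code must mutate
	// the bind data, making explicit what the old (T &)* casts did silently.
	template <class TARGET>
	TARGET &CastNoConst() const {
		return const_cast<TARGET &>(reinterpret_cast<const TARGET &>(*this));
	}
};
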
@@ -636,7 +626,15 @@ static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext
 	unique_ptr<CSVFileHandle> file_handle;
 
 	bind_data.options.file_path = bind_data.files[0];
-	file_handle = ReadCSV::OpenCSV(bind_data.options.file_path, bind_data.options.compression, context);
+
+	if (bind_data.initial_reader) {
+		file_handle = std::move(bind_data.initial_reader->file_handle);
+		file_handle->Reset();
+		file_handle->DisableReset();
+		bind_data.initial_reader.reset();
+	} else {
+		file_handle = ReadCSV::OpenCSV(bind_data.options.file_path, bind_data.options.compression, context);
+	}
 	return make_uniq<ParallelCSVGlobalState>(
 	    context, std::move(file_handle), bind_data.files, context.db->NumberOfThreads(), bind_data.options.buffer_size,
 	    bind_data.options.skip_rows, ClientConfig::GetConfig(context).verify_parallelism, input.column_ids,
@@ -738,7 +736,7 @@ struct SingleThreadedCSVState : public GlobalTableFunctionState {
 		return total_files;
 	}
 
-	double GetProgress(ReadCSVData &bind_data) const {
+	double GetProgress(const ReadCSVData &bind_data) const {
 		D_ASSERT(total_files == bind_data.files.size());
 		D_ASSERT(progress_in_files <= total_files * 100);
 		return (double(progress_in_files) / double(total_files));
@@ -746,6 +744,16 @@ struct SingleThreadedCSVState : public GlobalTableFunctionState {
 
 	unique_ptr<BufferedCSVReader> GetCSVReader(ClientContext &context, ReadCSVData &bind_data, idx_t &file_index,
 	                                           idx_t &total_size) {
+		auto reader = GetCSVReaderInternal(context, bind_data, file_index, total_size);
+		if (reader) {
+			reader->file_handle->DisableReset();
+		}
+		return reader;
+	}
+
+private:
+	unique_ptr<BufferedCSVReader> GetCSVReaderInternal(ClientContext &context, ReadCSVData &bind_data,
+	                                                   idx_t &file_index, idx_t &total_size) {
 		BufferedCSVReaderOptions options;
 		{
 			lock_guard<mutex> l(csv_lock);
@@ -799,14 +807,14 @@ public:
 
 static unique_ptr<GlobalTableFunctionState> SingleThreadedCSVInit(ClientContext &context,
                                                                   TableFunctionInitInput &input) {
-	auto &bind_data = (ReadCSVData &)*input.bind_data;
+	auto &bind_data = input.bind_data->CastNoConst<ReadCSVData>();
 	auto result = make_uniq<SingleThreadedCSVState>(bind_data.files.size());
 	if (bind_data.files.empty()) {
 		// This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
 		return std::move(result);
 	} else {
 		bind_data.options.file_path = bind_data.files[0];
-		if (bind_data.initial_reader && bind_data.is_pipe) {
+		if (bind_data.initial_reader) {
 			// If this is a pipe and an initial reader already exists due to read_csv_auto
 			// We must re-use it, since we can't restart the reader due for it being a pipe.
 			result->initial_reader = std::move(bind_data.initial_reader);
@@ -904,7 +912,7 @@ static void SingleThreadedCSVFunction(ClientContext &context, TableFunctionInput
 // Read CSV Functions
 //===--------------------------------------------------------------------===//
 static unique_ptr<GlobalTableFunctionState> ReadCSVInitGlobal(ClientContext &context, TableFunctionInitInput &input) {
-	auto &bind_data = (ReadCSVData &)*input.bind_data;
+	auto &bind_data = input.bind_data->Cast<ReadCSVData>();
 	if (bind_data.single_threaded) {
 		return SingleThreadedCSVInit(context, input);
 	} else {
@@ -914,7 +922,7 @@ static unique_ptr<GlobalTableFunctionState> ReadCSVInitGlobal(ClientContext &con
 
 unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
                                                      GlobalTableFunctionState *global_state_p) {
-	auto &csv_data = (ReadCSVData &)*input.bind_data;
+	auto &csv_data = input.bind_data->Cast<ReadCSVData>();
 	if (csv_data.single_threaded) {
 		return SingleThreadedReadCSVInitLocal(context, input, global_state_p);
 	} else {
@@ -923,7 +931,7 @@ unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context,
 }
 
 static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
-	auto &bind_data = (ReadCSVData &)*data_p.bind_data;
+	auto &bind_data = data_p.bind_data->Cast<ReadCSVData>();
 	if (bind_data.single_threaded) {
 		SingleThreadedCSVFunction(context, data_p, output);
 	} else {
@@ -933,7 +941,7 @@ static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p,
 
 static idx_t CSVReaderGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
                                     LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state) {
-	auto &bind_data = (ReadCSVData &)*bind_data_p;
+	auto &bind_data = bind_data_p->Cast<ReadCSVData>();
 	if (bind_data.single_threaded) {
 		auto &data = local_state->Cast<SingleThreadedCSVLocalState>();
 		return data.file_index;
@@ -980,28 +988,28 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
 
 double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
                          const GlobalTableFunctionState *global_state) {
-	auto &bind_data = (ReadCSVData &)*bind_data_p;
+	auto &bind_data = bind_data_p->Cast<ReadCSVData>();
 	if (bind_data.single_threaded) {
-		auto &data = (SingleThreadedCSVState &)*global_state;
+		auto &data = global_state->Cast<SingleThreadedCSVState>();
 		return data.GetProgress(bind_data);
 	} else {
-		auto &data = (const ParallelCSVGlobalState &)*global_state;
+		auto &data = global_state->Cast<ParallelCSVGlobalState>();
 		return data.GetProgress(bind_data);
 	}
 }
 
 void CSVComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p,
                               vector<unique_ptr<Expression>> &filters) {
-	auto data = (ReadCSVData *)bind_data_p;
+	auto &data = bind_data_p->Cast<ReadCSVData>();
 	auto reset_reader =
-	    MultiFileReader::ComplexFilterPushdown(context, data->files, data->options.file_options, get, filters);
+	    MultiFileReader::ComplexFilterPushdown(context, data.files, data.options.file_options, get, filters);
 	if (reset_reader) {
-		MultiFileReader::PruneReaders(*data);
+		MultiFileReader::PruneReaders(data);
 	}
 }
 
 unique_ptr<NodeStatistics> CSVReaderCardinality(ClientContext &context, const FunctionData *bind_data_p) {
-	auto &bind_data = (ReadCSVData &)*bind_data_p;
+	auto &bind_data = bind_data_p->Cast<ReadCSVData>();
 	idx_t per_file_cardinality = 0;
 	if (bind_data.initial_reader && bind_data.initial_reader->file_handle) {
 		auto estimated_row_width = (bind_data.csv_types.size() * 5);
@@ -1086,7 +1094,7 @@ void BufferedCSVReaderOptions::Deserialize(FieldReader &reader) {
 }
 
 static void CSVReaderSerialize(FieldWriter &writer, const FunctionData *bind_data_p, const TableFunction &function) {
-	auto &bind_data = (ReadCSVData &)*bind_data_p;
+	auto &bind_data = bind_data_p->Cast<ReadCSVData>();
 	writer.WriteList<string>(bind_data.files);
 	writer.WriteRegularSerializableList<LogicalType>(bind_data.csv_types);
 	writer.WriteList<string>(bind_data.csv_names);