duckdb 0.7.2-dev3353.0 → 0.7.2-dev3402.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -3
- package/src/duckdb/extension/json/include/json_functions.hpp +5 -1
- package/src/duckdb/extension/json/include/json_scan.hpp +1 -0
- package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
- package/src/duckdb/extension/json/json-extension.cpp +7 -3
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +16 -5
- package/src/duckdb/extension/json/json_functions/json_create.cpp +220 -93
- package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +2 -2
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +283 -117
- package/src/duckdb/extension/json/json_functions/read_json.cpp +8 -6
- package/src/duckdb/extension/json/json_functions.cpp +17 -15
- package/src/duckdb/extension/json/json_scan.cpp +8 -4
- package/src/duckdb/extension/parquet/column_reader.cpp +6 -2
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -2
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +2 -2
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -0
- package/src/duckdb/extension/parquet/include/thrift_tools.hpp +3 -5
- package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -4
- package/src/duckdb/extension/parquet/parquet_reader.cpp +11 -22
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +5 -0
- package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -4
- package/src/duckdb/src/common/file_system.cpp +13 -20
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +2 -2
- package/src/duckdb/src/execution/index/art/art.cpp +3 -1
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +1 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +4 -5
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +1 -1
- package/src/duckdb/src/function/cast/cast_function_set.cpp +89 -25
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +20 -15
- package/src/duckdb/src/function/table/copy_csv.cpp +4 -5
- package/src/duckdb/src/function/table/read_csv.cpp +6 -5
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +118 -0
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -2
- package/src/duckdb/src/include/duckdb/common/types/type_map.hpp +19 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +5 -5
- package/src/duckdb/src/main/client_context.cpp +0 -4
- package/src/duckdb/src/main/client_data.cpp +19 -0
- package/src/duckdb/src/main/database.cpp +4 -1
- package/src/duckdb/src/main/extension/extension_install.cpp +5 -6
- package/src/duckdb/src/main/extension/extension_load.cpp +11 -16
- package/src/duckdb/src/main/settings/settings.cpp +2 -3
package/src/duckdb/extension/json/json_functions.cpp

@@ -6,8 +6,8 @@
 #include "duckdb/function/replacement_scan.hpp"
 #include "duckdb/parser/expression/constant_expression.hpp"
 #include "duckdb/parser/expression/function_expression.hpp"
-#include "duckdb/parser/tableref/table_function_ref.hpp"
 #include "duckdb/parser/parsed_data/create_pragma_function_info.hpp"
+#include "duckdb/parser/tableref/table_function_ref.hpp"
 
 namespace duckdb {
 
@@ -115,6 +115,14 @@ unique_ptr<FunctionLocalState> JSONFunctionLocalState::Init(ExpressionState &sta
 	return make_uniq<JSONFunctionLocalState>(state.GetContext());
 }
 
+unique_ptr<FunctionLocalState> JSONFunctionLocalState::InitCastLocalState(CastLocalStateParameters &parameters) {
+	if (parameters.context) {
+		return make_uniq<JSONFunctionLocalState>(*parameters.context);
+	} else {
+		return make_uniq<JSONFunctionLocalState>(Allocator::DefaultAllocator());
+	}
+}
+
 JSONFunctionLocalState &JSONFunctionLocalState::ResetAndGet(ExpressionState &state) {
 	auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast<JSONFunctionLocalState>();
 	lstate.json_allocator.Reset();
@@ -197,14 +205,6 @@ unique_ptr<TableRef> JSONFunctions::ReadJSONReplacement(ClientContext &context,
 	return std::move(table_function);
 }
 
-static duckdb::unique_ptr<FunctionLocalState> InitJSONCastLocalState(CastLocalStateParameters &parameters) {
-	if (parameters.context) {
-		return make_uniq<JSONFunctionLocalState>(*parameters.context);
-	} else {
-		return make_uniq<JSONFunctionLocalState>(Allocator::DefaultAllocator());
-	}
-}
-
 static bool CastVarcharToJSON(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
 	auto &lstate = parameters.local_state->Cast<JSONFunctionLocalState>();
 	lstate.json_allocator.Reset();
@@ -215,15 +215,17 @@ static bool CastVarcharToJSON(Vector &source, Vector &result, idx_t count, CastP
 	    source, result, count, [&](string_t input, ValidityMask &mask, idx_t idx) {
 		    auto data = (char *)(input.GetData());
 		    auto length = input.GetSize();
-		    yyjson_read_err error;
 
+		    yyjson_read_err error;
 		    auto doc = JSONCommon::ReadDocumentUnsafe(data, length, JSONCommon::READ_FLAG, alc, &error);
 
 		    if (!doc) {
-			    HandleCastError::AssignError(JSONCommon::FormatParseError(data, length, error),
-			                                 parameters.error_message);
 			    mask.SetInvalid(idx);
-			    success = false;
+			    if (success) {
+				    HandleCastError::AssignError(JSONCommon::FormatParseError(data, length, error),
+				                                 parameters.error_message);
+				    success = false;
+			    }
 		    }
 		    return input;
 	    });
@@ -231,13 +233,13 @@ static bool CastVarcharToJSON(Vector &source, Vector &result, idx_t count, CastP
 	return success;
 }
 
-void JSONFunctions::RegisterCastFunctions(CastFunctionSet &casts) {
+void JSONFunctions::RegisterSimpleCastFunctions(CastFunctionSet &casts) {
 	// JSON to VARCHAR is basically free
 	casts.RegisterCastFunction(JSONCommon::JSONType(), LogicalType::VARCHAR, DefaultCasts::ReinterpretCast, 1);
 
 	// VARCHAR to JSON requires a parse so it's not free. Let's make it 1 more than a cast to STRUCT
 	auto varchar_to_json_cost = casts.ImplicitCastCost(LogicalType::SQLNULL, LogicalTypeId::STRUCT) + 1;
-	BoundCastInfo info(CastVarcharToJSON, nullptr, InitJSONCastLocalState);
+	BoundCastInfo info(CastVarcharToJSON, nullptr, JSONFunctionLocalState::InitCastLocalState);
 	casts.RegisterCastFunction(LogicalType::VARCHAR, JSONCommon::JSONType(), std::move(info), varchar_to_json_cost);
 
 	// Register NULL to JSON with a different cost than NULL to VARCHAR so the binder can disambiguate functions
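The hunks above move the cast local-state initializer from a file-local static (`InitJSONCastLocalState`) onto `JSONFunctionLocalState` itself, so other translation units (such as the reworked `json_transform.cpp` in this release) can reuse it, and `CastVarcharToJSON` now invalidates the failing row first and records only the first parse error. A minimal self-contained sketch of that only-first-error pattern (all names below are hypothetical stand-ins, not DuckDB API):

```cpp
#include <iostream>
#include <string>
#include <vector>

// Stand-in for a vectorized cast: invalidate every failing row, but keep
// only the first error message, mirroring the CastVarcharToJSON change above.
static bool CastAll(const std::vector<std::string> &inputs, std::vector<bool> &valid,
                    std::string &error_message) {
	bool success = true;
	for (size_t i = 0; i < inputs.size(); i++) {
		const bool parse_ok = !inputs[i].empty() && inputs[i].front() == '{'; // fake "JSON parse"
		if (!parse_ok) {
			valid[i] = false; // the row is always invalidated
			if (success) {    // but the message is assigned only once
				error_message = "malformed JSON at row " + std::to_string(i);
				success = false;
			}
		}
	}
	return success;
}

int main() {
	std::vector<std::string> in {"{\"a\":1}", "oops", "also bad"};
	std::vector<bool> valid(in.size(), true);
	std::string err;
	std::cout << (CastAll(in, valid, err) ? "ok" : err) << '\n'; // prints the first error only
}
```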
package/src/duckdb/extension/json/json_scan.cpp

@@ -1,10 +1,10 @@
 #include "json_scan.hpp"
 
+#include "duckdb/common/multi_file_reader.hpp"
 #include "duckdb/main/database.hpp"
 #include "duckdb/main/extension_helper.hpp"
 #include "duckdb/parallel/task_scheduler.hpp"
 #include "duckdb/storage/buffer_manager.hpp"
-#include "duckdb/common/multi_file_reader.hpp"
 
 namespace duckdb {
 
@@ -59,11 +59,15 @@ unique_ptr<FunctionData> JSONScanData::Bind(ClientContext &context, TableFunctio
 }
 
 void JSONScanData::InitializeFormats() {
+	InitializeFormats(auto_detect);
+}
+
+void JSONScanData::InitializeFormats(bool auto_detect_p) {
 	// Set defaults for date/timestamp formats if we need to
-	if (!auto_detect && date_format.empty()) {
+	if (!auto_detect_p && date_format.empty()) {
 		date_format = "%Y-%m-%d";
 	}
-	if (!auto_detect && timestamp_format.empty()) {
+	if (!auto_detect_p && timestamp_format.empty()) {
 		timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ";
 	}
 
@@ -75,7 +79,7 @@ void JSONScanData::InitializeFormats() {
 		date_format_map.AddFormat(LogicalTypeId::TIMESTAMP, timestamp_format);
 	}
 
-	if (auto_detect) {
+	if (auto_detect_p) {
 		static const unordered_map<LogicalTypeId, vector<const char *>, LogicalTypeIdHash> FORMAT_TEMPLATES = {
 		    {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
 		    {LogicalTypeId::TIMESTAMP,
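`InitializeFormats` gains a `bool` overload so callers can force or skip format auto-detection independently of the `auto_detect` member; the zero-argument version now just delegates. A compilable sketch of that delegating-overload shape (types simplified, names hypothetical):

```cpp
#include <string>

struct ScanData {
	bool auto_detect = false;
	std::string date_format;

	// The old entry point keeps its behavior by delegating to the new overload.
	void InitializeFormats() {
		InitializeFormats(auto_detect);
	}
	// Fixed defaults are only applied when auto-detection is off.
	void InitializeFormats(bool auto_detect_p) {
		if (!auto_detect_p && date_format.empty()) {
			date_format = "%Y-%m-%d";
		}
	}
};

int main() {
	ScanData data;
	data.InitializeFormats(true); // callers can now override the member flag
	return data.date_format.empty() ? 0 : 1;
}
```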
package/src/duckdb/extension/parquet/column_reader.cpp

@@ -589,8 +589,8 @@ StringColumnReader::StringColumnReader(ParquetReader &reader, LogicalType type_p
 	}
 }
 
-uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len) {
-	if (Type() != LogicalTypeId::VARCHAR) {
+uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len, const bool is_varchar) {
+	if (!is_varchar) {
 		return str_len;
 	}
 	// verify if a string is actually UTF8, and if there are no null bytes in the middle of the string
@@ -605,6 +605,10 @@ uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len
 	return str_len;
 }
 
+uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len) {
+	return VerifyString(str_data, str_len, Type() == LogicalTypeId::VARCHAR);
+}
+
 void StringColumnReader::Dictionary(shared_ptr<ResizeableBuffer> data, idx_t num_entries) {
 	dict = std::move(data);
 	dict_strings = duckdb::unique_ptr<string_t[]>(new string_t[num_entries]);
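`VerifyString` is split into a static overload taking an explicit `is_varchar` flag, so code without a reader instance (the Parquet statistics handling later in this diff) can validate strings too. The check rejects strings that are not valid UTF-8; a rough self-contained stand-in for that kind of validation (DuckDB's real check lives behind its own UTF-8 utilities and also rejects overlong encodings, which this sketch does not):

```cpp
#include <cstddef>
#include <cstdio>

// Structural UTF-8 validation only: checks lead bytes and continuation bytes.
static bool IsStructurallyValidUtf8(const unsigned char *s, size_t len) {
	size_t i = 0;
	while (i < len) {
		const unsigned char c = s[i];
		size_t extra;
		if (c < 0x80) {
			extra = 0;
		} else if ((c & 0xE0) == 0xC0) {
			extra = 1;
		} else if ((c & 0xF0) == 0xE0) {
			extra = 2;
		} else if ((c & 0xF8) == 0xF0) {
			extra = 3;
		} else {
			return false; // invalid lead byte
		}
		if (extra > 0 && i + extra >= len) {
			return false; // truncated sequence
		}
		for (size_t k = 1; k <= extra; k++) {
			if ((s[i + k] & 0xC0) != 0x80) {
				return false; // bad continuation byte
			}
		}
		i += extra + 1;
	}
	return true;
}

int main() {
	const unsigned char good[] = {0xE2, 0x82, 0xAC}; // the euro sign, a valid 3-byte sequence
	const unsigned char bad[] = {0xC3};              // truncated 2-byte sequence
	std::printf("%d %d\n", IsStructurallyValidUtf8(good, 3), IsStructurallyValidUtf8(bad, 1));
}
```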
package/src/duckdb/extension/parquet/include/parquet_reader.hpp

@@ -80,15 +80,14 @@ public:
 
 class ParquetReader {
 public:
-	ParquetReader(Allocator &allocator, unique_ptr<FileHandle> file_handle_p);
 	ParquetReader(ClientContext &context, string file_name, ParquetOptions parquet_options);
 	ParquetReader(ClientContext &context, ParquetOptions parquet_options,
 	              shared_ptr<ParquetFileMetadataCache> metadata);
 	~ParquetReader();
 
+	FileSystem &fs;
 	Allocator &allocator;
 	string file_name;
-	FileOpener *file_opener;
 	vector<LogicalType> return_types;
 	vector<string> names;
 	shared_ptr<ParquetFileMetadataCache> metadata;
package/src/duckdb/extension/parquet/include/parquet_writer.hpp

@@ -32,8 +32,8 @@ struct PreparedRowGroup {
 
 class ParquetWriter {
 public:
-	ParquetWriter(FileSystem &fs, string file_name, FileOpener *file_opener, vector<LogicalType> types,
-	              vector<string> names, duckdb_parquet::format::CompressionCodec::type codec);
+	ParquetWriter(FileSystem &fs, string file_name, vector<LogicalType> types, vector<string> names,
+	              duckdb_parquet::format::CompressionCodec::type codec);
 
 public:
 	void PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result);
package/src/duckdb/extension/parquet/include/string_column_reader.hpp

@@ -39,6 +39,7 @@ public:
 	void PrepareDeltaByteArray(ResizeableBuffer &buffer) override;
 	void DeltaByteArray(uint8_t *defines, idx_t num_values, parquet_filter_t &filter, idx_t result_offset,
 	                    Vector &result) override;
+	static uint32_t VerifyString(const char *str_data, uint32_t str_len, const bool isVarchar);
 	uint32_t VerifyString(const char *str_data, uint32_t str_len);
 
 protected:
|
@@ -51,8 +51,7 @@ struct ReadHeadComparator {
|
|
51
51
|
// 1: register all ranges that will be read, merging ranges that are consecutive
|
52
52
|
// 2: prefetch all registered ranges
|
53
53
|
struct ReadAheadBuffer {
|
54
|
-
ReadAheadBuffer(Allocator &allocator, FileHandle &handle,
|
55
|
-
: allocator(allocator), handle(handle), file_opener(opener) {
|
54
|
+
ReadAheadBuffer(Allocator &allocator, FileHandle &handle) : allocator(allocator), handle(handle) {
|
56
55
|
}
|
57
56
|
|
58
57
|
// The list of read heads
|
@@ -62,7 +61,6 @@ struct ReadAheadBuffer {
 
 	Allocator &allocator;
 	FileHandle &handle;
-	FileOpener &file_opener;
 
 	idx_t total_size = 0;
 
@@ -124,8 +122,8 @@ class ThriftFileTransport : public duckdb_apache::thrift::transport::TVirtualTra
 public:
 	static constexpr uint64_t PREFETCH_FALLBACK_BUFFERSIZE = 1000000;
 
-	ThriftFileTransport(Allocator &allocator, FileHandle &handle_p, FileOpener &opener_p, bool prefetch_mode_p)
-	    : handle(handle_p), location(0), allocator(allocator), ra_buffer(ReadAheadBuffer(allocator, handle_p, opener_p)),
+	ThriftFileTransport(Allocator &allocator, FileHandle &handle_p, bool prefetch_mode_p)
+	    : handle(handle_p), location(0), allocator(allocator), ra_buffer(ReadAheadBuffer(allocator, handle_p)),
 	      prefetch_mode(prefetch_mode_p) {
 	}
 
package/src/duckdb/extension/parquet/parquet-extension.cpp

@@ -239,8 +239,7 @@ public:
 			// missing metadata entry in cache, no usable stats
 			return nullptr;
 		}
-		auto handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK,
-		                          FileSystem::DEFAULT_COMPRESSION, FileSystem::GetFileOpener(context));
+		auto handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ);
 		// we need to check if the metadata cache entries are current
 		if (fs.GetLastModifiedTime(*handle) >= metadata->read_time) {
 			// missing or invalid metadata entry in cache, no usable stats overall
@@ -627,8 +626,7 @@ unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext &conte
 
 	auto &fs = FileSystem::GetFileSystem(context);
 	global_state->writer =
-	    make_uniq<ParquetWriter>(fs, file_path, FileSystem::GetFileOpener(context), parquet_bind.sql_types,
-	                             parquet_bind.column_names, parquet_bind.codec);
+	    make_uniq<ParquetWriter>(fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec);
 	return std::move(global_state);
 }
 
package/src/duckdb/extension/parquet/parquet_reader.cpp

@@ -49,16 +49,15 @@ using duckdb_parquet::format::Statistics;
 using duckdb_parquet::format::Type;
 
 static duckdb::unique_ptr<duckdb_apache::thrift::protocol::TProtocol>
-CreateThriftProtocol(Allocator &allocator, FileHandle &file_handle, FileOpener &opener, bool prefetch_mode) {
-	auto transport = make_shared<ThriftFileTransport>(allocator, file_handle, opener, prefetch_mode);
+CreateThriftProtocol(Allocator &allocator, FileHandle &file_handle, bool prefetch_mode) {
+	auto transport = make_shared<ThriftFileTransport>(allocator, file_handle, prefetch_mode);
 	return make_uniq<duckdb_apache::thrift::protocol::TCompactProtocolT<ThriftFileTransport>>(std::move(transport));
 }
 
-static shared_ptr<ParquetFileMetadataCache> LoadMetadata(Allocator &allocator, FileHandle &file_handle,
-                                                         FileOpener &opener) {
+static shared_ptr<ParquetFileMetadataCache> LoadMetadata(Allocator &allocator, FileHandle &file_handle) {
 	auto current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
 
-	auto proto = CreateThriftProtocol(allocator, file_handle, opener, false);
+	auto proto = CreateThriftProtocol(allocator, file_handle, false);
 	auto &transport = ((ThriftFileTransport &)*proto->getTransport());
 	auto file_size = transport.GetSize();
 	if (file_size < 12) {
@@ -428,20 +427,11 @@ ParquetOptions::ParquetOptions(ClientContext &context) {
 	}
 }
 
-ParquetReader::ParquetReader(Allocator &allocator_p, unique_ptr<FileHandle> file_handle_p) : allocator(allocator_p) {
-	file_name = file_handle_p->path;
-	file_handle = std::move(file_handle_p);
-	metadata = LoadMetadata(allocator, *file_handle, *file_opener);
-	InitializeSchema();
-}
-
 ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, ParquetOptions parquet_options_p)
-    : allocator(BufferAllocator::Get(context_p)), file_opener(FileSystem::GetFileOpener(context_p)),
+    : fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)),
       parquet_options(parquet_options_p) {
-	auto &fs = FileSystem::GetFileSystem(context_p);
 	file_name = std::move(file_name_p);
-	file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK,
-	                          FileSystem::DEFAULT_COMPRESSION, file_opener);
+	file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ);
 	if (!file_handle->CanSeek()) {
 		throw NotImplementedException(
 		    "Reading parquet files from a FIFO stream is not supported and cannot be efficiently supported since "
@@ -451,12 +441,12 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, Parqu
 	// or if this file has cached metadata
 	// or if the cached version already expired
 	if (!ObjectCache::ObjectCacheEnabled(context_p)) {
-		metadata = LoadMetadata(allocator, *file_handle, *file_opener);
+		metadata = LoadMetadata(allocator, *file_handle);
 	} else {
 		auto last_modify_time = fs.GetLastModifiedTime(*file_handle);
 		metadata = ObjectCache::GetObjectCache(context_p).Get<ParquetFileMetadataCache>(file_name);
 		if (!metadata || (last_modify_time + 10 >= metadata->read_time)) {
-			metadata = LoadMetadata(allocator, *file_handle, *file_opener);
+			metadata = LoadMetadata(allocator, *file_handle);
 			ObjectCache::GetObjectCache(context_p).Put(file_name, metadata);
 		}
 	}
@@ -466,7 +456,7 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, Parqu
 
 ParquetReader::ParquetReader(ClientContext &context_p, ParquetOptions parquet_options_p,
                              shared_ptr<ParquetFileMetadataCache> metadata_p)
-    : allocator(BufferAllocator::Get(context_p)), file_opener(FileSystem::GetFileOpener(context_p)),
+    : fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)),
       metadata(std::move(metadata_p)), parquet_options(parquet_options_p) {
 	InitializeSchema();
 }
@@ -634,11 +624,10 @@ void ParquetReader::InitializeScan(ParquetReaderScanState &state, vector<idx_t>
 		state.prefetch_mode = false;
 	}
 
-		state.file_handle = fs.OpenFile(file_handle->path, flags, FileLockType::NO_LOCK,
-		                                FileSystem::DEFAULT_COMPRESSION, file_opener);
+		state.file_handle = fs.OpenFile(file_handle->path, flags);
 	}
 
-	state.thrift_file_proto = CreateThriftProtocol(allocator, *state.file_handle, *file_opener, state.prefetch_mode);
+	state.thrift_file_proto = CreateThriftProtocol(allocator, *state.file_handle, state.prefetch_mode);
 	state.root_reader = CreateReader();
 	state.define_buf.resize(allocator, STANDARD_VECTOR_SIZE);
 	state.repeat_buf.resize(allocator, STANDARD_VECTOR_SIZE);
package/src/duckdb/extension/parquet/parquet_statistics.cpp

@@ -1,6 +1,7 @@
 #include "parquet_statistics.hpp"
 #include "parquet_decimal_utils.hpp"
 #include "parquet_timestamp.hpp"
+#include "string_column_reader.hpp"
 #include "duckdb.hpp"
 #ifndef DUCKDB_AMALGAMATION
 #include "duckdb/common/types/blob.hpp"
@@ -253,15 +254,19 @@ unique_ptr<BaseStatistics> ParquetStatisticsUtils::TransformColumnStatistics(con
 	case LogicalTypeId::VARCHAR: {
 		auto string_stats = StringStats::CreateEmpty(type);
 		if (parquet_stats.__isset.min) {
+			StringColumnReader::VerifyString(parquet_stats.min.c_str(), parquet_stats.min.size(), true);
 			StringStats::Update(string_stats, parquet_stats.min);
 		} else if (parquet_stats.__isset.min_value) {
+			StringColumnReader::VerifyString(parquet_stats.min_value.c_str(), parquet_stats.min_value.size(), true);
 			StringStats::Update(string_stats, parquet_stats.min_value);
 		} else {
 			return nullptr;
 		}
 		if (parquet_stats.__isset.max) {
+			StringColumnReader::VerifyString(parquet_stats.max.c_str(), parquet_stats.max.size(), true);
 			StringStats::Update(string_stats, parquet_stats.max);
 		} else if (parquet_stats.__isset.max_value) {
+			StringColumnReader::VerifyString(parquet_stats.max_value.c_str(), parquet_stats.max_value.size(), true);
 			StringStats::Update(string_stats, parquet_stats.max_value);
 		} else {
 			return nullptr;
package/src/duckdb/extension/parquet/parquet_writer.cpp

@@ -225,12 +225,12 @@ void VerifyUniqueNames(const vector<string> &names) {
 #endif
 }
 
-ParquetWriter::ParquetWriter(FileSystem &fs, string file_name_p, FileOpener *file_opener_p,
-                             vector<LogicalType> types_p, vector<string> names_p, CompressionCodec::type codec)
+ParquetWriter::ParquetWriter(FileSystem &fs, string file_name_p, vector<LogicalType> types_p, vector<string> names_p,
+                             CompressionCodec::type codec)
     : file_name(std::move(file_name_p)), sql_types(std::move(types_p)), column_names(std::move(names_p)), codec(codec) {
 	// initialize the file writer
-	writer = make_uniq<BufferedFileWriter>(
-	    fs, file_name.c_str(), FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW, file_opener_p);
+	writer = make_uniq<BufferedFileWriter>(fs, file_name.c_str(),
+	                                       FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW);
 	// parquet files start with the string "PAR1"
 	writer->WriteData((const_data_ptr_t) "PAR1", 4);
 	TCompactProtocolFactoryT<MyTransport> tproto_factory;
|
@@ -40,11 +40,8 @@ FileSystem::~FileSystem() {
|
|
40
40
|
}
|
41
41
|
|
42
42
|
FileSystem &FileSystem::GetFileSystem(ClientContext &context) {
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
FileOpener *FileSystem::GetFileOpener(ClientContext &context) {
|
47
|
-
return ClientData::Get(context).file_opener.get();
|
43
|
+
auto &client_data = ClientData::Get(context);
|
44
|
+
return *client_data.client_file_system;
|
48
45
|
}
|
49
46
|
|
50
47
|
bool PathMatched(const string &path, const string &sub_path) {
|
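`FileSystem::GetFileSystem(ClientContext &)` now returns `client_data.client_file_system`, and the `GetFileOpener` accessor is gone. Together with the new `opener_file_system.hpp` in the file list, the apparent design is a decorator that injects the client's `FileOpener` into every file-system call, which is why the remaining hunks in this diff can stop threading an opener through explicitly. A hypothetical sketch of that decorator shape (simplified types, not the actual DuckDB class):

```cpp
#include <memory>
#include <stdexcept>
#include <string>

struct FileOpener {};
struct FileHandle {};

struct FileSystem {
	virtual ~FileSystem() = default;
	virtual std::unique_ptr<FileHandle> OpenFile(const std::string &path, int flags,
	                                             FileOpener *opener) = 0;
};

// Decorator: forwards to the wrapped file system, always supplying the
// client's opener, so call sites no longer pass one themselves.
struct OpenerFileSystem final : FileSystem {
	OpenerFileSystem(FileSystem &inner, FileOpener &opener) : inner(inner), opener(opener) {
	}
	std::unique_ptr<FileHandle> OpenFile(const std::string &path, int flags,
	                                     FileOpener *explicit_opener) override {
		if (explicit_opener) {
			throw std::runtime_error("opener is injected; do not pass one explicitly");
		}
		return inner.OpenFile(path, flags, &opener);
	}
	FileSystem &inner;
	FileOpener &opener;
};

// Minimal concrete file system so the sketch is runnable.
struct NullFileSystem final : FileSystem {
	std::unique_ptr<FileHandle> OpenFile(const std::string &, int, FileOpener *) override {
		return std::make_unique<FileHandle>();
	}
};

int main() {
	NullFileSystem base;
	FileOpener opener;
	OpenerFileSystem fs(base, opener);
	return fs.OpenFile("example.csv", 0, nullptr) ? 0 : 1;
}
```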
@@ -193,7 +190,7 @@ string FileSystem::ExtractBaseName(const string &path) {
 	return vec[0];
 }
 
-string FileSystem::GetHomeDirectory(FileOpener *opener) {
+string FileSystem::GetHomeDirectory(optional_ptr<FileOpener> opener) {
 	// read the home_directory setting first, if it is set
 	if (opener) {
 		Value result;
@@ -215,7 +212,11 @@ string FileSystem::GetHomeDirectory(FileOpener *opener) {
 	return string();
 }
 
-string FileSystem::ExpandPath(const string &path, FileOpener *opener) {
+string FileSystem::GetHomeDirectory() {
+	return GetHomeDirectory(nullptr);
+}
+
+string FileSystem::ExpandPath(const string &path, optional_ptr<FileOpener> opener) {
 	if (path.empty()) {
 		return path;
 	}
@@ -225,6 +226,10 @@ string FileSystem::ExpandPath(const string &path, FileOpener *opener) {
 	return path;
 }
 
+string FileSystem::ExpandPath(const string &path) {
+	return FileSystem::ExpandPath(path, nullptr);
+}
+
 // LCOV_EXCL_START
 unique_ptr<FileHandle> FileSystem::OpenFile(const string &path, uint8_t flags, FileLockType lock,
                                             FileCompressionType compression, FileOpener *opener) {
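`GetHomeDirectory` and `ExpandPath` switch from raw `FileOpener *` parameters to `optional_ptr<FileOpener>` and gain zero-argument convenience overloads that delegate with `nullptr`. `optional_ptr` is DuckDB's non-owning "may be null" pointer wrapper; a minimal sketch of the idea (the real class also carries validity checks):

```cpp
#include <string>

// Minimal stand-in for a non-owning, possibly-null pointer wrapper.
template <class T>
class optional_ptr {
public:
	optional_ptr() : ptr(nullptr) {
	}
	optional_ptr(T *ptr_p) : ptr(ptr_p) { // implicit: raw pointers keep working at call sites
	}
	explicit operator bool() const {
		return ptr != nullptr;
	}
	T &operator*() {
		return *ptr;
	}
	T *operator->() {
		return ptr;
	}

private:
	T *ptr;
};

struct FileOpener {};

std::string GetHomeDirectory(optional_ptr<FileOpener> opener) {
	if (opener) {
		// ...consult the opener's settings first...
	}
	return "~";
}

// Convenience overload, as added in the hunk above.
std::string GetHomeDirectory() {
	return GetHomeDirectory(nullptr);
}

int main() {
	return GetHomeDirectory().empty() ? 1 : 0;
}
```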
@@ -247,14 +252,6 @@ int64_t FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) {
 	throw NotImplementedException("%s: Write is not implemented!", GetName());
 }
 
-string FileSystem::GetFileExtension(FileHandle &handle) {
-	auto dot_location = handle.path.rfind('.');
-	if (dot_location != std::string::npos) {
-		return handle.path.substr(dot_location + 1, std::string::npos);
-	}
-	return string();
-}
-
 int64_t FileSystem::GetFileSize(FileHandle &handle) {
 	throw NotImplementedException("%s: GetFileSize is not implemented!", GetName());
 }
@@ -312,10 +309,6 @@ vector<string> FileSystem::Glob(const string &path, FileOpener *opener) {
 	throw NotImplementedException("%s: Glob is not implemented!", GetName());
 }
 
-vector<string> FileSystem::Glob(const string &path, ClientContext &context) {
-	return Glob(path, GetFileOpener(context));
-}
-
 void FileSystem::RegisterSubSystem(unique_ptr<FileSystem> sub_fs) {
 	throw NotImplementedException("%s: Can't register a sub system on a non-virtual file system", GetName());
 }
@@ -337,7 +330,7 @@ bool FileSystem::CanHandleFile(const string &fpath) {
 }
 
 vector<string> FileSystem::GlobFiles(const string &pattern, ClientContext &context, FileGlobOptions options) {
-	auto result = Glob(pattern, context);
+	auto result = Glob(pattern);
 	if (result.empty()) {
 		string required_extension;
 		const string prefixes[] = {"http://", "https://", "s3://"};
package/src/duckdb/src/common/serializer/buffered_file_writer.cpp

@@ -8,9 +8,9 @@ namespace duckdb {
 // Remove this when we switch C++17: https://stackoverflow.com/a/53350948
 constexpr uint8_t BufferedFileWriter::DEFAULT_OPEN_FLAGS;
 
-BufferedFileWriter::BufferedFileWriter(FileSystem &fs, const string &path_p, uint8_t open_flags, FileOpener *opener)
+BufferedFileWriter::BufferedFileWriter(FileSystem &fs, const string &path_p, uint8_t open_flags)
     : fs(fs), path(path_p), data(unique_ptr<data_t[]>(new data_t[FILE_BUFFER_SIZE])), offset(0), total_written(0) {
-	handle = fs.OpenFile(path, open_flags, FileLockType::WRITE_LOCK, FileCompressionType::UNCOMPRESSED, opener);
+	handle = fs.OpenFile(path, open_flags, FileLockType::WRITE_LOCK);
 }
 
 int64_t BufferedFileWriter::GetFileSize() {
package/src/duckdb/src/execution/index/art/art.cpp

@@ -130,6 +130,9 @@ static void TemplatedGenerateKeys(ArenaAllocator &allocator, Vector &input, idx_
 		auto idx = idata.sel->get_index(i);
 		if (idata.validity.RowIsValid(idx)) {
 			ARTKey::CreateARTKey<T>(allocator, input.GetType(), keys[i], input_data[idx]);
+		} else {
+			// we need to possibly reset the former key value in the keys vector
+			keys[i] = ARTKey();
 		}
 	}
 }
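The fix above matters because ART key generation reuses the `keys` scratch vector across chunks: without the reset, a NULL row would silently keep the key left over from the previous chunk. A self-contained illustration of that bug class (hypothetical names, `std::string` standing in for `ARTKey`):

```cpp
#include <iostream>
#include <string>
#include <vector>

int main() {
	// Scratch vector reused across batches, like the keys vector in TemplatedGenerateKeys.
	std::vector<std::string> keys(3);
	auto process = [&](const std::vector<const char *> &batch) {
		for (size_t i = 0; i < batch.size(); i++) {
			if (batch[i]) {
				keys[i] = batch[i];
			} else {
				keys[i] = {}; // the fix: reset, otherwise keys[i] holds the previous batch's value
			}
		}
		for (const auto &k : keys) {
			std::cout << (k.empty() ? "<null>" : k) << " ";
		}
		std::cout << "\n";
	};
	process({"a", "b", "c"});
	process({"x", nullptr, "z"}); // without the reset, row 1 would wrongly print "b"
}
```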
@@ -680,7 +683,6 @@ Node ART::Lookup(Node node, const ARTKey &key, idx_t depth) {
 			}
 			return node;
 		}
-
 		auto &node_prefix = node.GetPrefix(*this);
 		if (node_prefix.count) {
 			for (idx_t pos = 0; pos < node_prefix.count; pos++) {
package/src/duckdb/src/execution/operator/join/physical_index_join.cpp

@@ -167,7 +167,6 @@ void PhysicalIndexJoin::GetRHSMatches(ExecutionContext &context, DataChunk &inpu
 
 	auto &state = state_p.Cast<IndexJoinOperatorState>();
 	auto &art = index.Cast<ART>();
-	;
 
 	// generate the keys for this chunk
 	state.arena_allocator.Reset();
package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp

@@ -35,7 +35,7 @@ string BaseCSVReader::GetLineNumberStr(idx_t line_error, bool is_line_estimated,
 BaseCSVReader::BaseCSVReader(ClientContext &context_p, BufferedCSVReaderOptions options_p,
                              const vector<LogicalType> &requested_types)
     : context(context_p), fs(FileSystem::GetFileSystem(context)), allocator(Allocator::Get(context)),
-      opener(FileSystem::GetFileOpener(context)), options(std::move(options_p)) {
+      options(std::move(options_p)) {
 }
 
 BaseCSVReader::~BaseCSVReader() {
@@ -43,7 +43,7 @@ BaseCSVReader::~BaseCSVReader() {
 
 unique_ptr<CSVFileHandle> BaseCSVReader::OpenCSV(const BufferedCSVReaderOptions &options_p) {
 	auto file_handle = fs.OpenFile(options_p.file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK,
-	                               options_p.compression, opener);
+	                               options_p.compression);
 	if (file_handle->CanSeek()) {
 		file_handle->Reset();
 	}
package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp

@@ -636,10 +636,10 @@ void ParallelCSVReader::ParseCSV(DataChunk &insert_chunk) {
 }
 
 idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx) {
-
 	while (true) {
 		if (buffer->line_info->CanItGetLine(file_idx, buffer_idx)) {
 			auto cur_start = verification_positions.beginning_of_first_line + buffer->buffer->GetCSVGlobalStart();
+			// line errors are 1-indexed
 			return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false);
 		}
 	}
package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp

@@ -207,8 +207,7 @@ unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext
 		fs.CreateDirectory(file_path);
 	} else if (!overwrite_or_ignore) {
 		idx_t n_files = 0;
-		fs.ListFiles(
-		    file_path, [&n_files](const string &path, bool) { n_files++; }, FileOpener::Get(context));
+		fs.ListFiles(file_path, [&n_files](const string &path, bool) { n_files++; });
 		if (n_files > 0) {
 			throw IOException("Directory %s is not empty! Enable OVERWRITE_OR_IGNORE option to force writing",
 			                  file_path);
package/src/duckdb/src/execution/operator/persistent/physical_export.cpp

@@ -27,10 +27,10 @@ static void WriteCatalogEntries(stringstream &ss, vector<reference<CatalogEntry>
 	ss << std::endl;
 }
 
-static void WriteStringStreamToFile(FileSystem &fs, FileOpener *opener, stringstream &ss, const string &path) {
+static void WriteStringStreamToFile(FileSystem &fs, stringstream &ss, const string &path) {
 	auto ss_string = ss.str();
 	auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW,
-	                          FileLockType::WRITE_LOCK, FileCompressionType::UNCOMPRESSED, opener);
+	                          FileLockType::WRITE_LOCK);
 	fs.Write(*handle, (void *)ss_string.c_str(), ss_string.size());
 	handle.reset();
 }
@@ -108,7 +108,6 @@ SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &c
 
 	auto &ccontext = context.client;
 	auto &fs = FileSystem::GetFileSystem(ccontext);
-	auto *opener = FileSystem::GetFileOpener(ccontext);
 
 	// gather all catalog types to export
 	vector<reference<CatalogEntry>> schemas;
@@ -172,7 +171,7 @@ SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &c
 	WriteCatalogEntries(ss, indexes);
 	WriteCatalogEntries(ss, macros);
 
-	WriteStringStreamToFile(fs, opener, ss, fs.JoinPath(info->file_path, "schema.sql"));
+	WriteStringStreamToFile(fs, ss, fs.JoinPath(info->file_path, "schema.sql"));
 
 	// write the load.sql file
 	// for every table, we write COPY INTO statement with the specified options
@@ -181,7 +180,7 @@ SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &c
 		auto exported_table_info = exported_tables.data[i].table_data;
 		WriteCopyStatement(fs, load_ss, *info, exported_table_info, function);
 	}
-	WriteStringStreamToFile(fs, opener, load_ss, fs.JoinPath(info->file_path, "load.sql"));
+	WriteStringStreamToFile(fs, load_ss, fs.JoinPath(info->file_path, "load.sql"));
 	state.finished = true;
 
 	return SourceResultType::FINISHED;
package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp

@@ -11,7 +11,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile
 	bool preserve_insertion_order = PhysicalPlanGenerator::PreserveInsertionOrder(context, *plan);
 	bool supports_batch_index = PhysicalPlanGenerator::UseBatchIndex(context, *plan);
 	auto &fs = FileSystem::GetFileSystem(context);
-	op.file_path = fs.ExpandPath(op.file_path, FileSystem::GetFileOpener(context));
+	op.file_path = fs.ExpandPath(op.file_path);
 	if (op.use_tmp_file) {
 		op.file_path += ".tmp";
 	}