npm - duckdb - Versions diffs - 0.8.2-dev4572.0 → 0.8.2-dev4653.0 - Mend

duckdb 0.8.2-dev4572.0 → 0.8.2-dev4653.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.8.2-dev4572.0",
+  "version": "0.8.2-dev4653.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb/extension/json/buffered_json_reader.cpp CHANGED Viewed

@@ -1,9 +1,8 @@
 #include "buffered_json_reader.hpp"
 #include "duckdb/common/file_opener.hpp"
-#include "duckdb/common/printer.hpp"
-#include "duckdb/common/serializer/serializer.hpp"
 #include "duckdb/common/serializer/deserializer.hpp"
+#include "duckdb/common/serializer/serializer.hpp"
 #include <utility>
@@ -24,7 +23,7 @@ bool JSONFileHandle::IsOpen() const {
 }
 void JSONFileHandle::Close() {
-	if (IsOpen()) {
+	if (IsOpen() && plain_file_source) {
 		file_handle->Close();
 		file_handle = nullptr;
 	}
@@ -174,12 +173,13 @@ BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReade
 }
 void BufferedJSONReader::OpenJSONFile() {
-	D_ASSERT(!IsOpen());
 	lock_guard<mutex> guard(lock);
-	auto &file_system = FileSystem::GetFileSystem(context);
-	auto regular_file_handle =
-	    file_system.OpenFile(file_name.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, options.compression);
-	file_handle = make_uniq<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
+	if (!IsOpen()) {
+		auto &file_system = FileSystem::GetFileSystem(context);
+		auto regular_file_handle = file_system.OpenFile(file_name.c_str(), FileFlags::FILE_FLAGS_READ,
+		                                                FileLockType::NO_LOCK, options.compression);
+		file_handle = make_uniq<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
+	}
 	Reset();
 }

package/src/duckdb/extension/json/json_functions/read_json.cpp CHANGED Viewed

@@ -17,6 +17,7 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
 	Vector string_vector(LogicalType::VARCHAR);
 	// Loop through the files (if union_by_name, else just sample the first file)
+	idx_t remaining = bind_data.sample_size;
 	for (idx_t file_idx = 0; file_idx < bind_data.files.size(); file_idx++) {
 		// Create global/local state and place the reader in the right field
 		JSONScanGlobalState gstate(context, bind_data);
@@ -28,7 +29,6 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
 		}
 		// Read and detect schema
-		idx_t remaining = bind_data.sample_size;
 		while (remaining != 0) {
 			allocator.Reset();
 			auto read_count = lstate.ReadNext(gstate);
@@ -56,7 +56,11 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
 		}
 		// Close the file and stop detection if not union_by_name
-		if (!bind_data.options.file_options.union_by_name) {
+		if (bind_data.options.file_options.union_by_name) {
+			// When union_by_name=true we sample sample_size per file
+			remaining = bind_data.sample_size;
+		} else if (remaining == 0) {
+			// When union_by_name=false, we sample sample_size in total (across the first files)
 			break;
 		}
 	}

package/src/duckdb/extension/json/json_scan.cpp CHANGED Viewed

@@ -2,11 +2,11 @@
 #include "duckdb/common/enum_util.hpp"
 #include "duckdb/common/multi_file_reader.hpp"
+#include "duckdb/common/serializer/deserializer.hpp"
+#include "duckdb/common/serializer/serializer.hpp"
 #include "duckdb/main/extension_helper.hpp"
 #include "duckdb/parallel/task_scheduler.hpp"
 #include "duckdb/storage/buffer_manager.hpp"
-#include "duckdb/common/serializer/serializer.hpp"
-#include "duckdb/common/serializer/deserializer.hpp"
 namespace duckdb {
@@ -558,10 +558,8 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
 		if (current_reader) {
 			// If we performed the final read of this reader in the previous iteration, close it now
 			if (is_last) {
-				if (gstate.bind_data.type != JSONScanType::SAMPLE) {
-					TryIncrementFileIndex(gstate);
-					current_reader->CloseJSONFile();
-				}
+				TryIncrementFileIndex(gstate);
+				current_reader->CloseJSONFile();
 				current_reader = nullptr;
 				continue;
 			}

package/src/duckdb/src/common/enum_util.cpp CHANGED Viewed

@@ -11,6 +11,7 @@
 #include "duckdb/common/enum_util.hpp"
 #include "duckdb/catalog/catalog_entry/table_column_type.hpp"
+#include "duckdb/common/box_renderer.hpp"
 #include "duckdb/common/enums/access_mode.hpp"
 #include "duckdb/common/enums/aggregate_handling.hpp"
 #include "duckdb/common/enums/catalog_type.hpp"
@@ -4797,6 +4798,29 @@ RelationType EnumUtil::FromString<RelationType>(const char *value) {
 	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
 }
+template<>
+const char* EnumUtil::ToChars<RenderMode>(RenderMode value) {
+	switch(value) {
+	case RenderMode::ROWS:
+		return "ROWS";
+	case RenderMode::COLUMNS:
+		return "COLUMNS";
+	default:
+		throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
+	}
+}
+template<>
+RenderMode EnumUtil::FromString<RenderMode>(const char *value) {
+	if (StringUtil::Equals(value, "ROWS")) {
+		return RenderMode::ROWS;
+	}
+	if (StringUtil::Equals(value, "COLUMNS")) {
+		return RenderMode::COLUMNS;
+	}
+	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+}
 template<>
 const char* EnumUtil::ToChars<ResultModifierType>(ResultModifierType value) {
 	switch(value) {

package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp CHANGED Viewed

@@ -2,6 +2,8 @@
 #include "duckdb/common/bind_helpers.hpp"
 #include "duckdb/common/vector_size.hpp"
 #include "duckdb/common/string_util.hpp"
+#include "duckdb/common/enum_util.hpp"
+#include "duckdb/common/multi_file_reader.hpp"
 namespace duckdb {
@@ -60,6 +62,10 @@ static int64_t ParseInteger(const Value &value, const string &loption) {
 	return value.GetValue<int64_t>();
 }
+bool CSVReaderOptions::GetHeader() const {
+	return this->dialect_options.header;
+}
 void CSVReaderOptions::SetHeader(bool input) {
 	this->dialect_options.header = input;
 	this->has_header = true;
@@ -69,6 +75,10 @@ void CSVReaderOptions::SetCompression(const string &compression_p) {
 	this->compression = FileCompressionTypeFromString(compression_p);
 }
+string CSVReaderOptions::GetEscape() const {
+	return std::string(1, this->dialect_options.state_machine_options.escape);
+}
 void CSVReaderOptions::SetEscape(const string &input) {
 	auto escape_str = input;
 	if (escape_str.size() > 1) {
@@ -81,6 +91,19 @@ void CSVReaderOptions::SetEscape(const string &input) {
 	this->has_escape = true;
 }
+int64_t CSVReaderOptions::GetSkipRows() const {
+	return this->dialect_options.skip_rows;
+}
+void CSVReaderOptions::SetSkipRows(int64_t skip_rows) {
+	dialect_options.skip_rows = skip_rows;
+	skip_rows_set = true;
+}
+string CSVReaderOptions::GetDelimiter() const {
+	return std::string(1, this->dialect_options.state_machine_options.delimiter);
+}
 void CSVReaderOptions::SetDelimiter(const string &input) {
 	auto delim_str = StringUtil::Replace(input, "\\t", "\t");
 	if (delim_str.size() > 1) {
@@ -93,6 +116,10 @@ void CSVReaderOptions::SetDelimiter(const string &input) {
 	this->dialect_options.state_machine_options.delimiter = delim_str[0];
 }
+string CSVReaderOptions::GetQuote() const {
+	return std::string(1, this->dialect_options.state_machine_options.quote);
+}
 void CSVReaderOptions::SetQuote(const string &quote_p) {
 	auto quote_str = quote_p;
 	if (quote_str.size() > 1) {
@@ -105,6 +132,10 @@ void CSVReaderOptions::SetQuote(const string &quote_p) {
 	this->has_quote = true;
 }
+NewLineIdentifier CSVReaderOptions::GetNewline() const {
+	return dialect_options.new_line;
+}
 void CSVReaderOptions::SetNewline(const string &input) {
 	if (input == "\\n" || input == "\\r") {
 		dialect_options.new_line = NewLineIdentifier::SINGLE;
@@ -152,8 +183,7 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,
 			sample_chunks = sample_size / STANDARD_VECTOR_SIZE + 1;
 		}
 	} else if (loption == "skip") {
-		dialect_options.skip_rows = ParseInteger(value, loption);
-		skip_rows_set = true;
+		SetSkipRows(ParseInteger(value, loption));
 	} else if (loption == "max_line_size" || loption == "maximum_line_size") {
 		maximum_line_size = ParseInteger(value, loption);
 	} else if (loption == "sample_chunk_size") {
@@ -296,4 +326,185 @@ string CSVReaderOptions::ToString() const {
 	       "\n  ignore_errors=" + std::to_string(ignore_errors) + "\n  all_varchar=" + std::to_string(all_varchar);
 }
+static Value StringVectorToValue(const vector<string> &vec) {
+	vector<Value> content;
+	content.reserve(vec.size());
+	for (auto &item : vec) {
+		content.push_back(Value(item));
+	}
+	return Value::LIST(std::move(content));
+}
+static uint8_t GetCandidateSpecificity(const LogicalType &candidate_type) {
+	//! Const ht with accepted auto_types and their weights in specificity
+	const duckdb::unordered_map<uint8_t, uint8_t> auto_type_candidates_specificity {
+	    {(uint8_t)LogicalTypeId::VARCHAR, 0},  {(uint8_t)LogicalTypeId::TIMESTAMP, 1},
+	    {(uint8_t)LogicalTypeId::DATE, 2},     {(uint8_t)LogicalTypeId::TIME, 3},
+	    {(uint8_t)LogicalTypeId::DOUBLE, 4},   {(uint8_t)LogicalTypeId::FLOAT, 5},
+	    {(uint8_t)LogicalTypeId::BIGINT, 6},   {(uint8_t)LogicalTypeId::INTEGER, 7},
+	    {(uint8_t)LogicalTypeId::SMALLINT, 8}, {(uint8_t)LogicalTypeId::TINYINT, 9},
+	    {(uint8_t)LogicalTypeId::BOOLEAN, 10}, {(uint8_t)LogicalTypeId::SQLNULL, 11}};
+	auto id = (uint8_t)candidate_type.id();
+	auto it = auto_type_candidates_specificity.find(id);
+	if (it == auto_type_candidates_specificity.end()) {
+		throw BinderException("Auto Type Candidate of type %s is not accepted as a valid input",
+		                      EnumUtil::ToString(candidate_type.id()));
+	}
+	return it->second;
+}
+void CSVReaderOptions::FromNamedParameters(named_parameter_map_t &in, ClientContext &context,
+                                           vector<LogicalType> &return_types, vector<string> &names) {
+	for (auto &kv : in) {
+		if (MultiFileReader::ParseOption(kv.first, kv.second, file_options, context)) {
+			continue;
+		}
+		auto loption = StringUtil::Lower(kv.first);
+		if (loption == "columns") {
+			explicitly_set_columns = true;
+			auto &child_type = kv.second.type();
+			if (child_type.id() != LogicalTypeId::STRUCT) {
+				throw BinderException("read_csv columns requires a struct as input");
+			}
+			auto &struct_children = StructValue::GetChildren(kv.second);
+			D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
+			for (idx_t i = 0; i < struct_children.size(); i++) {
+				auto &name = StructType::GetChildName(child_type, i);
+				auto &val = struct_children[i];
+				names.push_back(name);
+				if (val.type().id() != LogicalTypeId::VARCHAR) {
+					throw BinderException("read_csv requires a type specification as string");
+				}
+				return_types.emplace_back(TransformStringToLogicalType(StringValue::Get(val), context));
+			}
+			if (names.empty()) {
+				throw BinderException("read_csv requires at least a single column as input!");
+			}
+		} else if (loption == "auto_type_candidates") {
+			auto_type_candidates.clear();
+			map<uint8_t, LogicalType> candidate_types;
+			// We always have the extremes of Null and Varchar, so we can default to varchar if the
+			// sniffer is not able to confidently detect that column type
+			candidate_types[GetCandidateSpecificity(LogicalType::VARCHAR)] = LogicalType::VARCHAR;
+			candidate_types[GetCandidateSpecificity(LogicalType::SQLNULL)] = LogicalType::SQLNULL;
+			auto &child_type = kv.second.type();
+			if (child_type.id() != LogicalTypeId::LIST) {
+				throw BinderException("read_csv auto_types requires a list as input");
+			}
+			auto &list_children = ListValue::GetChildren(kv.second);
+			if (list_children.empty()) {
+				throw BinderException("auto_type_candidates requires at least one type");
+			}
+			for (auto &child : list_children) {
+				if (child.type().id() != LogicalTypeId::VARCHAR) {
+					throw BinderException("auto_type_candidates requires a type specification as string");
+				}
+				auto candidate_type = TransformStringToLogicalType(StringValue::Get(child), context);
+				candidate_types[GetCandidateSpecificity(candidate_type)] = candidate_type;
+			}
+			for (auto &candidate_type : candidate_types) {
+				auto_type_candidates.emplace_back(candidate_type.second);
+			}
+		} else if (loption == "column_names" || loption == "names") {
+			if (!name_list.empty()) {
+				throw BinderException("read_csv_auto column_names/names can only be supplied once");
+			}
+			if (kv.second.IsNull()) {
+				throw BinderException("read_csv_auto %s cannot be NULL", kv.first);
+			}
+			auto &children = ListValue::GetChildren(kv.second);
+			for (auto &child : children) {
+				name_list.push_back(StringValue::Get(child));
+			}
+		} else if (loption == "column_types" || loption == "types" || loption == "dtypes") {
+			auto &child_type = kv.second.type();
+			if (child_type.id() != LogicalTypeId::STRUCT && child_type.id() != LogicalTypeId::LIST) {
+				throw BinderException("read_csv_auto %s requires a struct or list as input", kv.first);
+			}
+			if (!sql_type_list.empty()) {
+				throw BinderException("read_csv_auto column_types/types/dtypes can only be supplied once");
+			}
+			vector<string> sql_type_names;
+			if (child_type.id() == LogicalTypeId::STRUCT) {
+				auto &struct_children = StructValue::GetChildren(kv.second);
+				D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
+				for (idx_t i = 0; i < struct_children.size(); i++) {
+					auto &name = StructType::GetChildName(child_type, i);
+					auto &val = struct_children[i];
+					if (val.type().id() != LogicalTypeId::VARCHAR) {
+						throw BinderException("read_csv_auto %s requires a type specification as string", kv.first);
+					}
+					sql_type_names.push_back(StringValue::Get(val));
+					sql_types_per_column[name] = i;
+				}
+			} else {
+				auto &list_child = ListType::GetChildType(child_type);
+				if (list_child.id() != LogicalTypeId::VARCHAR) {
+					throw BinderException("read_csv_auto %s requires a list of types (varchar) as input", kv.first);
+				}
+				auto &children = ListValue::GetChildren(kv.second);
+				for (auto &child : children) {
+					sql_type_names.push_back(StringValue::Get(child));
+				}
+			}
+			sql_type_list.reserve(sql_type_names.size());
+			for (auto &sql_type : sql_type_names) {
+				auto def_type = TransformStringToLogicalType(sql_type);
+				if (def_type.id() == LogicalTypeId::USER) {
+					throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
+					                      kv.first);
+				}
+				sql_type_list.push_back(std::move(def_type));
+			}
+		} else if (loption == "all_varchar") {
+			all_varchar = BooleanValue::Get(kv.second);
+		} else if (loption == "normalize_names") {
+			normalize_names = BooleanValue::Get(kv.second);
+		} else {
+			SetReadOption(loption, kv.second, names);
+		}
+	}
+}
+//! This function is used to remember options set by the sniffer, for use in ReadCSVRelation
+void CSVReaderOptions::ToNamedParameters(named_parameter_map_t &named_params) {
+	if (has_delimiter) {
+		named_params["delim"] = Value(GetDelimiter());
+	}
+	if (has_newline) {
+		named_params["newline"] = Value(EnumUtil::ToString(GetNewline()));
+	}
+	if (has_quote) {
+		named_params["quote"] = Value(GetQuote());
+	}
+	if (has_escape) {
+		named_params["escape"] = Value(GetEscape());
+	}
+	if (has_header) {
+		named_params["header"] = Value(GetHeader());
+	}
+	named_params["max_line_size"] = Value::BIGINT(maximum_line_size);
+	if (skip_rows_set) {
+		named_params["skip"] = Value::BIGINT(GetSkipRows());
+	}
+	named_params["sample_chunks"] = Value::BIGINT(sample_chunks);
+	named_params["sample_chunk_size"] = Value::BIGINT(sample_chunk_size);
+	named_params["null_padding"] = Value::BOOLEAN(null_padding);
+	if (!date_format.at(LogicalType::DATE).format_specifier.empty()) {
+		named_params["dateformat"] = Value(date_format.at(LogicalType::DATE).format_specifier);
+	}
+	if (!date_format.at(LogicalType::TIMESTAMP).format_specifier.empty()) {
+		named_params["timestampformat"] = Value(date_format.at(LogicalType::TIMESTAMP).format_specifier);
+	}
+	named_params["normalize_names"] = Value::BOOLEAN(normalize_names);
+	if (!name_list.empty() && !named_params.count("column_names") && !named_params.count("names")) {
+		named_params["column_names"] = StringVectorToValue(name_list);
+	}
+	named_params["all_varchar"] = Value::BOOLEAN(all_varchar);
+	named_params["maximum_line_size"] = Value::BIGINT(maximum_line_size);
+}
 } // namespace duckdb

package/src/duckdb/src/function/table/read_csv.cpp CHANGED Viewed

@@ -85,25 +85,6 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
 	}
 }
-uint8_t GetCandidateSpecificity(const LogicalType &candidate_type) {
-	//! Const ht with accepted auto_types and their weights in specificity
-	const duckdb::unordered_map<uint8_t, uint8_t> auto_type_candidates_specificity {
-	    {(uint8_t)LogicalTypeId::VARCHAR, 0},  {(uint8_t)LogicalTypeId::TIMESTAMP, 1},
-	    {(uint8_t)LogicalTypeId::DATE, 2},     {(uint8_t)LogicalTypeId::TIME, 3},
-	    {(uint8_t)LogicalTypeId::DOUBLE, 4},   {(uint8_t)LogicalTypeId::FLOAT, 5},
-	    {(uint8_t)LogicalTypeId::BIGINT, 6},   {(uint8_t)LogicalTypeId::INTEGER, 7},
-	    {(uint8_t)LogicalTypeId::SMALLINT, 8}, {(uint8_t)LogicalTypeId::TINYINT, 9},
-	    {(uint8_t)LogicalTypeId::BOOLEAN, 10}, {(uint8_t)LogicalTypeId::SQLNULL, 11}};
-	auto id = (uint8_t)candidate_type.id();
-	auto it = auto_type_candidates_specificity.find(id);
-	if (it == auto_type_candidates_specificity.end()) {
-		throw BinderException("Auto Type Candidate of type %s is not accepted as a valid input",
-		                      EnumUtil::ToString(candidate_type.id()));
-	}
-	return it->second;
-}
 static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctionBindInput &input,
                                             vector<LogicalType> &return_types, vector<string> &names) {
@@ -111,117 +92,9 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 	auto &options = result->options;
 	result->files = MultiFileReader::GetFileList(context, input.inputs[0], "CSV");
-	bool explicitly_set_columns = false;
-	for (auto &kv : input.named_parameters) {
-		if (MultiFileReader::ParseOption(kv.first, kv.second, options.file_options, context)) {
-			continue;
-		}
-		auto loption = StringUtil::Lower(kv.first);
-		if (loption == "columns") {
-			explicitly_set_columns = true;
-			auto &child_type = kv.second.type();
-			if (child_type.id() != LogicalTypeId::STRUCT) {
-				throw BinderException("read_csv columns requires a struct as input");
-			}
-			auto &struct_children = StructValue::GetChildren(kv.second);
-			D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
-			for (idx_t i = 0; i < struct_children.size(); i++) {
-				auto &name = StructType::GetChildName(child_type, i);
-				auto &val = struct_children[i];
-				names.push_back(name);
-				if (val.type().id() != LogicalTypeId::VARCHAR) {
-					throw BinderException("read_csv requires a type specification as string");
-				}
-				return_types.emplace_back(TransformStringToLogicalType(StringValue::Get(val), context));
-			}
-			if (names.empty()) {
-				throw BinderException("read_csv requires at least a single column as input!");
-			}
-		} else if (loption == "auto_type_candidates") {
-			options.auto_type_candidates.clear();
-			map<uint8_t, LogicalType> candidate_types;
-			// We always have the extremes of Null and Varchar, so we can default to varchar if the
-			// sniffer is not able to confidently detect that column type
-			candidate_types[GetCandidateSpecificity(LogicalType::VARCHAR)] = LogicalType::VARCHAR;
-			candidate_types[GetCandidateSpecificity(LogicalType::SQLNULL)] = LogicalType::SQLNULL;
-			auto &child_type = kv.second.type();
-			if (child_type.id() != LogicalTypeId::LIST) {
-				throw BinderException("read_csv auto_types requires a list as input");
-			}
-			auto &list_children = ListValue::GetChildren(kv.second);
-			if (list_children.empty()) {
-				throw BinderException("auto_type_candidates requires at least one type");
-			}
-			for (auto &child : list_children) {
-				if (child.type().id() != LogicalTypeId::VARCHAR) {
-					throw BinderException("auto_type_candidates requires a type specification as string");
-				}
-				auto candidate_type = TransformStringToLogicalType(StringValue::Get(child), context);
-				candidate_types[GetCandidateSpecificity(candidate_type)] = candidate_type;
-			}
-			for (auto &candidate_type : candidate_types) {
-				options.auto_type_candidates.emplace_back(candidate_type.second);
-			}
-		} else if (loption == "column_names" || loption == "names") {
-			if (!options.name_list.empty()) {
-				throw BinderException("read_csv_auto column_names/names can only be supplied once");
-			}
-			if (kv.second.IsNull()) {
-				throw BinderException("read_csv_auto %s cannot be NULL", kv.first);
-			}
-			auto &children = ListValue::GetChildren(kv.second);
-			for (auto &child : children) {
-				options.name_list.push_back(StringValue::Get(child));
-			}
-		} else if (loption == "column_types" || loption == "types" || loption == "dtypes") {
-			auto &child_type = kv.second.type();
-			if (child_type.id() != LogicalTypeId::STRUCT && child_type.id() != LogicalTypeId::LIST) {
-				throw BinderException("read_csv_auto %s requires a struct or list as input", kv.first);
-			}
-			if (!options.sql_type_list.empty()) {
-				throw BinderException("read_csv_auto column_types/types/dtypes can only be supplied once");
-			}
-			vector<string> sql_type_names;
-			if (child_type.id() == LogicalTypeId::STRUCT) {
-				auto &struct_children = StructValue::GetChildren(kv.second);
-				D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
-				for (idx_t i = 0; i < struct_children.size(); i++) {
-					auto &name = StructType::GetChildName(child_type, i);
-					auto &val = struct_children[i];
-					if (val.type().id() != LogicalTypeId::VARCHAR) {
-						throw BinderException("read_csv_auto %s requires a type specification as string", kv.first);
-					}
-					sql_type_names.push_back(StringValue::Get(val));
-					options.sql_types_per_column[name] = i;
-				}
-			} else {
-				auto &list_child = ListType::GetChildType(child_type);
-				if (list_child.id() != LogicalTypeId::VARCHAR) {
-					throw BinderException("read_csv_auto %s requires a list of types (varchar) as input", kv.first);
-				}
-				auto &children = ListValue::GetChildren(kv.second);
-				for (auto &child : children) {
-					sql_type_names.push_back(StringValue::Get(child));
-				}
-			}
-			options.sql_type_list.reserve(sql_type_names.size());
-			for (auto &sql_type : sql_type_names) {
-				auto def_type = TransformStringToLogicalType(sql_type);
-				if (def_type.id() == LogicalTypeId::USER) {
-					throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
-					                      kv.first);
-				}
-				options.sql_type_list.push_back(std::move(def_type));
-			}
-		} else if (loption == "all_varchar") {
-			options.all_varchar = BooleanValue::Get(kv.second);
-		} else if (loption == "normalize_names") {
-			options.normalize_names = BooleanValue::Get(kv.second);
-		} else {
-			options.SetReadOption(loption, kv.second, names);
-		}
-	}
+	options.FromNamedParameters(input.named_parameters, context, return_types, names);
+	bool explicitly_set_columns = options.explicitly_set_columns;
 	options.file_options.AutoDetectHivePartitioning(result->files, context);
 	if (!options.auto_detect && return_types.empty()) {

package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED Viewed

@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.8.2-dev4572"
+#define DUCKDB_VERSION "0.8.2-dev4653"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "53dc13de5c"
+#define DUCKDB_SOURCE_ID "bb287d4b22"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"

package/src/duckdb/src/include/duckdb/common/box_renderer.hpp CHANGED Viewed

@@ -18,7 +18,7 @@ class ColumnDataCollection;
 class ColumnDataRowCollection;
 enum class ValueRenderAlignment { LEFT, MIDDLE, RIGHT };
-enum class RenderMode { ROWS, COLUMNS };
+enum class RenderMode : uint8_t { ROWS, COLUMNS };
 struct BoxRendererConfig {
 	// a max_width of 0 means we default to the terminal width

package/src/duckdb/src/include/duckdb/common/enum_util.hpp CHANGED Viewed

@@ -216,6 +216,8 @@ enum class QuoteRule : uint8_t;
 enum class RelationType : uint8_t;
+enum class RenderMode : uint8_t;
 enum class ResultModifierType : uint8_t;
 enum class SampleMethod : uint8_t;
@@ -565,6 +567,9 @@ const char* EnumUtil::ToChars<QuoteRule>(QuoteRule value);
 template<>
 const char* EnumUtil::ToChars<RelationType>(RelationType value);
+template<>
+const char* EnumUtil::ToChars<RenderMode>(RenderMode value);
 template<>
 const char* EnumUtil::ToChars<ResultModifierType>(ResultModifierType value);
@@ -950,6 +955,9 @@ QuoteRule EnumUtil::FromString<QuoteRule>(const char *value);
 template<>
 RelationType EnumUtil::FromString<RelationType>(const char *value);
+template<>
+RenderMode EnumUtil::FromString<RenderMode>(const char *value);
 template<>
 ResultModifierType EnumUtil::FromString<ResultModifierType>(const char *value);

package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp CHANGED Viewed

@@ -159,18 +159,33 @@ struct CSVReaderOptions {
 	string suffix;
 	string write_newline;
+	//! The date format to use (if any is specified)
+	map<LogicalTypeId, StrpTimeFormat> date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
 	//! The date format to use for writing (if any is specified)
 	map<LogicalTypeId, StrfTimeFormat> write_date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
+	//! Whether or not a type format is specified
+	map<LogicalTypeId, bool> has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}};
 	void Serialize(Serializer &serializer) const;
 	static CSVReaderOptions Deserialize(Deserializer &deserializer);
 	void SetCompression(const string &compression);
+	bool GetHeader() const;
 	void SetHeader(bool has_header);
+	string GetEscape() const;
 	void SetEscape(const string &escape);
+	int64_t GetSkipRows() const;
+	void SetSkipRows(int64_t rows);
+	string GetQuote() const;
 	void SetQuote(const string &quote);
 	void SetDelimiter(const string &delimiter);
+	string GetDelimiter() const;
+	NewLineIdentifier GetNewline() const;
 	void SetNewline(const string &input);
 	//! Set an option that is supported by both reading and writing functions, called by
 	//! the SetReadOption and SetWriteOption methods
@@ -182,7 +197,16 @@ struct CSVReaderOptions {
 	void SetReadOption(const string &loption, const Value &value, vector<string> &expected_names);
 	void SetWriteOption(const string &loption, const Value &value);
 	void SetDateFormat(LogicalTypeId type, const string &format, bool read_format);
+	void ToNamedParameters(named_parameter_map_t &out);
+	void FromNamedParameters(named_parameter_map_t &in, ClientContext &context, vector<LogicalType> &return_types,
+	                         vector<string> &names);
 	string ToString() const;
+	named_parameter_map_t OutputReadSettings();
+public:
+	//! Whether columns were explicitly provided through named parameters
+	bool explicitly_set_columns = false;
 };
 } // namespace duckdb

package/src/duckdb/src/include/duckdb/main/connection.hpp CHANGED Viewed

@@ -131,7 +131,7 @@ public:
 	//! Reads CSV file
 	DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file);
-	DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, CSVReaderOptions &options);
+	DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, named_parameter_map_t &&options);
 	DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, const vector<string> &columns);
 	//! Reads Parquet file

package/src/duckdb/src/include/duckdb/main/extension_entries.hpp CHANGED Viewed

@@ -118,6 +118,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
     {"st_dwithin_spheroid", "spatial"},
     {"st_envelope", "spatial"},
     {"st_equals", "spatial"},
+    {"st_extent", "spatial"},
     {"st_flipcoordinates", "spatial"},
     {"st_geometrytype", "spatial"},
     {"st_geomfromgeojson", "spatial"},
@@ -126,6 +127,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
     {"st_geomfromtext", "spatial"},
     {"st_geomfromwkb", "spatial"},
     {"st_intersection", "spatial"},
+    {"st_intersection_agg", "spatial"},
     {"st_intersects", "spatial"},
     {"st_isclosed", "spatial"},
     {"st_isempty", "spatial"},
@@ -159,9 +161,14 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
     {"st_touches", "spatial"},
     {"st_transform", "spatial"},
     {"st_union", "spatial"},
+    {"st_union_agg", "spatial"},
     {"st_within", "spatial"},
     {"st_x", "spatial"},
+    {"st_xmax", "spatial"},
+    {"st_xmin", "spatial"},
     {"st_y", "spatial"},
+    {"st_ymax", "spatial"},
+    {"st_ymin", "spatial"},
     {"stem", "fts"},
     {"text", "excel"},
     {"to_arrow_ipc", "arrow"},
@@ -220,10 +227,9 @@ static constexpr ExtensionEntry EXTENSION_FILE_PREFIXES[] = {
 // Note: these are currently hardcoded in scripts/generate_extensions_function.py
 // TODO: automate by passing through to script via duckdb
-static constexpr ExtensionEntry EXTENSION_FILE_POSTFIXES[] = {{".parquet", "parquet"},
-                                                              {".json", "json"},
-                                                              {".jsonl", "json"},
-                                                              {".ndjson", "json"}}; // END_OF_EXTENSION_FILE_POSTFIXES
+static constexpr ExtensionEntry EXTENSION_FILE_POSTFIXES[] = {
+    {".parquet", "parquet"}, {".json", "json"},    {".jsonl", "json"}, {".ndjson", "json"},
+    {".shp", "spatial"},     {".gpkg", "spatial"}, {".fgb", "spatial"}}; // END_OF_EXTENSION_FILE_POSTFIXES
 // Note: these are currently hardcoded in scripts/generate_extensions_function.py
 // TODO: automate by passing through to script via duckdb

package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp CHANGED Viewed

@@ -10,16 +10,16 @@
 #include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
 #include "duckdb/main/relation/table_function_relation.hpp"
+#include "duckdb/common/shared_ptr.hpp"
+#include "duckdb/common/case_insensitive_map.hpp"
 namespace duckdb {
-struct CSVReaderOptions;
 class ReadCSVRelation : public TableFunctionRelation {
 public:
 	ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, vector<ColumnDefinition> columns,
 	                string alias = string());
-	ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, CSVReaderOptions options,
+	ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, named_parameter_map_t &&options,
 	                string alias = string());
 	string alias;

package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp CHANGED Viewed

@@ -35,6 +35,7 @@ public:
 	string ToString(idx_t depth) override;
 	string GetAlias() override;
 	void AddNamedParameter(const string &name, Value argument);
+	void SetNamedParameters(named_parameter_map_t &&named_parameters);
 private:
 	void InitializeColumns();

package/src/duckdb/src/include/duckdb.h CHANGED Viewed

@@ -1740,7 +1740,7 @@ DUCKDB_API duckdb_vector duckdb_struct_vector_get_child(duckdb_vector vector, id
 /*!
 Returns whether or not a row is valid (i.e. not NULL) in the given validity mask.
-* validity: The validity mask, as obtained through `duckdb_data_chunk_get_validity`
+* validity: The validity mask, as obtained through `duckdb_vector_get_validity`
 * row: The row index
 * returns: true if the row is valid, false otherwise
 */
@@ -1749,10 +1749,10 @@ DUCKDB_API bool duckdb_validity_row_is_valid(uint64_t *validity, idx_t row);
 /*!
 In a validity mask, sets a specific row to either valid or invalid.
-Note that `duckdb_data_chunk_ensure_validity_writable` should be called before calling `duckdb_data_chunk_get_validity`,
+Note that `duckdb_vector_ensure_validity_writable` should be called before calling `duckdb_vector_get_validity`,
 to ensure that there is a validity mask to write to.
-* validity: The validity mask, as obtained through `duckdb_data_chunk_get_validity`.
+* validity: The validity mask, as obtained through `duckdb_vector_get_validity`.
 * row: The row index
 * valid: Whether or not to set the row to valid, or invalid
 */

package/src/duckdb/src/main/connection.cpp CHANGED Viewed

@@ -219,14 +219,12 @@ shared_ptr<Relation> Connection::Values(const string &values, const vector<strin
 }
 shared_ptr<Relation> Connection::ReadCSV(const string &csv_file) {
-	CSVReaderOptions options;
-	return ReadCSV(csv_file, options);
+	named_parameter_map_t options;
+	return ReadCSV(csv_file, std::move(options));
 }
-shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, CSVReaderOptions &options) {
-	options.file_path = csv_file;
-	options.auto_detect = true;
-	return make_shared<ReadCSVRelation>(context, csv_file, options);
+shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, named_parameter_map_t &&options) {
+	return make_shared<ReadCSVRelation>(context, csv_file, std::move(options));
 }
 shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, const vector<string> &columns) {

package/src/duckdb/src/main/extension/extension_install.cpp CHANGED Viewed

@@ -158,11 +158,12 @@ void WriteExtensionFileToDisk(FileSystem &fs, const string &path, void *data, id
 }
 string ExtensionHelper::ExtensionUrlTemplate(optional_ptr<const ClientConfig> client_config, const string &repository) {
-	string default_endpoint = "http://extensions.duckdb.org";
 	string versioned_path = "/${REVISION}/${PLATFORM}/${NAME}.duckdb_extension";
 #ifdef WASM_LOADABLE_EXTENSIONS
+	string default_endpoint = "https://extensions.duckdb.org";
 	versioned_path = "/duckdb-wasm" + versioned_path + ".wasm";
 #else
+	string default_endpoint = "http://extensions.duckdb.org";
 	versioned_path = versioned_path + ".gz";
 #endif
 	string custom_endpoint = client_config ? client_config->custom_extension_repo : string();

package/src/duckdb/src/main/relation/read_csv_relation.cpp CHANGED Viewed

@@ -1,6 +1,5 @@
 #include "duckdb/main/relation/read_csv_relation.hpp"
-#include "duckdb/common/string_util.hpp"
 #include "duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp"
 #include "duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp"
 #include "duckdb/execution/operator/scan/csv/csv_sniffer.hpp"
@@ -8,6 +7,9 @@
 #include "duckdb/parser/expression/comparison_expression.hpp"
 #include "duckdb/parser/expression/constant_expression.hpp"
 #include "duckdb/parser/expression/function_expression.hpp"
+#include "duckdb/common/string_util.hpp"
+#include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
+#include "duckdb/common/multi_file_reader.hpp"
 #include "duckdb/parser/expression/star_expression.hpp"
 #include "duckdb/parser/query_node/select_node.hpp"
 #include "duckdb/parser/tableref/basetableref.hpp"
@@ -34,8 +36,8 @@ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const
 	AddNamedParameter("columns", Value::STRUCT(std::move(column_names)));
 }
-ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file,
-                                 CSVReaderOptions options, string alias_p)
+ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context, const string &csv_file,
+                                 named_parameter_map_t &&options, string alias_p)
     : TableFunctionRelation(context, "read_csv_auto", {Value(csv_file)}, nullptr, false), alias(std::move(alias_p)),
       auto_detect(true) {
@@ -43,12 +45,24 @@ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const
 		alias = StringUtil::Split(csv_file, ".")[0];
 	}
-	// Force auto_detect for this constructor
-	options.auto_detect = true;
-	auto bm_file_handle = BaseCSVReader::OpenCSV(*context, options);
-	auto buffer_manager = make_shared<CSVBufferManager>(*context, std::move(bm_file_handle), options);
+	auto files = MultiFileReader::GetFileList(*context, csv_file, "CSV");
+	D_ASSERT(!files.empty());
+	auto &file_name = files[0];
+	options["auto_detect"] = Value::BOOLEAN(true);
+	CSVReaderOptions csv_options;
+	csv_options.file_path = file_name;
+	vector<string> empty;
+	vector<LogicalType> unused_types;
+	vector<string> unused_names;
+	csv_options.FromNamedParameters(options, *context, unused_types, unused_names);
+	// Run the auto-detect, populating the options with the detected settings
+	auto bm_file_handle = BaseCSVReader::OpenCSV(*context, csv_options);
+	auto buffer_manager = make_shared<CSVBufferManager>(*context, std::move(bm_file_handle), csv_options);
 	CSVStateMachineCache state_machine_cache;
-	CSVSniffer sniffer(options, buffer_manager, state_machine_cache);
+	CSVSniffer sniffer(csv_options, buffer_manager, state_machine_cache);
 	auto sniffer_result = sniffer.SniffCSV();
 	auto &types = sniffer_result.return_types;
 	auto &names = sniffer_result.names;
@@ -56,7 +70,12 @@ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const
 		columns.emplace_back(names[i], types[i]);
 	}
-	AddNamedParameter("auto_detect", Value::BOOLEAN(true));
+	//! Capture the options potentially set/altered by the auto detection phase
+	csv_options.ToNamedParameters(options);
+	// No need to auto-detect again
+	options["auto_detect"] = Value::BOOLEAN(false);
+	SetNamedParameters(std::move(options));
 }
 string ReadCSVRelation::GetAlias() {

package/src/duckdb/src/main/relation/table_function_relation.cpp CHANGED Viewed

@@ -9,6 +9,7 @@
 #include "duckdb/main/client_context.hpp"
 #include "duckdb/parser/expression/comparison_expression.hpp"
 #include "duckdb/parser/expression/columnref_expression.hpp"
+#include "duckdb/common/shared_ptr.hpp"
 namespace duckdb {
@@ -16,7 +17,12 @@ void TableFunctionRelation::AddNamedParameter(const string &name, Value argument
 	named_parameters[name] = std::move(argument);
 }
-TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
+void TableFunctionRelation::SetNamedParameters(named_parameter_map_t &&options) {
+	D_ASSERT(named_parameters.empty());
+	named_parameters = std::move(options);
+}
+TableFunctionRelation::TableFunctionRelation(const shared_ptr<ClientContext> &context, string name_p,
                                              vector<Value> parameters_p, named_parameter_map_t named_parameters,
                                              shared_ptr<Relation> input_relation_p, bool auto_init)
     : Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
@@ -25,7 +31,7 @@ TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext
 	InitializeColumns();
 }
-TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
+TableFunctionRelation::TableFunctionRelation(const shared_ptr<ClientContext> &context, string name_p,
                                              vector<Value> parameters_p, shared_ptr<Relation> input_relation_p,
                                              bool auto_init)
     : Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),

package/src/duckdb/src/storage/checkpoint_manager.cpp CHANGED Viewed

@@ -131,11 +131,11 @@ void SingleFileCheckpointWriter::CreateCheckpoint() {
 		throw FatalException("Checkpoint aborted before truncate because of PRAGMA checkpoint_abort flag");
 	}
-	// truncate the WAL
-	wal->Truncate(0);
 	// truncate the file
 	block_manager.Truncate();
+	// truncate the WAL
+	wal->Truncate(0);
 }
 void CheckpointReader::LoadCheckpoint(ClientContext &context, MetadataReader &reader) {

package/src/duckdb/src/storage/table/table_statistics.cpp CHANGED Viewed

@@ -102,9 +102,7 @@ void TableStatistics::CopyStats(TableStatistics &other) {
 }
 void TableStatistics::Serialize(Serializer &serializer) const {
-	auto column_count = column_stats.size();
-	serializer.WriteList(100, "column_stats", column_count,
-	                     [&](Serializer::List &list, idx_t i) { list.WriteElement(column_stats[i]); });
+	serializer.WriteProperty(100, "column_stats", column_stats);
 }
 void TableStatistics::Deserialize(Deserializer &deserializer, ColumnList &columns) {

package/src/duckdb/src/storage/wal_replay.cpp CHANGED Viewed

@@ -57,7 +57,10 @@ bool WriteAheadLog::Replay(AttachedDatabase &database, string &path) {
 				deserializer.End();
 			}
 		}
-	} catch (std::exception &ex) { // LCOV_EXCL_START
+	} catch (SerializationException &ex) { // LCOV_EXCL_START
+		                                   // serialization exception - torn WAL
+		                                   // continue reading
+	} catch (std::exception &ex) {
 		Printer::PrintF("Exception in WAL playback during initial read: %s\n", ex.what());
 		return false;
 	} catch (...) {
@@ -104,7 +107,10 @@ bool WriteAheadLog::Replay(AttachedDatabase &database, string &path) {
 				deserializer.End();
 			}
 		}
-	} catch (std::exception &ex) { // LCOV_EXCL_START
+	} catch (SerializationException &ex) { // LCOV_EXCL_START
+		// serialization error during WAL replay: rollback
+		con.Rollback();
+	} catch (std::exception &ex) {
 		// FIXME: this should report a proper warning in the connection
 		Printer::PrintF("Exception in WAL playback: %s\n", ex.what());
 		// exception thrown in WAL replay: rollback