npm - duckdb - Versions diffs - 0.8.2-dev4514.0 → 0.8.2-dev4623.0 - Mend

duckdb 0.8.2-dev4514.0 → 0.8.2-dev4623.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

package/src/duckdb/src/function/table/read_csv.cpp CHANGED Viewed

@@ -85,25 +85,6 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
 	}
 }
-uint8_t GetCandidateSpecificity(const LogicalType &candidate_type) {
-	//! Const ht with accepted auto_types and their weights in specificity
-	const duckdb::unordered_map<uint8_t, uint8_t> auto_type_candidates_specificity {
-	    {(uint8_t)LogicalTypeId::VARCHAR, 0},  {(uint8_t)LogicalTypeId::TIMESTAMP, 1},
-	    {(uint8_t)LogicalTypeId::DATE, 2},     {(uint8_t)LogicalTypeId::TIME, 3},
-	    {(uint8_t)LogicalTypeId::DOUBLE, 4},   {(uint8_t)LogicalTypeId::FLOAT, 5},
-	    {(uint8_t)LogicalTypeId::BIGINT, 6},   {(uint8_t)LogicalTypeId::INTEGER, 7},
-	    {(uint8_t)LogicalTypeId::SMALLINT, 8}, {(uint8_t)LogicalTypeId::TINYINT, 9},
-	    {(uint8_t)LogicalTypeId::BOOLEAN, 10}, {(uint8_t)LogicalTypeId::SQLNULL, 11}};
-	auto id = (uint8_t)candidate_type.id();
-	auto it = auto_type_candidates_specificity.find(id);
-	if (it == auto_type_candidates_specificity.end()) {
-		throw BinderException("Auto Type Candidate of type %s is not accepted as a valid input",
-		                      EnumUtil::ToString(candidate_type.id()));
-	}
-	return it->second;
-}
 static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctionBindInput &input,
                                             vector<LogicalType> &return_types, vector<string> &names) {
@@ -111,117 +92,9 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 	auto &options = result->options;
 	result->files = MultiFileReader::GetFileList(context, input.inputs[0], "CSV");
-	bool explicitly_set_columns = false;
-	for (auto &kv : input.named_parameters) {
-		if (MultiFileReader::ParseOption(kv.first, kv.second, options.file_options, context)) {
-			continue;
-		}
-		auto loption = StringUtil::Lower(kv.first);
-		if (loption == "columns") {
-			explicitly_set_columns = true;
-			auto &child_type = kv.second.type();
-			if (child_type.id() != LogicalTypeId::STRUCT) {
-				throw BinderException("read_csv columns requires a struct as input");
-			}
-			auto &struct_children = StructValue::GetChildren(kv.second);
-			D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
-			for (idx_t i = 0; i < struct_children.size(); i++) {
-				auto &name = StructType::GetChildName(child_type, i);
-				auto &val = struct_children[i];
-				names.push_back(name);
-				if (val.type().id() != LogicalTypeId::VARCHAR) {
-					throw BinderException("read_csv requires a type specification as string");
-				}
-				return_types.emplace_back(TransformStringToLogicalType(StringValue::Get(val), context));
-			}
-			if (names.empty()) {
-				throw BinderException("read_csv requires at least a single column as input!");
-			}
-		} else if (loption == "auto_type_candidates") {
-			options.auto_type_candidates.clear();
-			map<uint8_t, LogicalType> candidate_types;
-			// We always have the extremes of Null and Varchar, so we can default to varchar if the
-			// sniffer is not able to confidently detect that column type
-			candidate_types[GetCandidateSpecificity(LogicalType::VARCHAR)] = LogicalType::VARCHAR;
-			candidate_types[GetCandidateSpecificity(LogicalType::SQLNULL)] = LogicalType::SQLNULL;
-			auto &child_type = kv.second.type();
-			if (child_type.id() != LogicalTypeId::LIST) {
-				throw BinderException("read_csv auto_types requires a list as input");
-			}
-			auto &list_children = ListValue::GetChildren(kv.second);
-			if (list_children.empty()) {
-				throw BinderException("auto_type_candidates requires at least one type");
-			}
-			for (auto &child : list_children) {
-				if (child.type().id() != LogicalTypeId::VARCHAR) {
-					throw BinderException("auto_type_candidates requires a type specification as string");
-				}
-				auto candidate_type = TransformStringToLogicalType(StringValue::Get(child), context);
-				candidate_types[GetCandidateSpecificity(candidate_type)] = candidate_type;
-			}
-			for (auto &candidate_type : candidate_types) {
-				options.auto_type_candidates.emplace_back(candidate_type.second);
-			}
-		} else if (loption == "column_names" || loption == "names") {
-			if (!options.name_list.empty()) {
-				throw BinderException("read_csv_auto column_names/names can only be supplied once");
-			}
-			if (kv.second.IsNull()) {
-				throw BinderException("read_csv_auto %s cannot be NULL", kv.first);
-			}
-			auto &children = ListValue::GetChildren(kv.second);
-			for (auto &child : children) {
-				options.name_list.push_back(StringValue::Get(child));
-			}
-		} else if (loption == "column_types" || loption == "types" || loption == "dtypes") {
-			auto &child_type = kv.second.type();
-			if (child_type.id() != LogicalTypeId::STRUCT && child_type.id() != LogicalTypeId::LIST) {
-				throw BinderException("read_csv_auto %s requires a struct or list as input", kv.first);
-			}
-			if (!options.sql_type_list.empty()) {
-				throw BinderException("read_csv_auto column_types/types/dtypes can only be supplied once");
-			}
-			vector<string> sql_type_names;
-			if (child_type.id() == LogicalTypeId::STRUCT) {
-				auto &struct_children = StructValue::GetChildren(kv.second);
-				D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
-				for (idx_t i = 0; i < struct_children.size(); i++) {
-					auto &name = StructType::GetChildName(child_type, i);
-					auto &val = struct_children[i];
-					if (val.type().id() != LogicalTypeId::VARCHAR) {
-						throw BinderException("read_csv_auto %s requires a type specification as string", kv.first);
-					}
-					sql_type_names.push_back(StringValue::Get(val));
-					options.sql_types_per_column[name] = i;
-				}
-			} else {
-				auto &list_child = ListType::GetChildType(child_type);
-				if (list_child.id() != LogicalTypeId::VARCHAR) {
-					throw BinderException("read_csv_auto %s requires a list of types (varchar) as input", kv.first);
-				}
-				auto &children = ListValue::GetChildren(kv.second);
-				for (auto &child : children) {
-					sql_type_names.push_back(StringValue::Get(child));
-				}
-			}
-			options.sql_type_list.reserve(sql_type_names.size());
-			for (auto &sql_type : sql_type_names) {
-				auto def_type = TransformStringToLogicalType(sql_type);
-				if (def_type.id() == LogicalTypeId::USER) {
-					throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
-					                      kv.first);
-				}
-				options.sql_type_list.push_back(std::move(def_type));
-			}
-		} else if (loption == "all_varchar") {
-			options.all_varchar = BooleanValue::Get(kv.second);
-		} else if (loption == "normalize_names") {
-			options.normalize_names = BooleanValue::Get(kv.second);
-		} else {
-			options.SetReadOption(loption, kv.second, names);
-		}
-	}
+	options.FromNamedParameters(input.named_parameters, context, return_types, names);
+	bool explicitly_set_columns = options.explicitly_set_columns;
 	options.file_options.AutoDetectHivePartitioning(result->files, context);
 	if (!options.auto_detect && return_types.empty()) {

package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp ADDED Viewed

@@ -0,0 +1,83 @@
+#include "duckdb/function/table/system_functions.hpp"
+#include "duckdb/catalog/catalog.hpp"
+#include "duckdb/storage/database_size.hpp"
+#include "duckdb/main/database_manager.hpp"
+#include "duckdb/function/function_set.hpp"
+namespace duckdb {
+struct PragmaMetadataFunctionData : public TableFunctionData { //! Bind data of the pragma_metadata_info table function
+	explicit PragmaMetadataFunctionData() {
+	}
+	vector<MetadataBlockInfo> metadata_info; //! Assigned in the bind step from Catalog::GetMetadataInfo; the scan emits one row per element
+};
+struct PragmaMetadataOperatorData : public GlobalTableFunctionState { //! Global scan state of pragma_metadata_info
+	PragmaMetadataOperatorData() : offset(0) { // a fresh scan starts at the first entry
+	}
+	idx_t offset; //! Index of the next bind-data metadata_info entry to emit; advanced by the scan function
+};
+static unique_ptr<FunctionData> PragmaMetadataInfoBind(ClientContext &context, TableFunctionBindInput &input,
+                                                       vector<LogicalType> &return_types, vector<string> &names) { //! Bind step: declares the four output columns and stores the catalog's metadata block info in the bind data
+	names.emplace_back("block_id"); // column 0
+	return_types.emplace_back(LogicalType::BIGINT);
+	names.emplace_back("total_blocks"); // column 1
+	return_types.emplace_back(LogicalType::BIGINT);
+	names.emplace_back("free_blocks"); // column 2
+	return_types.emplace_back(LogicalType::BIGINT);
+	names.emplace_back("free_list"); // column 3: list of BIGINT
+	return_types.emplace_back(LogicalType::LIST(LogicalType::BIGINT));
+	string db_name = // no argument -> default database; otherwise inputs[0] is read as the database name
+	    input.inputs.empty() ? DatabaseManager::GetDefaultDatabase(context) : StringValue::Get(input.inputs[0]);
+	auto &catalog = Catalog::GetCatalog(context, db_name);
+	auto result = make_uniq<PragmaMetadataFunctionData>();
+	result->metadata_info = catalog.GetMetadataInfo(context); // fetched once here; the scan function only reads this vector
+	return std::move(result); // moved so the PragmaMetadataFunctionData pointer converts to the unique_ptr<FunctionData> return type
+}
+unique_ptr<GlobalTableFunctionState> PragmaMetadataInfoInit(ClientContext &context, TableFunctionInitInput &input) { //! Global-state init: returns a fresh PragmaMetadataOperatorData (offset 0); neither parameter is used
+	return make_uniq<PragmaMetadataOperatorData>(); // NOTE(review): unlike the bind and scan callbacks in this file, this function is not declared static -- confirm external linkage is intended
+}
+static void PragmaMetadataInfoFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { //! Scan callback: writes up to STANDARD_VECTOR_SIZE rows into output per call, resuming at the offset kept in the global state
+	auto &bind_data = data_p.bind_data->Cast<PragmaMetadataFunctionData>();
+	auto &data = data_p.global_state->Cast<PragmaMetadataOperatorData>();
+	idx_t count = 0; // rows written during this call
+	while (data.offset < bind_data.metadata_info.size() && count < STANDARD_VECTOR_SIZE) {
+		auto &entry = bind_data.metadata_info[data.offset++]; // consume one entry; the advanced offset lives in the global state
+		idx_t col_idx = 0; // column order must match the names/return_types declared in the bind function
+		// block_id
+		output.SetValue(col_idx++, count, Value::BIGINT(entry.block_id));
+		// total_blocks
+		output.SetValue(col_idx++, count, Value::BIGINT(entry.total_blocks));
+		// free_blocks
+		output.SetValue(col_idx++, count, Value::BIGINT(entry.free_list.size())); // reported as the number of entries in free_list
+		// free_list
+		vector<Value> list_values; // each free_list id wrapped as a BIGINT Value
+		for (auto &free_id : entry.free_list) {
+			list_values.push_back(Value::BIGINT(free_id));
+		}
+		output.SetValue(col_idx++, count, Value::LIST(LogicalType::BIGINT, std::move(list_values)));
+		count++;
+	}
+	output.SetCardinality(count); // count stays 0 once every entry has been consumed
+}
+void PragmaMetadataInfo::RegisterFunction(BuiltinFunctions &set) { //! Adds the pragma_metadata_info table function set (two overloads sharing the same callbacks) to the builtin functions
+	TableFunctionSet metadata_info("pragma_metadata_info");
+	metadata_info.AddFunction( // overload without arguments: the bind function falls back to the default database
+	    TableFunction({}, PragmaMetadataInfoFunction, PragmaMetadataInfoBind, PragmaMetadataInfoInit));
+	metadata_info.AddFunction(TableFunction({LogicalType::VARCHAR}, PragmaMetadataInfoFunction, PragmaMetadataInfoBind, // overload with one VARCHAR argument: the bind function reads it as the database name
+	                                        PragmaMetadataInfoInit));
+	set.AddFunction(metadata_info);
+}
+} // namespace duckdb

package/src/duckdb/src/function/table/system/pragma_storage_info.cpp CHANGED Viewed

@@ -76,6 +76,9 @@ static unique_ptr<FunctionData> PragmaStorageInfoBind(ClientContext &context, Ta
 	names.emplace_back("block_offset");
 	return_types.emplace_back(LogicalType::BIGINT);
+	names.emplace_back("segment_info");
+	return_types.emplace_back(LogicalType::VARCHAR);
 	auto qname = QualifiedName::Parse(input.inputs[0].GetValue<string>());
 	// look up the table name in the catalog
@@ -133,6 +136,8 @@ static void PragmaStorageInfoFunction(ClientContext &context, TableFunctionInput
 			output.SetValue(col_idx++, count, Value());
 			output.SetValue(col_idx++, count, Value());
 		}
+		// segment_info
+		output.SetValue(col_idx++, count, Value(entry.segment_info));
 		count++;
 	}
 	output.SetCardinality(count);

package/src/duckdb/src/function/table/system_functions.cpp CHANGED Viewed

@@ -14,6 +14,7 @@ void BuiltinFunctions::RegisterSQLiteFunctions() {
 	PragmaCollations::RegisterFunction(*this);
 	PragmaTableInfo::RegisterFunction(*this);
 	PragmaStorageInfo::RegisterFunction(*this);
+	PragmaMetadataInfo::RegisterFunction(*this);
 	PragmaDatabaseSize::RegisterFunction(*this);
 	PragmaLastProfilingOutput::RegisterFunction(*this);
 	PragmaDetailedProfilingOutput::RegisterFunction(*this);

package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED Viewed

@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.8.2-dev4514"
+#define DUCKDB_VERSION "0.8.2-dev4623"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "38c6e8ccce"
+#define DUCKDB_SOURCE_ID "52a47a6b31"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"

package/src/duckdb/src/include/duckdb/catalog/catalog.hpp CHANGED Viewed

@@ -34,6 +34,7 @@ struct CreateIndexInfo;
 struct CreateTypeInfo;
 struct CreateTableInfo;
 struct DatabaseSize;
+struct MetadataBlockInfo;
 class AttachedDatabase;
 class ClientContext;
@@ -266,6 +267,7 @@ public:
 	                                                    unique_ptr<LogicalOperator> plan) = 0;
 	virtual DatabaseSize GetDatabaseSize(ClientContext &context) = 0;
+	virtual vector<MetadataBlockInfo> GetMetadataInfo(ClientContext &context);
 	virtual bool InMemory() = 0;
 	virtual string GetDBPath() = 0;

package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp CHANGED Viewed

@@ -54,6 +54,7 @@ public:
 	                                                       unique_ptr<LogicalOperator> plan) override;
 	DatabaseSize GetDatabaseSize(ClientContext &context) override;
+	vector<MetadataBlockInfo> GetMetadataInfo(ClientContext &context) override;
 	DUCKDB_API bool InMemory() override;
 	DUCKDB_API string GetDBPath() override;

package/src/duckdb/src/include/duckdb/common/box_renderer.hpp CHANGED Viewed

@@ -18,7 +18,7 @@ class ColumnDataCollection;
 class ColumnDataRowCollection;
 enum class ValueRenderAlignment { LEFT, MIDDLE, RIGHT };
-enum class RenderMode { ROWS, COLUMNS };
+enum class RenderMode : uint8_t { ROWS, COLUMNS };
 struct BoxRendererConfig {
 	// a max_width of 0 means we default to the terminal width

package/src/duckdb/src/include/duckdb/common/enum_util.hpp CHANGED Viewed

@@ -216,6 +216,8 @@ enum class QuoteRule : uint8_t;
 enum class RelationType : uint8_t;
+enum class RenderMode : uint8_t;
 enum class ResultModifierType : uint8_t;
 enum class SampleMethod : uint8_t;
@@ -565,6 +567,9 @@ const char* EnumUtil::ToChars<QuoteRule>(QuoteRule value);
 template<>
 const char* EnumUtil::ToChars<RelationType>(RelationType value);
+template<>
+const char* EnumUtil::ToChars<RenderMode>(RenderMode value);
 template<>
 const char* EnumUtil::ToChars<ResultModifierType>(ResultModifierType value);
@@ -950,6 +955,9 @@ QuoteRule EnumUtil::FromString<QuoteRule>(const char *value);
 template<>
 RelationType EnumUtil::FromString<RelationType>(const char *value);
+template<>
+RenderMode EnumUtil::FromString<RenderMode>(const char *value);
 template<>
 ResultModifierType EnumUtil::FromString<ResultModifierType>(const char *value);

package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp CHANGED Viewed

@@ -15,10 +15,12 @@
 namespace duckdb {
 class ClientContext;
 class Catalog;
+class DatabaseInstance;
 enum class ExpressionType : uint8_t;
 struct DeserializationData {
 	stack<reference<ClientContext>> contexts;
+	stack<reference<DatabaseInstance>> databases;
 	stack<idx_t> enums;
 	stack<reference<bound_parameter_map_t>> parameter_data;
 	stack<reference<LogicalType>> types;
@@ -74,6 +76,23 @@ inline void DeserializationData::Unset<LogicalOperatorType>() {
 	enums.pop();
 }
+template <>
+inline void DeserializationData::Set(CompressionType type) { //! Pushes the compression type, converted to idx_t, onto the 'enums' stack
+	enums.push(idx_t(type)); // same stack the other enum specializations in this header push to and pop from
+}
+template <>
+inline CompressionType DeserializationData::Get() { //! Returns the top of the 'enums' stack converted back to CompressionType; does not pop
+	AssertNotEmpty(enums);
+	return CompressionType(enums.top());
+}
+template <>
+inline void DeserializationData::Unset<CompressionType>() { //! Pops the top entry of the 'enums' stack
+	AssertNotEmpty(enums);
+	enums.pop();
+}
 template <>
 inline void DeserializationData::Set(CatalogType type) {
 	enums.push(idx_t(type));
@@ -108,6 +127,23 @@ inline void DeserializationData::Unset<ClientContext>() {
 	contexts.pop();
 }
+template <>
+inline void DeserializationData::Set(DatabaseInstance &db) { //! Pushes a reference to db onto the 'databases' stack
+	databases.push(db); // stored as reference<DatabaseInstance>, so the instance itself is not copied
+}
+template <>
+inline DatabaseInstance &DeserializationData::Get() { //! Returns the DatabaseInstance referenced at the top of the 'databases' stack; does not pop
+	AssertNotEmpty(databases);
+	return databases.top();
+}
+template <>
+inline void DeserializationData::Unset<DatabaseInstance>() { //! Pops the top entry of the 'databases' stack
+	AssertNotEmpty(databases);
+	databases.pop();
+}
 template <>
 inline void DeserializationData::Set(bound_parameter_map_t &context) {
 	parameter_data.push(context);

package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp CHANGED Viewed

@@ -159,18 +159,33 @@ struct CSVReaderOptions {
 	string suffix;
 	string write_newline;
+	//! The date format to use (if any is specified)
+	map<LogicalTypeId, StrpTimeFormat> date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
 	//! The date format to use for writing (if any is specified)
 	map<LogicalTypeId, StrfTimeFormat> write_date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
+	//! Whether or not a type format is specified
+	map<LogicalTypeId, bool> has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}};
 	void Serialize(Serializer &serializer) const;
 	static CSVReaderOptions Deserialize(Deserializer &deserializer);
 	void SetCompression(const string &compression);
+	bool GetHeader() const;
 	void SetHeader(bool has_header);
+	string GetEscape() const;
 	void SetEscape(const string &escape);
+	int64_t GetSkipRows() const;
+	void SetSkipRows(int64_t rows);
+	string GetQuote() const;
 	void SetQuote(const string &quote);
 	void SetDelimiter(const string &delimiter);
+	string GetDelimiter() const;
+	NewLineIdentifier GetNewline() const;
 	void SetNewline(const string &input);
 	//! Set an option that is supported by both reading and writing functions, called by
 	//! the SetReadOption and SetWriteOption methods
@@ -182,7 +197,16 @@ struct CSVReaderOptions {
 	void SetReadOption(const string &loption, const Value &value, vector<string> &expected_names);
 	void SetWriteOption(const string &loption, const Value &value);
 	void SetDateFormat(LogicalTypeId type, const string &format, bool read_format);
+	void ToNamedParameters(named_parameter_map_t &out);
+	void FromNamedParameters(named_parameter_map_t &in, ClientContext &context, vector<LogicalType> &return_types,
+	                         vector<string> &names);
 	string ToString() const;
+	named_parameter_map_t OutputReadSettings();
+public:
+	//! Whether columns were explicitly provided through named parameters
+	bool explicitly_set_columns = false;
 };
 } // namespace duckdb

package/src/duckdb/src/include/duckdb/function/compression_function.hpp CHANGED Viewed

@@ -14,6 +14,7 @@
 #include "duckdb/common/map.hpp"
 #include "duckdb/storage/storage_info.hpp"
 #include "duckdb/common/mutex.hpp"
+#include "duckdb/storage/data_pointer.hpp"
 namespace duckdb {
 class DatabaseInstance;
@@ -21,6 +22,7 @@ class ColumnData;
 class ColumnDataCheckpointer;
 class ColumnSegment;
 class SegmentStatistics;
+struct ColumnSegmentState;
 struct ColumnFetchState;
 struct ColumnScanState;
@@ -62,6 +64,11 @@ struct CompressedSegmentState {
 	virtual ~CompressedSegmentState() {
 	}
+	//! Display info for PRAGMA storage_info
+	virtual string GetSegmentInfo() const { // LCOV_EXCL_START
+		return "";
+	} // LCOV_EXCL_STOP
 	template <class TARGET>
 	TARGET &Cast() {
 		D_ASSERT(dynamic_cast<TARGET *>(this));
@@ -75,7 +82,7 @@ struct CompressedSegmentState {
 };
 struct CompressionAppendState {
-	CompressionAppendState(BufferHandle handle_p) : handle(std::move(handle_p)) {
+	explicit CompressionAppendState(BufferHandle handle_p) : handle(std::move(handle_p)) {
 	}
 	virtual ~CompressionAppendState() {
 	}
@@ -139,13 +146,24 @@ typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &stat
 //===--------------------------------------------------------------------===//
 // Append (optional)
 //===--------------------------------------------------------------------===//
-typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id);
+typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(
+    ColumnSegment &segment, block_id_t block_id, optional_ptr<ColumnSegmentState> segment_state);
 typedef unique_ptr<CompressionAppendState> (*compression_init_append_t)(ColumnSegment &segment);
 typedef idx_t (*compression_append_t)(CompressionAppendState &append_state, ColumnSegment &segment,
                                       SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset, idx_t count);
 typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats);
 typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row);
+//===--------------------------------------------------------------------===//
+// Serialization (optional)
+//===--------------------------------------------------------------------===//
+//! Function prototype for serializing the segment state
+typedef unique_ptr<ColumnSegmentState> (*compression_serialize_state_t)(ColumnSegment &segment);
+//! Function prototype for deserializing the segment state
+typedef unique_ptr<ColumnSegmentState> (*compression_deserialize_state_t)(Deserializer &deserializer);
+//! Function prototype for cleaning up the segment state when the column data is dropped
+typedef void (*compression_cleanup_state_t)(ColumnSegment &segment);
 class CompressionFunction {
 public:
 	CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze,
@@ -157,12 +175,16 @@ public:
 	                    compression_init_segment_t init_segment = nullptr,
 	                    compression_init_append_t init_append = nullptr, compression_append_t append = nullptr,
 	                    compression_finalize_append_t finalize_append = nullptr,
-	                    compression_revert_append_t revert_append = nullptr)
+	                    compression_revert_append_t revert_append = nullptr,
+	                    compression_serialize_state_t serialize_state = nullptr,
+	                    compression_deserialize_state_t deserialize_state = nullptr,
+	                    compression_cleanup_state_t cleanup_state = nullptr)
 	    : type(type), data_type(data_type), init_analyze(init_analyze), analyze(analyze), final_analyze(final_analyze),
 	      init_compression(init_compression), compress(compress), compress_finalize(compress_finalize),
 	      init_scan(init_scan), scan_vector(scan_vector), scan_partial(scan_partial), fetch_row(fetch_row), skip(skip),
 	      init_segment(init_segment), init_append(init_append), append(append), finalize_append(finalize_append),
-	      revert_append(revert_append) {
+	      revert_append(revert_append), serialize_state(serialize_state), deserialize_state(deserialize_state),
+	      cleanup_state(cleanup_state) {
 	}
 	//! Compression type
@@ -218,6 +240,16 @@ public:
 	compression_finalize_append_t finalize_append;
 	//! Revert append (optional)
 	compression_revert_append_t revert_append;
+	// State serialize functions
+	//! This is only necessary if the segment state has information that must be written to disk in the metadata
+	//! Serialize the segment state to the metadata (optional)
+	compression_serialize_state_t serialize_state;
+	//! Deserialize the segment state from the metadata (optional)
+	compression_deserialize_state_t deserialize_state;
+	//! Cleanup the segment state (optional)
+	compression_cleanup_state_t cleanup_state;
 };
 //! The set of compression functions

package/src/duckdb/src/include/duckdb/function/table/arrow.hpp CHANGED Viewed

@@ -129,6 +129,8 @@ public:
 	//! Scan Function
 	static void ArrowScanFunction(ClientContext &context, TableFunctionInput &data, DataChunk &output);
+	static void PopulateArrowTableType(ArrowTableType &arrow_table, ArrowSchemaWrapper &schema_p, vector<string> &names,
+	                                   vector<LogicalType> &return_types);
 protected:
 	//! Defines Maximum Number of Threads

package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp CHANGED Viewed

@@ -25,6 +25,10 @@ struct PragmaStorageInfo {
 	static void RegisterFunction(BuiltinFunctions &set);
 };
+struct PragmaMetadataInfo { //! Registration holder for the pragma_metadata_info table function
+	static void RegisterFunction(BuiltinFunctions &set); //! Adds the pragma_metadata_info function set to the given builtin functions
+};
 struct PragmaLastProfilingOutput {
 	static void RegisterFunction(BuiltinFunctions &set);
 };

package/src/duckdb/src/include/duckdb/main/connection.hpp CHANGED Viewed

@@ -131,7 +131,7 @@ public:
 	//! Reads CSV file
 	DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file);
-	DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, CSVReaderOptions &options);
+	DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, named_parameter_map_t &&options);
 	DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, const vector<string> &columns);
 	//! Reads Parquet file

package/src/duckdb/src/include/duckdb/main/extension_entries.hpp CHANGED Viewed

@@ -118,6 +118,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
     {"st_dwithin_spheroid", "spatial"},
     {"st_envelope", "spatial"},
     {"st_equals", "spatial"},
+    {"st_extent", "spatial"},
     {"st_flipcoordinates", "spatial"},
     {"st_geometrytype", "spatial"},
     {"st_geomfromgeojson", "spatial"},
@@ -126,6 +127,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
     {"st_geomfromtext", "spatial"},
     {"st_geomfromwkb", "spatial"},
     {"st_intersection", "spatial"},
+    {"st_intersection_agg", "spatial"},
     {"st_intersects", "spatial"},
     {"st_isclosed", "spatial"},
     {"st_isempty", "spatial"},
@@ -159,9 +161,14 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
     {"st_touches", "spatial"},
     {"st_transform", "spatial"},
     {"st_union", "spatial"},
+    {"st_union_agg", "spatial"},
     {"st_within", "spatial"},
     {"st_x", "spatial"},
+    {"st_xmax", "spatial"},
+    {"st_xmin", "spatial"},
     {"st_y", "spatial"},
+    {"st_ymax", "spatial"},
+    {"st_ymin", "spatial"},
     {"stem", "fts"},
     {"text", "excel"},
     {"to_arrow_ipc", "arrow"},
@@ -220,10 +227,9 @@ static constexpr ExtensionEntry EXTENSION_FILE_PREFIXES[] = {
 // Note: these are currently hardcoded in scripts/generate_extensions_function.py
 // TODO: automate by passing through to script via duckdb
-static constexpr ExtensionEntry EXTENSION_FILE_POSTFIXES[] = {{".parquet", "parquet"},
-                                                              {".json", "json"},
-                                                              {".jsonl", "json"},
-                                                              {".ndjson", "json"}}; // END_OF_EXTENSION_FILE_POSTFIXES
+static constexpr ExtensionEntry EXTENSION_FILE_POSTFIXES[] = {
+    {".parquet", "parquet"}, {".json", "json"},    {".jsonl", "json"}, {".ndjson", "json"},
+    {".shp", "spatial"},     {".gpkg", "spatial"}, {".fgb", "spatial"}}; // END_OF_EXTENSION_FILE_POSTFIXES
 // Note: these are currently hardcoded in scripts/generate_extensions_function.py
 // TODO: automate by passing through to script via duckdb

package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp CHANGED Viewed

@@ -10,16 +10,16 @@
 #include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
 #include "duckdb/main/relation/table_function_relation.hpp"
+#include "duckdb/common/shared_ptr.hpp"
+#include "duckdb/common/case_insensitive_map.hpp"
 namespace duckdb {
-struct CSVReaderOptions;
 class ReadCSVRelation : public TableFunctionRelation {
 public:
 	ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, vector<ColumnDefinition> columns,
 	                string alias = string());
-	ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, CSVReaderOptions options,
+	ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, named_parameter_map_t &&options,
 	                string alias = string());
 	string alias;

package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp CHANGED Viewed

@@ -35,6 +35,7 @@ public:
 	string ToString(idx_t depth) override;
 	string GetAlias() override;
 	void AddNamedParameter(const string &name, Value argument);
+	void SetNamedParameters(named_parameter_map_t &&named_parameters);
 private:
 	void InitializeColumns();