duckdb 0.8.2-dev4514.0 → 0.8.2-dev4623.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/lib/duckdb.js +11 -1
  2. package/package.json +3 -1
  3. package/src/connection.cpp +48 -7
  4. package/src/duckdb/src/catalog/catalog.cpp +5 -0
  5. package/src/duckdb/src/catalog/duck_catalog.cpp +4 -0
  6. package/src/duckdb/src/common/enum_util.cpp +24 -0
  7. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +213 -2
  8. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +59 -38
  9. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  10. package/src/duckdb/src/function/table/arrow.cpp +18 -13
  11. package/src/duckdb/src/function/table/read_csv.cpp +3 -130
  12. package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp +83 -0
  13. package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +5 -0
  14. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  15. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  16. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
  17. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +1 -0
  18. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +1 -1
  19. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  20. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +36 -0
  21. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +24 -0
  22. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +36 -4
  23. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
  24. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  25. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -1
  26. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +10 -4
  27. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +3 -3
  28. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
  29. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +27 -4
  30. package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +4 -2
  31. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +22 -1
  32. package/src/duckdb/src/include/duckdb/storage/database_size.hpp +6 -0
  33. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  34. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +6 -1
  36. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +7 -3
  37. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
  38. package/src/duckdb/src/main/connection.cpp +4 -6
  39. package/src/duckdb/src/main/extension/extension_install.cpp +2 -1
  40. package/src/duckdb/src/main/relation/read_csv_relation.cpp +28 -9
  41. package/src/duckdb/src/main/relation/table_function_relation.cpp +8 -2
  42. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +1 -4
  43. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -4
  44. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +47 -10
  45. package/src/duckdb/src/storage/checkpoint_manager.cpp +0 -2
  46. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +6 -1
  47. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +62 -12
  48. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -1
  49. package/src/duckdb/src/storage/data_pointer.cpp +20 -0
  50. package/src/duckdb/src/storage/local_storage.cpp +3 -7
  51. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -15
  52. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +4 -0
  53. package/src/duckdb/src/storage/single_file_block_manager.cpp +15 -9
  54. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  55. package/src/duckdb/src/storage/storage_manager.cpp +5 -0
  56. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -0
  57. package/src/duckdb/src/storage/table/column_data.cpp +17 -14
  58. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +4 -8
  59. package/src/duckdb/src/storage/table/column_segment.cpp +21 -12
  60. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  61. package/src/duckdb/ub_src_storage.cpp +2 -0
  62. package/src/duckdb_node.hpp +1 -0
  63. package/test/close_hang.test.ts +39 -0
package/lib/duckdb.js CHANGED
@@ -412,6 +412,13 @@ Connection.prototype.register_buffer;
  */
 Connection.prototype.unregister_buffer;
 
+/**
+ * Closes connection
+ * @method
+ * @param callback
+ * @return {void}
+ */
+Connection.prototype.close;
 
 /**
  * Closes database instance
@@ -420,7 +427,10 @@ Connection.prototype.unregister_buffer;
  * @return {void}
  */
 Database.prototype.close = function() {
-    this.default_connection = null
+    if (this.default_connection) {
+        this.default_connection.close(); // this queues up a job in the internals, which blocks the below close call
+        this.default_connection = null;
+    }
     this.close_internal.apply(this, arguments);
 };
 
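The changes above add an explicit close() to Connection and make Database.close() close its default connection first. A minimal usage sketch of the new API (the in-memory database and query are illustrative, not part of the diff):

const duckdb = require('duckdb');

const db = new duckdb.Database(':memory:');
const con = db.connect();

con.all('SELECT 42 AS answer', (err, rows) => {
    if (err) throw err;
    console.log(rows); // [ { answer: 42 } ]
    // Close the connection explicitly; the callback receives an error
    // if the connection was already closed.
    con.close((closeErr) => {
        if (closeErr) throw closeErr;
        // Database.close() now closes its default connection first,
        // which is the behavior the new test/close_hang.test.ts exercises.
        db.close();
    });
});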
package/package.json CHANGED
@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.8.2-dev4514.0",
+  "version": "0.8.2-dev4623.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {
@@ -29,12 +29,14 @@
   "devDependencies": {
     "@types/chai": "^4.3.4",
     "@types/chai-as-promised": "^7.1.5",
+    "@types/fs-extra": "^11.0.1",
     "@types/mocha": "^10.0.0",
     "@types/node": "^18.11.0",
     "apache-arrow": "^9.0.0",
     "aws-sdk": "^2.790.0",
     "chai": "^4.3.6",
     "chai-as-promised": "^7.1.1",
+    "fs-extra": "^11.1.1",
     "jsdoc3-parser": "^2.0.0",
     "mocha": "^8.3.0",
     "ts-node": "^10.9.1",
package/src/connection.cpp CHANGED
@@ -12,13 +12,13 @@ namespace node_duckdb {
 Napi::FunctionReference Connection::Init(Napi::Env env, Napi::Object exports) {
     Napi::HandleScope scope(env);
 
-    Napi::Function t =
-        DefineClass(env, "Connection",
-                    {InstanceMethod("prepare", &Connection::Prepare), InstanceMethod("exec", &Connection::Exec),
-                     InstanceMethod("register_udf_bulk", &Connection::RegisterUdf),
-                     InstanceMethod("register_buffer", &Connection::RegisterBuffer),
-                     InstanceMethod("unregister_udf", &Connection::UnregisterUdf),
-                     InstanceMethod("unregister_buffer", &Connection::UnRegisterBuffer)});
+    Napi::Function t = DefineClass(
+        env, "Connection",
+        {InstanceMethod("prepare", &Connection::Prepare), InstanceMethod("exec", &Connection::Exec),
+         InstanceMethod("register_udf_bulk", &Connection::RegisterUdf),
+         InstanceMethod("register_buffer", &Connection::RegisterBuffer),
+         InstanceMethod("unregister_udf", &Connection::UnregisterUdf), InstanceMethod("close", &Connection::Close),
+         InstanceMethod("unregister_buffer", &Connection::UnRegisterBuffer)});
 
     exports.Set("Connection", t);
 
@@ -407,6 +407,36 @@ struct ExecTaskWithCallback : public ExecTask {
     std::function<void(void)> cpp_callback;
 };
 
+struct CloseConnectionTask : public Task {
+    CloseConnectionTask(Connection &connection, Napi::Function callback) : Task(connection, callback) {
+    }
+
+    void DoWork() override {
+        auto &connection = Get<Connection>();
+        if (connection.connection) {
+            connection.connection.reset();
+            success = true;
+        } else {
+            success = false;
+        }
+    }
+
+    void Callback() override {
+        auto &connection = Get<Connection>();
+        auto env = connection.Env();
+        Napi::HandleScope scope(env);
+
+        auto cb = callback.Value();
+        if (!success) {
+            cb.MakeCallback(connection.Value(), {Utils::CreateError(env, "Connection was already closed")});
+            return;
+        }
+        cb.MakeCallback(connection.Value(), {env.Null(), connection.Value()});
+    }
+
+    bool success = false;
+};
+
 Napi::Value Connection::Exec(const Napi::CallbackInfo &info) {
     auto env = info.Env();
 
@@ -512,6 +542,17 @@ Napi::Value Connection::UnRegisterBuffer(const Napi::CallbackInfo &info) {
     return Value();
 }
 
+Napi::Value Connection::Close(const Napi::CallbackInfo &info) {
+    Napi::Function callback;
+    if (info.Length() > 0 && info[0].IsFunction()) {
+        callback = info[0].As<Napi::Function>();
+    }
+
+    database_ref->Schedule(info.Env(), duckdb::make_uniq<CloseConnectionTask>(*this, callback));
+
+    return info.Env().Undefined();
+}
+
 Napi::Object Connection::NewInstance(const Napi::Value &db) {
     return NodeDuckDB::GetData(db.Env())->connection_constructor.New({db});
 }
package/src/duckdb/src/catalog/catalog.cpp CHANGED
@@ -35,6 +35,7 @@
 #include "duckdb/main/database_manager.hpp"
 #include "duckdb/function/built_in_functions.hpp"
 #include "duckdb/catalog/similar_catalog_entry.hpp"
+#include "duckdb/storage/database_size.hpp"
 #include <algorithm>
 
 namespace duckdb {
@@ -831,6 +832,10 @@ void Catalog::Alter(ClientContext &context, AlterInfo &info) {
     return lookup.schema->Alter(context, info);
 }
 
+vector<MetadataBlockInfo> Catalog::GetMetadataInfo(ClientContext &context) {
+    return vector<MetadataBlockInfo>();
+}
+
 void Catalog::Verify() {
 }
 
package/src/duckdb/src/catalog/duck_catalog.cpp CHANGED
@@ -132,6 +132,10 @@ DatabaseSize DuckCatalog::GetDatabaseSize(ClientContext &context) {
     return db.GetStorageManager().GetDatabaseSize();
 }
 
+vector<MetadataBlockInfo> DuckCatalog::GetMetadataInfo(ClientContext &context) {
+    return db.GetStorageManager().GetMetadataInfo();
+}
+
 bool DuckCatalog::InMemory() {
     return db.GetStorageManager().InMemory();
 }
package/src/duckdb/src/common/enum_util.cpp CHANGED
@@ -11,6 +11,7 @@
 
 #include "duckdb/common/enum_util.hpp"
 #include "duckdb/catalog/catalog_entry/table_column_type.hpp"
+#include "duckdb/common/box_renderer.hpp"
 #include "duckdb/common/enums/access_mode.hpp"
 #include "duckdb/common/enums/aggregate_handling.hpp"
 #include "duckdb/common/enums/catalog_type.hpp"
@@ -4797,6 +4798,29 @@ RelationType EnumUtil::FromString<RelationType>(const char *value) {
     throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
 }
 
+template<>
+const char* EnumUtil::ToChars<RenderMode>(RenderMode value) {
+    switch(value) {
+    case RenderMode::ROWS:
+        return "ROWS";
+    case RenderMode::COLUMNS:
+        return "COLUMNS";
+    default:
+        throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
+    }
+}
+
+template<>
+RenderMode EnumUtil::FromString<RenderMode>(const char *value) {
+    if (StringUtil::Equals(value, "ROWS")) {
+        return RenderMode::ROWS;
+    }
+    if (StringUtil::Equals(value, "COLUMNS")) {
+        return RenderMode::COLUMNS;
+    }
+    throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+}
+
 template<>
 const char* EnumUtil::ToChars<ResultModifierType>(ResultModifierType value) {
     switch(value) {
package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp CHANGED
@@ -2,6 +2,8 @@
 #include "duckdb/common/bind_helpers.hpp"
 #include "duckdb/common/vector_size.hpp"
 #include "duckdb/common/string_util.hpp"
+#include "duckdb/common/enum_util.hpp"
+#include "duckdb/common/multi_file_reader.hpp"
 
 namespace duckdb {
 
@@ -60,6 +62,10 @@ static int64_t ParseInteger(const Value &value, const string &loption) {
     return value.GetValue<int64_t>();
 }
 
+bool CSVReaderOptions::GetHeader() const {
+    return this->dialect_options.header;
+}
+
 void CSVReaderOptions::SetHeader(bool input) {
     this->dialect_options.header = input;
     this->has_header = true;
@@ -69,6 +75,10 @@ void CSVReaderOptions::SetCompression(const string &compression_p) {
     this->compression = FileCompressionTypeFromString(compression_p);
 }
 
+string CSVReaderOptions::GetEscape() const {
+    return std::string(1, this->dialect_options.state_machine_options.escape);
+}
+
 void CSVReaderOptions::SetEscape(const string &input) {
     auto escape_str = input;
     if (escape_str.size() > 1) {
@@ -81,6 +91,19 @@ void CSVReaderOptions::SetEscape(const string &input) {
     this->has_escape = true;
 }
 
+int64_t CSVReaderOptions::GetSkipRows() const {
+    return this->dialect_options.skip_rows;
+}
+
+void CSVReaderOptions::SetSkipRows(int64_t skip_rows) {
+    dialect_options.skip_rows = skip_rows;
+    skip_rows_set = true;
+}
+
+string CSVReaderOptions::GetDelimiter() const {
+    return std::string(1, this->dialect_options.state_machine_options.delimiter);
+}
+
 void CSVReaderOptions::SetDelimiter(const string &input) {
     auto delim_str = StringUtil::Replace(input, "\\t", "\t");
     if (delim_str.size() > 1) {
@@ -93,6 +116,10 @@ void CSVReaderOptions::SetDelimiter(const string &input) {
     this->dialect_options.state_machine_options.delimiter = delim_str[0];
 }
 
+string CSVReaderOptions::GetQuote() const {
+    return std::string(1, this->dialect_options.state_machine_options.quote);
+}
+
 void CSVReaderOptions::SetQuote(const string &quote_p) {
     auto quote_str = quote_p;
     if (quote_str.size() > 1) {
@@ -105,6 +132,10 @@ void CSVReaderOptions::SetQuote(const string &quote_p) {
     this->has_quote = true;
 }
 
+NewLineIdentifier CSVReaderOptions::GetNewline() const {
+    return dialect_options.new_line;
+}
+
 void CSVReaderOptions::SetNewline(const string &input) {
     if (input == "\\n" || input == "\\r") {
         dialect_options.new_line = NewLineIdentifier::SINGLE;
@@ -152,8 +183,7 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,
             sample_chunks = sample_size / STANDARD_VECTOR_SIZE + 1;
         }
     } else if (loption == "skip") {
-        dialect_options.skip_rows = ParseInteger(value, loption);
-        skip_rows_set = true;
+        SetSkipRows(ParseInteger(value, loption));
     } else if (loption == "max_line_size" || loption == "maximum_line_size") {
         maximum_line_size = ParseInteger(value, loption);
     } else if (loption == "sample_chunk_size") {
@@ -296,4 +326,185 @@ string CSVReaderOptions::ToString() const {
            "\n ignore_errors=" + std::to_string(ignore_errors) + "\n all_varchar=" + std::to_string(all_varchar);
 }
 
+static Value StringVectorToValue(const vector<string> &vec) {
+    vector<Value> content;
+    content.reserve(vec.size());
+    for (auto &item : vec) {
+        content.push_back(Value(item));
+    }
+    return Value::LIST(std::move(content));
+}
+
+static uint8_t GetCandidateSpecificity(const LogicalType &candidate_type) {
+    //! Const ht with accepted auto_types and their weights in specificity
+    const duckdb::unordered_map<uint8_t, uint8_t> auto_type_candidates_specificity {
+        {(uint8_t)LogicalTypeId::VARCHAR, 0},  {(uint8_t)LogicalTypeId::TIMESTAMP, 1},
+        {(uint8_t)LogicalTypeId::DATE, 2},     {(uint8_t)LogicalTypeId::TIME, 3},
+        {(uint8_t)LogicalTypeId::DOUBLE, 4},   {(uint8_t)LogicalTypeId::FLOAT, 5},
+        {(uint8_t)LogicalTypeId::BIGINT, 6},   {(uint8_t)LogicalTypeId::INTEGER, 7},
+        {(uint8_t)LogicalTypeId::SMALLINT, 8}, {(uint8_t)LogicalTypeId::TINYINT, 9},
+        {(uint8_t)LogicalTypeId::BOOLEAN, 10}, {(uint8_t)LogicalTypeId::SQLNULL, 11}};
+
+    auto id = (uint8_t)candidate_type.id();
+    auto it = auto_type_candidates_specificity.find(id);
+    if (it == auto_type_candidates_specificity.end()) {
+        throw BinderException("Auto Type Candidate of type %s is not accepted as a valid input",
+                              EnumUtil::ToString(candidate_type.id()));
+    }
+    return it->second;
+}
+
+void CSVReaderOptions::FromNamedParameters(named_parameter_map_t &in, ClientContext &context,
+                                           vector<LogicalType> &return_types, vector<string> &names) {
+    for (auto &kv : in) {
+        if (MultiFileReader::ParseOption(kv.first, kv.second, file_options, context)) {
+            continue;
+        }
+        auto loption = StringUtil::Lower(kv.first);
+        if (loption == "columns") {
+            explicitly_set_columns = true;
+            auto &child_type = kv.second.type();
+            if (child_type.id() != LogicalTypeId::STRUCT) {
+                throw BinderException("read_csv columns requires a struct as input");
+            }
+            auto &struct_children = StructValue::GetChildren(kv.second);
+            D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
+            for (idx_t i = 0; i < struct_children.size(); i++) {
+                auto &name = StructType::GetChildName(child_type, i);
+                auto &val = struct_children[i];
+                names.push_back(name);
+                if (val.type().id() != LogicalTypeId::VARCHAR) {
+                    throw BinderException("read_csv requires a type specification as string");
+                }
+                return_types.emplace_back(TransformStringToLogicalType(StringValue::Get(val), context));
+            }
+            if (names.empty()) {
+                throw BinderException("read_csv requires at least a single column as input!");
+            }
+        } else if (loption == "auto_type_candidates") {
+            auto_type_candidates.clear();
+            map<uint8_t, LogicalType> candidate_types;
+            // We always have the extremes of Null and Varchar, so we can default to varchar if the
+            // sniffer is not able to confidently detect that column type
+            candidate_types[GetCandidateSpecificity(LogicalType::VARCHAR)] = LogicalType::VARCHAR;
+            candidate_types[GetCandidateSpecificity(LogicalType::SQLNULL)] = LogicalType::SQLNULL;
+
+            auto &child_type = kv.second.type();
+            if (child_type.id() != LogicalTypeId::LIST) {
+                throw BinderException("read_csv auto_types requires a list as input");
+            }
+            auto &list_children = ListValue::GetChildren(kv.second);
+            if (list_children.empty()) {
+                throw BinderException("auto_type_candidates requires at least one type");
+            }
+            for (auto &child : list_children) {
+                if (child.type().id() != LogicalTypeId::VARCHAR) {
+                    throw BinderException("auto_type_candidates requires a type specification as string");
+                }
+                auto candidate_type = TransformStringToLogicalType(StringValue::Get(child), context);
+                candidate_types[GetCandidateSpecificity(candidate_type)] = candidate_type;
+            }
+            for (auto &candidate_type : candidate_types) {
+                auto_type_candidates.emplace_back(candidate_type.second);
+            }
+        } else if (loption == "column_names" || loption == "names") {
+            if (!name_list.empty()) {
+                throw BinderException("read_csv_auto column_names/names can only be supplied once");
+            }
+            if (kv.second.IsNull()) {
+                throw BinderException("read_csv_auto %s cannot be NULL", kv.first);
+            }
+            auto &children = ListValue::GetChildren(kv.second);
+            for (auto &child : children) {
+                name_list.push_back(StringValue::Get(child));
+            }
+        } else if (loption == "column_types" || loption == "types" || loption == "dtypes") {
+            auto &child_type = kv.second.type();
+            if (child_type.id() != LogicalTypeId::STRUCT && child_type.id() != LogicalTypeId::LIST) {
+                throw BinderException("read_csv_auto %s requires a struct or list as input", kv.first);
+            }
+            if (!sql_type_list.empty()) {
+                throw BinderException("read_csv_auto column_types/types/dtypes can only be supplied once");
+            }
+            vector<string> sql_type_names;
+            if (child_type.id() == LogicalTypeId::STRUCT) {
+                auto &struct_children = StructValue::GetChildren(kv.second);
+                D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
+                for (idx_t i = 0; i < struct_children.size(); i++) {
+                    auto &name = StructType::GetChildName(child_type, i);
+                    auto &val = struct_children[i];
+                    if (val.type().id() != LogicalTypeId::VARCHAR) {
+                        throw BinderException("read_csv_auto %s requires a type specification as string", kv.first);
+                    }
+                    sql_type_names.push_back(StringValue::Get(val));
+                    sql_types_per_column[name] = i;
+                }
+            } else {
+                auto &list_child = ListType::GetChildType(child_type);
+                if (list_child.id() != LogicalTypeId::VARCHAR) {
+                    throw BinderException("read_csv_auto %s requires a list of types (varchar) as input", kv.first);
+                }
+                auto &children = ListValue::GetChildren(kv.second);
+                for (auto &child : children) {
+                    sql_type_names.push_back(StringValue::Get(child));
+                }
+            }
+            sql_type_list.reserve(sql_type_names.size());
+            for (auto &sql_type : sql_type_names) {
+                auto def_type = TransformStringToLogicalType(sql_type);
+                if (def_type.id() == LogicalTypeId::USER) {
+                    throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
+                                          kv.first);
+                }
+                sql_type_list.push_back(std::move(def_type));
+            }
+        } else if (loption == "all_varchar") {
+            all_varchar = BooleanValue::Get(kv.second);
+        } else if (loption == "normalize_names") {
+            normalize_names = BooleanValue::Get(kv.second);
+        } else {
+            SetReadOption(loption, kv.second, names);
+        }
+    }
+}
+
+//! This function is used to remember options set by the sniffer, for use in ReadCSVRelation
+void CSVReaderOptions::ToNamedParameters(named_parameter_map_t &named_params) {
+    if (has_delimiter) {
+        named_params["delim"] = Value(GetDelimiter());
+    }
+    if (has_newline) {
+        named_params["newline"] = Value(EnumUtil::ToString(GetNewline()));
+    }
+    if (has_quote) {
+        named_params["quote"] = Value(GetQuote());
+    }
+    if (has_escape) {
+        named_params["escape"] = Value(GetEscape());
+    }
+    if (has_header) {
+        named_params["header"] = Value(GetHeader());
+    }
+    named_params["max_line_size"] = Value::BIGINT(maximum_line_size);
+    if (skip_rows_set) {
+        named_params["skip"] = Value::BIGINT(GetSkipRows());
+    }
+    named_params["sample_chunks"] = Value::BIGINT(sample_chunks);
+    named_params["sample_chunk_size"] = Value::BIGINT(sample_chunk_size);
+    named_params["null_padding"] = Value::BOOLEAN(null_padding);
+    if (!date_format.at(LogicalType::DATE).format_specifier.empty()) {
+        named_params["dateformat"] = Value(date_format.at(LogicalType::DATE).format_specifier);
+    }
+    if (!date_format.at(LogicalType::TIMESTAMP).format_specifier.empty()) {
+        named_params["timestampformat"] = Value(date_format.at(LogicalType::TIMESTAMP).format_specifier);
+    }
+
+    named_params["normalize_names"] = Value::BOOLEAN(normalize_names);
+    if (!name_list.empty()) {
+        named_params["column_names"] = StringVectorToValue(name_list);
+    }
+    named_params["all_varchar"] = Value::BOOLEAN(all_varchar);
+    named_params["maximum_line_size"] = Value::BIGINT(maximum_line_size);
+}
+
 } // namespace duckdb
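The named parameters handled by FromNamedParameters above are the same options read_csv accepts in SQL, and ToNamedParameters round-trips sniffer results back into that form for ReadCSVRelation. A hedged sketch of exercising a few of these options through the node API (the file name and column layout are hypothetical):

const duckdb = require('duckdb');

const db = new duckdb.Database(':memory:');
// delim, header and columns are among the options routed through
// CSVReaderOptions::FromNamedParameters at bind time.
db.all(
    "SELECT * FROM read_csv('people.csv', delim=',', header=true, " +
    "columns={'name': 'VARCHAR', 'age': 'INTEGER'})",
    (err, rows) => {
        if (err) throw err;
        console.log(rows);
    });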
package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp CHANGED
@@ -6,6 +6,7 @@
 #include "duckdb/storage/table_io_manager.hpp"
 #include "duckdb/transaction/local_storage.hpp"
 #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
+#include "duckdb/transaction/duck_transaction.hpp"
 #include "duckdb/storage/table/append_state.hpp"
 #include "duckdb/storage/table/scan_state.hpp"
 
@@ -119,6 +120,7 @@ public:
     idx_t insert_count;
     vector<RowGroupBatchEntry> collections;
     idx_t next_start = 0;
+    bool optimistically_written = false;
 
     void FindMergeCollections(idx_t min_batch_index, optional_idx &merged_batch_index,
                               vector<unique_ptr<RowGroupCollection>> &result) {
@@ -176,10 +178,12 @@ public:
     unique_ptr<RowGroupCollection> MergeCollections(ClientContext &context,
                                                     vector<unique_ptr<RowGroupCollection>> merge_collections,
                                                     OptimisticDataWriter &writer) {
+        D_ASSERT(!merge_collections.empty());
         CollectionMerger merger(context);
         for (auto &collection : merge_collections) {
            merger.AddCollection(std::move(collection));
         }
+        optimistically_written = true;
         return merger.Flush(writer);
     }
 
@@ -373,48 +377,65 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event,
                                                OperatorSinkFinalizeInput &input) const {
     auto &gstate = input.global_state.Cast<BatchInsertGlobalState>();
 
-    // in the finalize, do a final pass over all of the collections we created and try to merge smaller collections
-    // together
-    vector<unique_ptr<CollectionMerger>> mergers;
-    unique_ptr<CollectionMerger> current_merger;
-
-    auto &storage = gstate.table.GetStorage();
-    for (auto &entry : gstate.collections) {
-        if (entry.type == RowGroupBatchType::NOT_FLUSHED) {
-            // this collection has not been flushed: add it to the merge set
-            if (!current_merger) {
-                current_merger = make_uniq<CollectionMerger>(context);
-            }
-            current_merger->AddCollection(std::move(entry.collection));
-        } else {
-            // this collection has been flushed: it does not need to be merged
-            // create a separate collection merger only for this entry
-            if (current_merger) {
-                // we have small collections remaining: flush them
-                mergers.push_back(std::move(current_merger));
-                current_merger.reset();
+    if (gstate.optimistically_written || gstate.insert_count >= LocalStorage::MERGE_THRESHOLD) {
+        // we have written data to disk optimistically or are inserting a large amount of data
+        // perform a final pass over all of the row groups and merge them together
+        vector<unique_ptr<CollectionMerger>> mergers;
+        unique_ptr<CollectionMerger> current_merger;
+
+        auto &storage = gstate.table.GetStorage();
+        for (auto &entry : gstate.collections) {
+            if (entry.type == RowGroupBatchType::NOT_FLUSHED) {
+                // this collection has not been flushed: add it to the merge set
+                if (!current_merger) {
+                    current_merger = make_uniq<CollectionMerger>(context);
+                }
+                current_merger->AddCollection(std::move(entry.collection));
+            } else {
+                // this collection has been flushed: it does not need to be merged
+                // create a separate collection merger only for this entry
+                if (current_merger) {
+                    // we have small collections remaining: flush them
+                    mergers.push_back(std::move(current_merger));
+                    current_merger.reset();
+                }
+                auto larger_merger = make_uniq<CollectionMerger>(context);
+                larger_merger->AddCollection(std::move(entry.collection));
+                mergers.push_back(std::move(larger_merger));
             }
-            auto larger_merger = make_uniq<CollectionMerger>(context);
-            larger_merger->AddCollection(std::move(entry.collection));
-            mergers.push_back(std::move(larger_merger));
         }
-    }
-    if (current_merger) {
-        mergers.push_back(std::move(current_merger));
-    }
+        if (current_merger) {
+            mergers.push_back(std::move(current_merger));
+        }
 
-    // now that we have created all of the mergers, perform the actual merging
-    vector<unique_ptr<RowGroupCollection>> final_collections;
-    final_collections.reserve(mergers.size());
-    auto &writer = storage.CreateOptimisticWriter(context);
-    for (auto &merger : mergers) {
-        final_collections.push_back(merger->Flush(writer));
-    }
-    storage.FinalizeOptimisticWriter(context, writer);
+        // now that we have created all of the mergers, perform the actual merging
+        vector<unique_ptr<RowGroupCollection>> final_collections;
+        final_collections.reserve(mergers.size());
+        auto &writer = storage.CreateOptimisticWriter(context);
+        for (auto &merger : mergers) {
+            final_collections.push_back(merger->Flush(writer));
+        }
+        storage.FinalizeOptimisticWriter(context, writer);
 
-    // finally, merge the row groups into the local storage
-    for (auto &collection : final_collections) {
-        storage.LocalMerge(context, *collection);
+        // finally, merge the row groups into the local storage
+        for (auto &collection : final_collections) {
+            storage.LocalMerge(context, *collection);
+        }
+    } else {
+        // we are writing a small amount of data to disk
+        // append directly to transaction local storage
+        auto &table = gstate.table;
+        auto &storage = table.GetStorage();
+        LocalAppendState append_state;
+        storage.InitializeLocalAppend(append_state, context);
+        auto &transaction = DuckTransaction::Get(context, table.catalog);
+        for (auto &entry : gstate.collections) {
+            entry.collection->Scan(transaction, [&](DataChunk &insert_chunk) {
+                storage.LocalAppend(append_state, table, context, insert_chunk);
+                return true;
+            });
+        }
+        storage.FinalizeLocalAppend(append_state);
     }
     return SinkFinalizeType::READY;
 }
package/src/duckdb/src/function/pragma/pragma_queries.cpp CHANGED
@@ -187,9 +187,14 @@ string PragmaStorageInfo(ClientContext &context, const FunctionParameters &param
     return StringUtil::Format("SELECT * FROM pragma_storage_info('%s');", parameters.values[0].ToString());
 }
 
+string PragmaMetadataInfo(ClientContext &context, const FunctionParameters &parameters) {
+    return "SELECT * FROM pragma_metadata_info();";
+}
+
 void PragmaQueries::RegisterFunction(BuiltinFunctions &set) {
     set.AddFunction(PragmaFunction::PragmaCall("table_info", PragmaTableInfo, {LogicalType::VARCHAR}));
     set.AddFunction(PragmaFunction::PragmaCall("storage_info", PragmaStorageInfo, {LogicalType::VARCHAR}));
+    set.AddFunction(PragmaFunction::PragmaCall("metadata_info", PragmaMetadataInfo, {}));
     set.AddFunction(PragmaFunction::PragmaStatement("show_tables", PragmaShowTables));
     set.AddFunction(PragmaFunction::PragmaStatement("show_tables_expanded", PragmaShowTablesExpanded));
     set.AddFunction(PragmaFunction::PragmaStatement("show_databases", PragmaShowDatabases));
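The new metadata_info pragma expands to the query returned by PragmaMetadataInfo, i.e. the pragma_metadata_info() table function added in this release. A small sketch through the node API (the on-disk path is illustrative; the result columns are defined in pragma_metadata_info.cpp and not shown here):

const duckdb = require('duckdb');

const db = new duckdb.Database('example.db');
// The pragma form is rewritten to this SELECT by PragmaMetadataInfo above,
// so querying the table function directly is equivalent.
db.all('SELECT * FROM pragma_metadata_info();', (err, rows) => {
    if (err) throw err;
    console.log(rows);
});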
package/src/duckdb/src/function/table/arrow.cpp CHANGED
@@ -208,18 +208,10 @@ void ArrowTableFunction::RenameArrowColumns(vector<string> &names) {
     }
 }
 
-unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &context, TableFunctionBindInput &input,
-                                                           vector<LogicalType> &return_types, vector<string> &names) {
-    auto stream_factory_ptr = input.inputs[0].GetPointer();
-    auto stream_factory_produce = (stream_factory_produce_t)input.inputs[1].GetPointer();       // NOLINT
-    auto stream_factory_get_schema = (stream_factory_get_schema_t)input.inputs[2].GetPointer(); // NOLINT
-
-    auto res = make_uniq<ArrowScanFunctionData>(stream_factory_produce, stream_factory_ptr);
-
-    auto &data = *res;
-    stream_factory_get_schema(stream_factory_ptr, data.schema_root);
-    for (idx_t col_idx = 0; col_idx < (idx_t)data.schema_root.arrow_schema.n_children; col_idx++) {
-        auto &schema = *data.schema_root.arrow_schema.children[col_idx];
+void ArrowTableFunction::PopulateArrowTableType(ArrowTableType &arrow_table, ArrowSchemaWrapper &schema_p,
+                                                vector<string> &names, vector<LogicalType> &return_types) {
+    for (idx_t col_idx = 0; col_idx < (idx_t)schema_p.arrow_schema.n_children; col_idx++) {
+        auto &schema = *schema_p.arrow_schema.children[col_idx];
         if (!schema.release) {
             throw InvalidInputException("arrow_scan: released schema passed");
         }
@@ -233,7 +225,7 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
         } else {
             return_types.emplace_back(arrow_type->GetDuckType());
         }
-        res->arrow_table.AddColumn(col_idx, std::move(arrow_type));
+        arrow_table.AddColumn(col_idx, std::move(arrow_type));
         auto format = string(schema.format);
         auto name = string(schema.name);
         if (name.empty()) {
@@ -241,6 +233,19 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
         }
         names.push_back(name);
     }
+}
+
+unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &context, TableFunctionBindInput &input,
+                                                           vector<LogicalType> &return_types, vector<string> &names) {
+    auto stream_factory_ptr = input.inputs[0].GetPointer();
+    auto stream_factory_produce = (stream_factory_produce_t)input.inputs[1].GetPointer();       // NOLINT
+    auto stream_factory_get_schema = (stream_factory_get_schema_t)input.inputs[2].GetPointer(); // NOLINT
+
+    auto res = make_uniq<ArrowScanFunctionData>(stream_factory_produce, stream_factory_ptr);
+
+    auto &data = *res;
+    stream_factory_get_schema(stream_factory_ptr, data.schema_root);
+    PopulateArrowTableType(res->arrow_table, data.schema_root, names, return_types);
     RenameArrowColumns(names);
     res->all_types = return_types;
     return std::move(res);