duckdb 0.7.2-dev3353.0 → 0.7.2-dev3441.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62):
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -3
  3. package/src/duckdb/extension/json/include/json_functions.hpp +5 -1
  4. package/src/duckdb/extension/json/include/json_scan.hpp +1 -0
  5. package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
  6. package/src/duckdb/extension/json/json-extension.cpp +7 -3
  7. package/src/duckdb/extension/json/json_functions/copy_json.cpp +16 -5
  8. package/src/duckdb/extension/json/json_functions/json_create.cpp +220 -93
  9. package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +2 -2
  10. package/src/duckdb/extension/json/json_functions/json_transform.cpp +283 -117
  11. package/src/duckdb/extension/json/json_functions/read_json.cpp +8 -6
  12. package/src/duckdb/extension/json/json_functions.cpp +17 -15
  13. package/src/duckdb/extension/json/json_scan.cpp +8 -4
  14. package/src/duckdb/extension/parquet/column_reader.cpp +6 -2
  15. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -2
  16. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +2 -2
  17. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -0
  18. package/src/duckdb/extension/parquet/include/thrift_tools.hpp +3 -5
  19. package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -4
  20. package/src/duckdb/extension/parquet/parquet_reader.cpp +11 -22
  21. package/src/duckdb/extension/parquet/parquet_statistics.cpp +5 -0
  22. package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -4
  23. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  24. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  25. package/src/duckdb/src/common/file_system.cpp +13 -20
  26. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +2 -2
  27. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +10 -7
  28. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +3 -0
  29. package/src/duckdb/src/execution/index/art/art.cpp +3 -1
  30. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
  31. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
  32. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +1 -1
  33. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +1 -2
  34. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +4 -5
  35. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +1 -1
  36. package/src/duckdb/src/function/cast/cast_function_set.cpp +89 -25
  37. package/src/duckdb/src/function/pragma/pragma_queries.cpp +20 -15
  38. package/src/duckdb/src/function/table/copy_csv.cpp +4 -5
  39. package/src/duckdb/src/function/table/read_csv.cpp +6 -5
  40. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  41. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +0 -1
  42. package/src/duckdb/src/include/duckdb/common/file_system.hpp +7 -6
  43. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +118 -0
  44. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -2
  45. package/src/duckdb/src/include/duckdb/common/types/type_map.hpp +19 -1
  46. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +3 -2
  47. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/main/client_data.hpp +4 -0
  49. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +5 -5
  50. package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -2
  51. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +2 -0
  52. package/src/duckdb/src/main/client_context.cpp +1 -4
  53. package/src/duckdb/src/main/client_data.cpp +19 -0
  54. package/src/duckdb/src/main/database.cpp +4 -1
  55. package/src/duckdb/src/main/extension/extension_install.cpp +5 -6
  56. package/src/duckdb/src/main/extension/extension_load.cpp +11 -16
  57. package/src/duckdb/src/main/settings/settings.cpp +2 -3
  58. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -1
  59. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +25 -1
  60. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +32 -35
  61. package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -25
  62. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +7998 -7955
@@ -230,7 +230,10 @@ void DatabaseInstance::Initialize(const char *database_path, DBConfig *user_conf
230
230
 
231
231
  if (!config.options.database_type.empty()) {
232
232
  // if we are opening an extension database - load the extension
233
- ExtensionHelper::LoadExternalExtension(*this, nullptr, config.options.database_type);
233
+ if (!config.file_system) {
234
+ throw InternalException("No file system!?");
235
+ }
236
+ ExtensionHelper::LoadExternalExtension(*this, *config.file_system, config.options.database_type);
234
237
  }
235
238
 
236
239
  if (!config.options.unrecognized_options.empty()) {
@@ -38,7 +38,7 @@ const vector<string> ExtensionHelper::PathComponents() {
38
38
  return vector<string> {".duckdb", "extensions", GetVersionDirectoryName(), DuckDB::Platform()};
39
39
  }
40
40
 
41
- string ExtensionHelper::ExtensionDirectory(DBConfig &config, FileSystem &fs, FileOpener *opener) {
41
+ string ExtensionHelper::ExtensionDirectory(DBConfig &config, FileSystem &fs) {
42
42
  #ifdef WASM_LOADABLE_EXTENSIONS
43
43
  static_assertion(0, "ExtensionDirectory functionality is not supported in duckdb-wasm");
44
44
  #endif
@@ -49,7 +49,7 @@ string ExtensionHelper::ExtensionDirectory(DBConfig &config, FileSystem &fs, Fil
49
49
  // convert random separators to platform-canonic
50
50
  extension_directory = fs.ConvertSeparators(extension_directory);
51
51
  // expand ~ in extension directory
52
- extension_directory = fs.ExpandPath(extension_directory, opener);
52
+ extension_directory = fs.ExpandPath(extension_directory);
53
53
  if (!fs.DirectoryExists(extension_directory)) {
54
54
  auto sep = fs.PathSeparator();
55
55
  auto splits = StringUtil::Split(extension_directory, sep);
@@ -66,7 +66,7 @@ string ExtensionHelper::ExtensionDirectory(DBConfig &config, FileSystem &fs, Fil
66
66
  }
67
67
  }
68
68
  } else { // otherwise default to home
69
- string home_directory = fs.GetHomeDirectory(opener);
69
+ string home_directory = fs.GetHomeDirectory();
70
70
  // exception if the home directory does not exist, don't create whatever we think is home
71
71
  if (!fs.DirectoryExists(home_directory)) {
72
72
  throw IOException("Can't find the home directory at '%s'\nSpecify a home directory using the SET "
@@ -90,8 +90,7 @@ string ExtensionHelper::ExtensionDirectory(DBConfig &config, FileSystem &fs, Fil
90
90
  string ExtensionHelper::ExtensionDirectory(ClientContext &context) {
91
91
  auto &config = DBConfig::GetConfig(context);
92
92
  auto &fs = FileSystem::GetFileSystem(context);
93
- auto opener = FileSystem::GetFileOpener(context);
94
- return ExtensionDirectory(config, fs, opener);
93
+ return ExtensionDirectory(config, fs);
95
94
  }
96
95
 
97
96
  bool ExtensionHelper::CreateSuggestions(const string &extension_name, string &message) {
@@ -118,7 +117,7 @@ void ExtensionHelper::InstallExtension(DBConfig &config, FileSystem &fs, const s
118
117
  // Install is currently a no-op
119
118
  return;
120
119
  #endif
121
- string local_path = ExtensionDirectory(config, fs, nullptr);
120
+ string local_path = ExtensionDirectory(config, fs);
122
121
  InstallExtensionInternal(config, nullptr, fs, local_path, extension, force_install);
123
122
  }
124
123
 
@@ -44,24 +44,22 @@ static void ComputeSHA256FileSegment(FileHandle *handle, const idx_t start, cons
44
44
  ComputeSHA256String(file_content, res);
45
45
  }
46
46
 
47
- bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileOpener *opener, const string &extension,
47
+ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileSystem &fs, const string &extension,
48
48
  ExtensionInitResult &result, string &error) {
49
49
  if (!config.options.enable_external_access) {
50
50
  throw PermissionException("Loading external extensions is disabled through configuration");
51
51
  }
52
- VirtualFileSystem fallback_file_system; // config may not contain one yet
53
- auto &fs = config.file_system ? *config.file_system : fallback_file_system;
54
52
  auto filename = fs.ConvertSeparators(extension);
55
53
 
56
54
  // shorthand case
57
55
  if (!ExtensionHelper::IsFullPath(extension)) {
58
- string local_path = !config.options.extension_directory.empty() ? config.options.extension_directory
59
- : fs.GetHomeDirectory(opener);
56
+ string local_path =
57
+ !config.options.extension_directory.empty() ? config.options.extension_directory : fs.GetHomeDirectory();
60
58
 
61
59
  // convert random separators to platform-canonic
62
60
  local_path = fs.ConvertSeparators(local_path);
63
61
  // expand ~ in extension directory
64
- local_path = fs.ExpandPath(local_path, opener);
62
+ local_path = fs.ExpandPath(local_path);
65
63
  auto path_components = PathComponents();
66
64
  for (auto &path_ele : path_components) {
67
65
  local_path = fs.JoinPath(local_path, path_ele);
@@ -201,20 +199,17 @@ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileOpener *opener, const
201
199
  return true;
202
200
  }
203
201
 
204
- ExtensionInitResult ExtensionHelper::InitialLoad(DBConfig &config, FileOpener *opener, const string &extension) {
202
+ ExtensionInitResult ExtensionHelper::InitialLoad(DBConfig &config, FileSystem &fs, const string &extension) {
205
203
  string error;
206
204
  ExtensionInitResult result;
207
- if (!TryInitialLoad(config, opener, extension, result, error)) {
205
+ if (!TryInitialLoad(config, fs, extension, result, error)) {
208
206
  if (!ExtensionHelper::AllowAutoInstall(extension)) {
209
207
  throw IOException(error);
210
208
  }
211
209
  // the extension load failed - try installing the extension
212
- if (!config.file_system) {
213
- throw InternalException("Attempting to install an extension without a file system");
214
- }
215
- ExtensionHelper::InstallExtension(config, *config.file_system, extension, false);
210
+ ExtensionHelper::InstallExtension(config, fs, extension, false);
216
211
  // try loading again
217
- if (!TryInitialLoad(config, nullptr, extension, result, error)) {
212
+ if (!TryInitialLoad(config, fs, extension, result, error)) {
218
213
  throw IOException(error);
219
214
  }
220
215
  }
@@ -242,12 +237,12 @@ string ExtensionHelper::GetExtensionName(const string &original_name) {
242
237
  return ExtensionHelper::ApplyExtensionAlias(splits.front());
243
238
  }
244
239
 
245
- void ExtensionHelper::LoadExternalExtension(DatabaseInstance &db, FileOpener *opener, const string &extension) {
240
+ void ExtensionHelper::LoadExternalExtension(DatabaseInstance &db, FileSystem &fs, const string &extension) {
246
241
  if (db.ExtensionIsLoaded(extension)) {
247
242
  return;
248
243
  }
249
244
 
250
- auto res = InitialLoad(DBConfig::GetConfig(db), opener, extension);
245
+ auto res = InitialLoad(DBConfig::GetConfig(db), fs, extension);
251
246
  auto init_fun_name = res.basename + "_init";
252
247
 
253
248
  ext_init_fun_t init_fun;
@@ -264,7 +259,7 @@ void ExtensionHelper::LoadExternalExtension(DatabaseInstance &db, FileOpener *op
264
259
  }
265
260
 
266
261
  void ExtensionHelper::LoadExternalExtension(ClientContext &context, const string &extension) {
267
- LoadExternalExtension(DatabaseInstance::GetDatabase(context), FileSystem::GetFileOpener(context), extension);
262
+ LoadExternalExtension(DatabaseInstance::GetDatabase(context), FileSystem::GetFileSystem(context), extension);
268
263
  }
269
264
 
270
265
  string ExtensionHelper::ExtractExtensionPrefixFromPath(const string &path) {
@@ -735,9 +735,8 @@ void LogQueryPathSetting::SetLocal(ClientContext &context, const Value &input) {
735
735
  // empty path: clean up query writer
736
736
  client_data.log_query_writer = nullptr;
737
737
  } else {
738
- client_data.log_query_writer =
739
- make_uniq<BufferedFileWriter>(FileSystem::GetFileSystem(context), path,
740
- BufferedFileWriter::DEFAULT_OPEN_FLAGS, client_data.file_opener.get());
738
+ client_data.log_query_writer = make_uniq<BufferedFileWriter>(FileSystem::GetFileSystem(context), path,
739
+ BufferedFileWriter::DEFAULT_OPEN_FLAGS);
741
740
  }
742
741
  }
743
742
 
@@ -574,7 +574,7 @@ void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode &node, LogicalO
574
574
  D_ASSERT(node.set.count == 1);
575
575
  auto relation_id = node.set.relations[0];
576
576
 
577
- double lowest_card_found = NumericLimits<double>::Maximum();
577
+ double lowest_card_found = node.GetBaseTableCardinality();
578
578
  for (auto &column : relation_attributes[relation_id].columns) {
579
579
  auto card_after_filters = node.GetBaseTableCardinality();
580
580
  ColumnBinding key = ColumnBinding(relation_id, column);
@@ -147,6 +147,18 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op,
147
147
  }
148
148
  }
149
149
  }
150
+ if (op->type == LogicalOperatorType::LOGICAL_ANY_JOIN && non_reorderable_operation) {
151
+ auto &join = op->Cast<LogicalAnyJoin>();
152
+ if (join.join_type == JoinType::LEFT && join.right_projection_map.empty()) {
153
+ auto lhs_cardinality = join.children[0]->EstimateCardinality(context);
154
+ auto rhs_cardinality = join.children[1]->EstimateCardinality(context);
155
+ if (rhs_cardinality > lhs_cardinality * 2) {
156
+ join.join_type = JoinType::RIGHT;
157
+ std::swap(join.children[0], join.children[1]);
158
+ }
159
+ }
160
+ }
161
+
150
162
  if (non_reorderable_operation) {
151
163
  // we encountered a non-reordable operation (setop or non-inner join)
152
164
  // we do not reorder non-inner joins yet, however we do want to expand the potential join graph around them
@@ -648,7 +660,19 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
648
660
  // we have to add a cross product; we add it between the two smallest relations
649
661
  optional_ptr<JoinNode> smallest_plans[2];
650
662
  idx_t smallest_index[2];
651
- for (idx_t i = 0; i < join_relations.size(); i++) {
663
+ D_ASSERT(join_relations.size() >= 2);
664
+
665
+ // first just add the first two join relations. It doesn't matter the cost as the JOO
666
+ // will swap them on estimated cardinality anyway.
667
+ for (idx_t i = 0; i < 2; i++) {
668
+ auto current_plan = plans[&join_relations[i].get()].get();
669
+ smallest_plans[i] = current_plan;
670
+ smallest_index[i] = i;
671
+ }
672
+
673
+ // if there are any other join relations that don't have connections
674
+ // add them if they have lower estimated cardinality.
675
+ for (idx_t i = 2; i < join_relations.size(); i++) {
652
676
  // get the plan for this relation
653
677
  auto current_plan = plans[&join_relations[i].get()].get();
654
678
  // check if the cardinality is smaller than the smallest two found so far
@@ -25,6 +25,7 @@
25
25
  #include "duckdb/planner/tableref/bound_basetableref.hpp"
26
26
  #include "duckdb/planner/tableref/bound_dummytableref.hpp"
27
27
  #include "duckdb/parser/parsed_expression_iterator.hpp"
28
+ #include "duckdb/storage/table_storage_info.hpp"
28
29
 
29
30
  namespace duckdb {
30
31
 
@@ -78,10 +79,10 @@ void ReplaceColumnBindings(Expression &expr, idx_t source, idx_t dest) {
78
79
  expr, [&](unique_ptr<Expression> &child) { ReplaceColumnBindings(*child, source, dest); });
79
80
  }
80
81
 
81
- void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert *insert, UpdateSetInfo &set_info,
82
- TableCatalogEntry &table) {
83
- D_ASSERT(insert->children.size() == 1);
84
- D_ASSERT(insert->children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
82
+ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert &insert, UpdateSetInfo &set_info,
83
+ TableCatalogEntry &table, TableStorageInfo &storage_info) {
84
+ D_ASSERT(insert.children.size() == 1);
85
+ D_ASSERT(insert.children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
85
86
 
86
87
  vector<column_t> logical_column_ids;
87
88
  vector<string> column_names;
@@ -97,13 +98,13 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
97
98
  if (column.Generated()) {
98
99
  throw BinderException("Cant update column \"%s\" because it is a generated column!", column.Name());
99
100
  }
100
- if (std::find(insert->set_columns.begin(), insert->set_columns.end(), column.Physical()) !=
101
- insert->set_columns.end()) {
101
+ if (std::find(insert.set_columns.begin(), insert.set_columns.end(), column.Physical()) !=
102
+ insert.set_columns.end()) {
102
103
  throw BinderException("Multiple assignments to same column \"%s\"", colname);
103
104
  }
104
- insert->set_columns.push_back(column.Physical());
105
+ insert.set_columns.push_back(column.Physical());
105
106
  logical_column_ids.push_back(column.Oid());
106
- insert->set_types.push_back(column.Type());
107
+ insert.set_types.push_back(column.Type());
107
108
  column_names.push_back(colname);
108
109
  if (expr->type == ExpressionType::VALUE_DEFAULT) {
109
110
  expr = ExpandDefaultExpression(column);
@@ -120,14 +121,13 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
120
121
  throw BinderException("Expression in the DO UPDATE SET clause can not be a subquery");
121
122
  }
122
123
 
123
- insert->expressions.push_back(std::move(bound_expr));
124
+ insert.expressions.push_back(std::move(bound_expr));
124
125
  }
125
126
 
126
127
  // Figure out which columns are indexed on
127
128
  unordered_set<column_t> indexed_columns;
128
- auto &indexes = table.GetStorage().info->indexes.Indexes();
129
- for (auto &index : indexes) {
130
- for (auto &column_id : index->column_id_set) {
129
+ for (auto &index : storage_info.index_info) {
130
+ for (auto &column_id : index.column_set) {
131
131
  indexed_columns.insert(column_id);
132
132
  }
133
133
  }
@@ -142,16 +142,16 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
142
142
  }
143
143
  }
144
144
 
145
- unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert) {
145
+ unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert,
146
+ TableStorageInfo &storage_info) {
146
147
  auto set_info = make_uniq<UpdateSetInfo>();
147
148
 
148
149
  auto &columns = set_info->columns;
149
150
  // Figure out which columns are indexed on
150
151
 
151
152
  unordered_set<column_t> indexed_columns;
152
- auto &indexes = table.GetStorage().info->indexes.Indexes();
153
- for (auto &index : indexes) {
154
- for (auto &column_id : index->column_id_set) {
153
+ for (auto &index : storage_info.index_info) {
154
+ for (auto &column_id : index.column_set) {
155
155
  indexed_columns.insert(column_id);
156
156
  }
157
157
  }
@@ -190,9 +190,6 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
190
190
  insert.action_type = OnConflictAction::THROW;
191
191
  return;
192
192
  }
193
- if (!table.IsDuckTable()) {
194
- throw BinderException("ON CONFLICT clause is not yet supported for non-DuckDB tables");
195
- }
196
193
  D_ASSERT(stmt.table_ref->type == TableReferenceType::BASE_TABLE);
197
194
 
198
195
  // visit the table reference
@@ -208,6 +205,9 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
208
205
  D_ASSERT(on_conflict.action_type != OnConflictAction::THROW);
209
206
  insert.action_type = on_conflict.action_type;
210
207
 
208
+ // obtain the table storage info
209
+ auto storage_info = table.GetStorageInfo(context);
210
+
211
211
  auto &columns = table.GetColumns();
212
212
  if (!on_conflict.indexed_columns.empty()) {
213
213
  // Bind the ON CONFLICT (<columns>)
@@ -232,18 +232,17 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
232
232
  insert.on_conflict_filter.insert(col.Oid());
233
233
  }
234
234
  }
235
- auto &indexes = table.GetStorage().info->indexes;
236
235
  bool index_references_columns = false;
237
- indexes.Scan([&](Index &index) {
238
- if (!index.IsUnique()) {
239
- return false;
236
+ for (auto &index : storage_info.index_info) {
237
+ if (!index.is_unique) {
238
+ continue;
240
239
  }
241
- bool index_matches = insert.on_conflict_filter == index.column_id_set;
240
+ bool index_matches = insert.on_conflict_filter == index.column_set;
242
241
  if (index_matches) {
243
242
  index_references_columns = true;
243
+ break;
244
244
  }
245
- return index_matches;
246
- });
245
+ }
247
246
  if (!index_references_columns) {
248
247
  // Same as before, this is essentially a no-op, turning this into a DO THROW instead
249
248
  // But since this makes no logical sense, it's probably better to throw an error
@@ -254,21 +253,19 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
254
253
  // When omitting the conflict target, the ON CONFLICT applies to every UNIQUE/PRIMARY KEY on the table
255
254
 
256
255
  // We check if there are any constraints on the table, if there aren't we throw an error.
257
- auto &indexes = table.GetStorage().info->indexes;
258
256
  idx_t found_matching_indexes = 0;
259
- indexes.Scan([&](Index &index) {
260
- if (!index.IsUnique()) {
261
- return false;
257
+ for (auto &index : storage_info.index_info) {
258
+ if (!index.is_unique) {
259
+ continue;
262
260
  }
263
261
  // does this work with multi-column indexes?
264
- auto &indexed_columns = index.column_id_set;
262
+ auto &indexed_columns = index.column_set;
265
263
  for (auto &column : table.GetColumns().Physical()) {
266
264
  if (indexed_columns.count(column.Physical().index)) {
267
265
  found_matching_indexes++;
268
266
  }
269
267
  }
270
- return false;
271
- });
268
+ }
272
269
  if (!found_matching_indexes) {
273
270
  throw BinderException(
274
271
  "There are no UNIQUE/PRIMARY KEY Indexes that refer to this table, ON CONFLICT is a no-op");
@@ -338,7 +335,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
338
335
 
339
336
  if (insert.action_type == OnConflictAction::REPLACE) {
340
337
  D_ASSERT(on_conflict.set_info == nullptr);
341
- on_conflict.set_info = CreateSetInfoForReplace(table, stmt);
338
+ on_conflict.set_info = CreateSetInfoForReplace(table, stmt, storage_info);
342
339
  insert.action_type = OnConflictAction::UPDATE;
343
340
  }
344
341
  if (on_conflict.set_info && on_conflict.set_info->columns.empty()) {
@@ -374,7 +371,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
374
371
  insert.do_update_condition = std::move(condition);
375
372
  }
376
373
 
377
- BindDoUpdateSetExpressions(table_alias, &insert, set_info, table);
374
+ BindDoUpdateSetExpressions(table_alias, insert, set_info, table, storage_info);
378
375
 
379
376
  // Get the column_ids we need to fetch later on from the conflicting tuples
380
377
  // of the original table, to execute the expressions
@@ -526,43 +526,59 @@ void RowGroupCollection::Update(TransactionData transaction, row_t *ids, const v
526
526
  void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_identifiers, idx_t count) {
527
527
  auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
528
528
 
529
- // figure out which row_group to fetch from
530
- auto row_group = row_groups->GetSegment(row_ids[0]);
531
- auto row_group_vector_idx = (row_ids[0] - row_group->start) / STANDARD_VECTOR_SIZE;
532
- auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
533
-
534
- // create a selection vector from the row_ids
535
- SelectionVector sel(STANDARD_VECTOR_SIZE);
536
- for (idx_t i = 0; i < count; i++) {
537
- auto row_in_vector = row_ids[i] - base_row_id;
538
- D_ASSERT(row_in_vector < STANDARD_VECTOR_SIZE);
539
- sel.set_index(i, row_in_vector);
540
- }
541
-
542
- // now fetch the columns from that row_group
543
- TableScanState state;
544
- state.table_state.max_row = row_start + total_rows;
545
-
529
+ // initialize the fetch state
546
530
  // FIXME: we do not need to fetch all columns, only the columns required by the indices!
531
+ TableScanState state;
547
532
  vector<column_t> column_ids;
548
533
  column_ids.reserve(types.size());
549
534
  for (idx_t i = 0; i < types.size(); i++) {
550
535
  column_ids.push_back(i);
551
536
  }
552
537
  state.Initialize(std::move(column_ids));
538
+ state.table_state.max_row = row_start + total_rows;
553
539
 
540
+ // initialize the fetch chunk
554
541
  DataChunk result;
555
542
  result.Initialize(GetAllocator(), types);
556
543
 
557
- state.table_state.Initialize(GetTypes());
558
- row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
559
- row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
560
- result.Slice(sel, count);
544
+ SelectionVector sel(STANDARD_VECTOR_SIZE);
545
+ // now iterate over the row ids
546
+ for (idx_t r = 0; r < count;) {
547
+ result.Reset();
548
+ // figure out which row_group to fetch from
549
+ auto row_id = row_ids[r];
550
+ auto row_group = row_groups->GetSegment(row_id);
551
+ auto row_group_vector_idx = (row_id - row_group->start) / STANDARD_VECTOR_SIZE;
552
+ auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
553
+
554
+ // fetch the current vector
555
+ state.table_state.Initialize(GetTypes());
556
+ row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
557
+ row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
558
+ result.Verify();
559
+
560
+ // check for any remaining row ids if they also fall into this vector
561
+ // we try to fetch handle as many rows as possible at the same time
562
+ idx_t sel_count = 0;
563
+ for (; r < count; r++) {
564
+ idx_t current_row = idx_t(row_ids[r]);
565
+ if (current_row < base_row_id || current_row >= base_row_id + result.size()) {
566
+ // this row-id does not fall into the current chunk - break
567
+ break;
568
+ }
569
+ auto row_in_vector = current_row - base_row_id;
570
+ D_ASSERT(row_in_vector < result.size());
571
+ sel.set_index(sel_count++, row_in_vector);
572
+ }
573
+ D_ASSERT(sel_count > 0);
574
+ // slice the vector with all rows that are present in this vector and erase from the index
575
+ result.Slice(sel, sel_count);
561
576
 
562
- indexes.Scan([&](Index &index) {
563
- index.Delete(result, row_identifiers);
564
- return false;
565
- });
577
+ indexes.Scan([&](Index &index) {
578
+ index.Delete(result, row_identifiers);
579
+ return false;
580
+ });
581
+ }
566
582
  }
567
583
 
568
584
  void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_ids, const vector<column_t> &column_path,