npm - duckdb - Versions diffs - 0.7.2-dev3402.0 → 0.7.2-dev3441.0 - Mend

duckdb 0.7.2-dev3402.0 → 0.7.2-dev3441.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.7.2-dev3402.0",
+  "version": "0.7.2-dev3441.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp CHANGED Viewed

@@ -721,9 +721,9 @@ TableStorageInfo DuckTableEntry::GetStorageInfo(ClientContext &context) {
 	storage->info->indexes.Scan([&](Index &index) {
 		IndexInfo info;
 		info.is_primary = index.IsPrimary();
-		info.is_unique = index.IsUnique();
+		info.is_unique = index.IsUnique() || info.is_primary;
 		info.is_foreign = index.IsForeign();
-		index.column_id_set = index.column_id_set;
+		info.column_set = index.column_id_set;
 		result.index_info.push_back(std::move(info));
 		return false;
 	});

package/src/duckdb/src/catalog/catalog_set.cpp CHANGED Viewed

@@ -621,7 +621,7 @@ void CatalogSet::Undo(CatalogEntry &entry) {
 		auto &dependency_manager = catalog.GetDependencyManager();
 		dependency_manager.EraseObject(to_be_removed_node);
 	}
-	if (entry.name != to_be_removed_node.name) {
+	if (!StringUtil::CIEquals(entry.name, to_be_removed_node.name)) {
 		// rename: clean up the new name when the rename is rolled back
 		auto removed_entry = mapping.find(to_be_removed_node.name);
 		if (removed_entry->second->child) {

package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp CHANGED Viewed

@@ -60,11 +60,15 @@ static void AppendFilteredToResult(Vector &lambda_vector, list_entry_t *result_e
 	idx_t true_count = 0;
 	SelectionVector true_sel(elem_cnt);
-	auto lambda_values = FlatVector::GetData<bool>(lambda_vector);
-	auto &lambda_validity = FlatVector::Validity(lambda_vector);
+	UnifiedVectorFormat lambda_data;
+	lambda_vector.ToUnifiedFormat(elem_cnt, lambda_data);
+	auto lambda_values = (bool *)lambda_data.data;
+	auto &lambda_validity = lambda_data.validity;
 	// compute the new lengths and offsets, and create a selection vector
 	for (idx_t i = 0; i < elem_cnt; i++) {
+		auto entry = lambda_data.sel->get_index(i);
 		while (appended_lists_cnt < lists_len.size() && lists_len[appended_lists_cnt] == 0) {
 			result_entries[appended_lists_cnt].offset = curr_list_offset;
@@ -73,12 +77,11 @@ static void AppendFilteredToResult(Vector &lambda_vector, list_entry_t *result_e
 		}
 		// found a true value
-		if (lambda_validity.RowIsValid(i)) {
-			if (lambda_values[i] > 0) {
-				true_sel.set_index(true_count++, i);
-				curr_list_len++;
-			}
+		if (lambda_validity.RowIsValid(entry) && lambda_values[entry] > 0) {
+			true_sel.set_index(true_count++, i);
+			curr_list_len++;
 		}
 		curr_original_list_len++;
 		if (lists_len[appended_lists_cnt] == curr_original_list_len) {

package/src/duckdb/src/execution/expression_executor/execute_between.cpp CHANGED Viewed

@@ -74,6 +74,9 @@ static idx_t BetweenLoopTypeSwitch(Vector &input, Vector &lower, Vector &upper,
 	case PhysicalType::VARCHAR:
 		return TernaryExecutor::Select<string_t, string_t, string_t, OP>(input, lower, upper, sel, count, true_sel,
 		                                                                 false_sel);
+	case PhysicalType::INTERVAL:
+		return TernaryExecutor::Select<interval_t, interval_t, interval_t, OP>(input, lower, upper, sel, count,
+		                                                                       true_sel, false_sel);
 	default:
 		throw InvalidTypeException(input.GetType(), "Invalid type for BETWEEN");
 	}

package/src/duckdb/src/execution/operator/join/physical_index_join.cpp CHANGED Viewed

@@ -213,6 +213,8 @@ OperatorResultType PhysicalIndexJoin::ExecuteInternal(ExecutionContext &context,
 		state.lhs_idx = 0;
 		state.rhs_idx = 0;
 		state.first_fetch = true;
+		// reset the LHS chunk to reset the validity masks
+		state.join_keys.Reset();
 		return OperatorResultType::NEED_MORE_INPUT;
 	}
 	//! Output vectors

package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED Viewed

@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.7.2-dev3402"
+#define DUCKDB_VERSION "0.7.2-dev3441"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "6f543cb464"
+#define DUCKDB_SOURCE_ID "e97702367a"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"

package/src/duckdb/src/include/duckdb/planner/binder.hpp CHANGED Viewed

@@ -39,6 +39,7 @@ class LogicalProjection;
 class ColumnList;
 class ExternalDependency;
 class TableFunction;
+class TableStorageInfo;
 struct CreateInfo;
 struct BoundCreateTableInfo;
@@ -167,8 +168,8 @@ public:
 	unique_ptr<LogicalOperator> BindUpdateSet(LogicalOperator &op, unique_ptr<LogicalOperator> root,
 	                                          UpdateSetInfo &set_info, TableCatalogEntry &table,
 	                                          vector<PhysicalIndex> &columns);
-	void BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert *insert, UpdateSetInfo &set_info,
-	                                TableCatalogEntry &table);
+	void BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert &insert, UpdateSetInfo &set_info,
+	                                TableCatalogEntry &table, TableStorageInfo &storage_info);
 	void BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &table, InsertStatement &stmt);
 	static void BindSchemaOrCatalog(ClientContext &context, string &catalog, string &schema);

package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp CHANGED Viewed

@@ -8,7 +8,9 @@
 #pragma once
+#include "duckdb/storage/storage_info.hpp"
 #include "duckdb/common/types/value.hpp"
+#include "duckdb/common/unordered_set.hpp"
 namespace duckdb {

package/src/duckdb/src/main/client_context.cpp CHANGED Viewed

@@ -1013,6 +1013,7 @@ void ClientContext::TryBindRelation(Relation &relation, vector<ColumnDefinition>
 	D_ASSERT(!relation.GetAlias().empty());
 	D_ASSERT(!relation.ToString().empty());
 #endif
+	client_data->http_state = make_uniq<HTTPState>();
 	RunFunctionInTransaction([&]() {
 		// bind the expressions
 		auto binder = Binder::CreateBinder(*this);

package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp CHANGED Viewed

@@ -574,7 +574,7 @@ void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode &node, LogicalO
 	D_ASSERT(node.set.count == 1);
 	auto relation_id = node.set.relations[0];
-	double lowest_card_found = NumericLimits<double>::Maximum();
+	double lowest_card_found = node.GetBaseTableCardinality();
 	for (auto &column : relation_attributes[relation_id].columns) {
 		auto card_after_filters = node.GetBaseTableCardinality();
 		ColumnBinding key = ColumnBinding(relation_id, column);

package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp CHANGED Viewed

@@ -147,6 +147,18 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op,
 			}
 		}
 	}
+	if (op->type == LogicalOperatorType::LOGICAL_ANY_JOIN && non_reorderable_operation) {
+		auto &join = op->Cast<LogicalAnyJoin>();
+		if (join.join_type == JoinType::LEFT && join.right_projection_map.empty()) {
+			auto lhs_cardinality = join.children[0]->EstimateCardinality(context);
+			auto rhs_cardinality = join.children[1]->EstimateCardinality(context);
+			if (rhs_cardinality > lhs_cardinality * 2) {
+				join.join_type = JoinType::RIGHT;
+				std::swap(join.children[0], join.children[1]);
+			}
+		}
+	}
 	if (non_reorderable_operation) {
 		// we encountered a non-reorderable operation (setop or non-inner join)
 		// we do not reorder non-inner joins yet, however we do want to expand the potential join graph around them
@@ -648,7 +660,19 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
 			// we have to add a cross product; we add it between the two smallest relations
 			optional_ptr<JoinNode> smallest_plans[2];
 			idx_t smallest_index[2];
-			for (idx_t i = 0; i < join_relations.size(); i++) {
+			D_ASSERT(join_relations.size() >= 2);
+			// first just add the first two join relations. Their cost doesn't matter, as the JOO
+			// will swap them based on estimated cardinality anyway.
+			for (idx_t i = 0; i < 2; i++) {
+				auto current_plan = plans[&join_relations[i].get()].get();
+				smallest_plans[i] = current_plan;
+				smallest_index[i] = i;
+			}
+			// if there are any other join relations that don't have connections
+			// add them if they have lower estimated cardinality.
+			for (idx_t i = 2; i < join_relations.size(); i++) {
 				// get the plan for this relation
 				auto current_plan = plans[&join_relations[i].get()].get();
 				// check if the cardinality is smaller than the smallest two found so far

package/src/duckdb/src/planner/binder/statement/bind_insert.cpp CHANGED Viewed

@@ -25,6 +25,7 @@
 #include "duckdb/planner/tableref/bound_basetableref.hpp"
 #include "duckdb/planner/tableref/bound_dummytableref.hpp"
 #include "duckdb/parser/parsed_expression_iterator.hpp"
+#include "duckdb/storage/table_storage_info.hpp"
 namespace duckdb {
@@ -78,10 +79,10 @@ void ReplaceColumnBindings(Expression &expr, idx_t source, idx_t dest) {
 	    expr, [&](unique_ptr<Expression> &child) { ReplaceColumnBindings(*child, source, dest); });
 }
-void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert *insert, UpdateSetInfo &set_info,
-                                        TableCatalogEntry &table) {
-	D_ASSERT(insert->children.size() == 1);
-	D_ASSERT(insert->children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
+void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert &insert, UpdateSetInfo &set_info,
+                                        TableCatalogEntry &table, TableStorageInfo &storage_info) {
+	D_ASSERT(insert.children.size() == 1);
+	D_ASSERT(insert.children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
 	vector<column_t> logical_column_ids;
 	vector<string> column_names;
@@ -97,13 +98,13 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
 		if (column.Generated()) {
 			throw BinderException("Cant update column \"%s\" because it is a generated column!", column.Name());
 		}
-		if (std::find(insert->set_columns.begin(), insert->set_columns.end(), column.Physical()) !=
-		    insert->set_columns.end()) {
+		if (std::find(insert.set_columns.begin(), insert.set_columns.end(), column.Physical()) !=
+		    insert.set_columns.end()) {
 			throw BinderException("Multiple assignments to same column \"%s\"", colname);
 		}
-		insert->set_columns.push_back(column.Physical());
+		insert.set_columns.push_back(column.Physical());
 		logical_column_ids.push_back(column.Oid());
-		insert->set_types.push_back(column.Type());
+		insert.set_types.push_back(column.Type());
 		column_names.push_back(colname);
 		if (expr->type == ExpressionType::VALUE_DEFAULT) {
 			expr = ExpandDefaultExpression(column);
@@ -120,14 +121,13 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
 			throw BinderException("Expression in the DO UPDATE SET clause can not be a subquery");
 		}
-		insert->expressions.push_back(std::move(bound_expr));
+		insert.expressions.push_back(std::move(bound_expr));
 	}
 	// Figure out which columns are indexed on
 	unordered_set<column_t> indexed_columns;
-	auto &indexes = table.GetStorage().info->indexes.Indexes();
-	for (auto &index : indexes) {
-		for (auto &column_id : index->column_id_set) {
+	for (auto &index : storage_info.index_info) {
+		for (auto &column_id : index.column_set) {
 			indexed_columns.insert(column_id);
 		}
 	}
@@ -142,16 +142,16 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
 	}
 }
-unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert) {
+unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert,
+                                                  TableStorageInfo &storage_info) {
 	auto set_info = make_uniq<UpdateSetInfo>();
 	auto &columns = set_info->columns;
 	// Figure out which columns are indexed on
 	unordered_set<column_t> indexed_columns;
-	auto &indexes = table.GetStorage().info->indexes.Indexes();
-	for (auto &index : indexes) {
-		for (auto &column_id : index->column_id_set) {
+	for (auto &index : storage_info.index_info) {
+		for (auto &column_id : index.column_set) {
 			indexed_columns.insert(column_id);
 		}
 	}
@@ -190,9 +190,6 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
 		insert.action_type = OnConflictAction::THROW;
 		return;
 	}
-	if (!table.IsDuckTable()) {
-		throw BinderException("ON CONFLICT clause is not yet supported for non-DuckDB tables");
-	}
 	D_ASSERT(stmt.table_ref->type == TableReferenceType::BASE_TABLE);
 	// visit the table reference
@@ -208,6 +205,9 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
 	D_ASSERT(on_conflict.action_type != OnConflictAction::THROW);
 	insert.action_type = on_conflict.action_type;
+	// obtain the table storage info
+	auto storage_info = table.GetStorageInfo(context);
 	auto &columns = table.GetColumns();
 	if (!on_conflict.indexed_columns.empty()) {
 		// Bind the ON CONFLICT (<columns>)
@@ -232,18 +232,17 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
 				insert.on_conflict_filter.insert(col.Oid());
 			}
 		}
-		auto &indexes = table.GetStorage().info->indexes;
 		bool index_references_columns = false;
-		indexes.Scan([&](Index &index) {
-			if (!index.IsUnique()) {
-				return false;
+		for (auto &index : storage_info.index_info) {
+			if (!index.is_unique) {
+				continue;
 			}
-			bool index_matches = insert.on_conflict_filter == index.column_id_set;
+			bool index_matches = insert.on_conflict_filter == index.column_set;
 			if (index_matches) {
 				index_references_columns = true;
+				break;
 			}
-			return index_matches;
-		});
+		}
 		if (!index_references_columns) {
 			// Same as before, this is essentially a no-op, turning this into a DO THROW instead
 			// But since this makes no logical sense, it's probably better to throw an error
@@ -254,21 +253,19 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
 		// When omitting the conflict target, the ON CONFLICT applies to every UNIQUE/PRIMARY KEY on the table
 		// We check if there are any constraints on the table, if there aren't we throw an error.
-		auto &indexes = table.GetStorage().info->indexes;
 		idx_t found_matching_indexes = 0;
-		indexes.Scan([&](Index &index) {
-			if (!index.IsUnique()) {
-				return false;
+		for (auto &index : storage_info.index_info) {
+			if (!index.is_unique) {
+				continue;
 			}
 			// does this work with multi-column indexes?
-			auto &indexed_columns = index.column_id_set;
+			auto &indexed_columns = index.column_set;
 			for (auto &column : table.GetColumns().Physical()) {
 				if (indexed_columns.count(column.Physical().index)) {
 					found_matching_indexes++;
 				}
 			}
-			return false;
-		});
+		}
 		if (!found_matching_indexes) {
 			throw BinderException(
 			    "There are no UNIQUE/PRIMARY KEY Indexes that refer to this table, ON CONFLICT is a no-op");
@@ -338,7 +335,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
 	if (insert.action_type == OnConflictAction::REPLACE) {
 		D_ASSERT(on_conflict.set_info == nullptr);
-		on_conflict.set_info = CreateSetInfoForReplace(table, stmt);
+		on_conflict.set_info = CreateSetInfoForReplace(table, stmt, storage_info);
 		insert.action_type = OnConflictAction::UPDATE;
 	}
 	if (on_conflict.set_info && on_conflict.set_info->columns.empty()) {
@@ -374,7 +371,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
 		insert.do_update_condition = std::move(condition);
 	}
-	BindDoUpdateSetExpressions(table_alias, &insert, set_info, table);
+	BindDoUpdateSetExpressions(table_alias, insert, set_info, table, storage_info);
 	// Get the column_ids we need to fetch later on from the conflicting tuples
 	// of the original table, to execute the expressions

package/src/duckdb/src/storage/table/row_group_collection.cpp CHANGED Viewed

@@ -526,43 +526,59 @@ void RowGroupCollection::Update(TransactionData transaction, row_t *ids, const v
 void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_identifiers, idx_t count) {
 	auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
-	// figure out which row_group to fetch from
-	auto row_group = row_groups->GetSegment(row_ids[0]);
-	auto row_group_vector_idx = (row_ids[0] - row_group->start) / STANDARD_VECTOR_SIZE;
-	auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
-	// create a selection vector from the row_ids
-	SelectionVector sel(STANDARD_VECTOR_SIZE);
-	for (idx_t i = 0; i < count; i++) {
-		auto row_in_vector = row_ids[i] - base_row_id;
-		D_ASSERT(row_in_vector < STANDARD_VECTOR_SIZE);
-		sel.set_index(i, row_in_vector);
-	}
-	// now fetch the columns from that row_group
-	TableScanState state;
-	state.table_state.max_row = row_start + total_rows;
+	// initialize the fetch state
 	// FIXME: we do not need to fetch all columns, only the columns required by the indices!
+	TableScanState state;
 	vector<column_t> column_ids;
 	column_ids.reserve(types.size());
 	for (idx_t i = 0; i < types.size(); i++) {
 		column_ids.push_back(i);
 	}
 	state.Initialize(std::move(column_ids));
+	state.table_state.max_row = row_start + total_rows;
+	// initialize the fetch chunk
 	DataChunk result;
 	result.Initialize(GetAllocator(), types);
-	state.table_state.Initialize(GetTypes());
-	row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
-	row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
-	result.Slice(sel, count);
+	SelectionVector sel(STANDARD_VECTOR_SIZE);
+	// now iterate over the row ids
+	for (idx_t r = 0; r < count;) {
+		result.Reset();
+		// figure out which row_group to fetch from
+		auto row_id = row_ids[r];
+		auto row_group = row_groups->GetSegment(row_id);
+		auto row_group_vector_idx = (row_id - row_group->start) / STANDARD_VECTOR_SIZE;
+		auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
+		// fetch the current vector
+		state.table_state.Initialize(GetTypes());
+		row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
+		row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
+		result.Verify();
+		// check for any remaining row ids if they also fall into this vector
+		// we try to handle as many rows as possible at the same time
+		idx_t sel_count = 0;
+		for (; r < count; r++) {
+			idx_t current_row = idx_t(row_ids[r]);
+			if (current_row < base_row_id || current_row >= base_row_id + result.size()) {
+				// this row-id does not fall into the current chunk - break
+				break;
+			}
+			auto row_in_vector = current_row - base_row_id;
+			D_ASSERT(row_in_vector < result.size());
+			sel.set_index(sel_count++, row_in_vector);
+		}
+		D_ASSERT(sel_count > 0);
+		// slice the vector with all rows that are present in this vector and erase from the index
+		result.Slice(sel, sel_count);
-	indexes.Scan([&](Index &index) {
-		index.Delete(result, row_identifiers);
-		return false;
-	});
+		indexes.Scan([&](Index &index) {
+			index.Delete(result, row_identifiers);
+			return false;
+		});
+	}
 }
 void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_ids, const vector<column_t> &column_path,