npm - duckdb - Versions diffs - 0.7.2-dev3402.0 → 0.7.2-dev3515.0 - Mend

duckdb 0.7.2-dev3402.0 → 0.7.2-dev3515.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (117) hide show

package/src/duckdb/src/optimizer/unnest_rewriter.cpp CHANGED Viewed

@@ -195,18 +195,24 @@ void UnnestRewriter::UpdateRHSBindings(unique_ptr<LogicalOperator> *plan_ptr, un
 		updater.replace_bindings.push_back(replace_binding);
 	}
-	// temporarily remove the BOUND_UNNEST and the child of the LOGICAL_UNNEST from the plan
+	// temporarily remove the BOUND_UNNESTs and the child of the LOGICAL_UNNEST from the plan
 	D_ASSERT(curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST);
 	auto &unnest = curr_op->get()->Cast<LogicalUnnest>();
-	auto temp_bound_unnest = std::move(unnest.expressions[0]);
+	vector<unique_ptr<Expression>> temp_bound_unnests;
+	for (auto &temp_bound_unnest : unnest.expressions) {
+		temp_bound_unnests.push_back(std::move(temp_bound_unnest));
+	}
+	D_ASSERT(unnest.children.size() == 1);
 	auto temp_unnest_child = std::move(unnest.children[0]);
 	unnest.expressions.clear();
 	unnest.children.clear();
 	// update the bindings of the plan
 	updater.VisitOperator(*plan_ptr->get());
 	updater.replace_bindings.clear();
-	// add the child again
-	unnest.expressions.push_back(std::move(temp_bound_unnest));
+	// add the children again
+	for (auto &temp_bound_unnest : temp_bound_unnests) {
+		unnest.expressions.push_back(std::move(temp_bound_unnest));
+	}
 	unnest.children.push_back(std::move(temp_unnest_child));
 	// add the LHS expressions to each LOGICAL_PROJECTION
@@ -256,6 +262,7 @@ void UnnestRewriter::UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &update
 	D_ASSERT(curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST);
 	auto &unnest = curr_op->get()->Cast<LogicalUnnest>();
+	D_ASSERT(unnest.children.size() == 1);
 	auto unnest_child_cols = unnest.children[0]->GetColumnBindings();
 	for (idx_t delim_col_idx = 0; delim_col_idx < delim_columns.size(); delim_col_idx++) {
 		for (idx_t child_col_idx = 0; child_col_idx < unnest_child_cols.size(); child_col_idx++) {
@@ -268,8 +275,9 @@ void UnnestRewriter::UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &update
 	}
 	// update bindings
-	D_ASSERT(unnest.expressions.size() == 1);
-	updater.VisitExpression(&unnest.expressions[0]);
+	for (auto &unnest_expr : unnest.expressions) {
+		updater.VisitExpression(&unnest_expr);
+	}
 	updater.replace_bindings.clear();
 }

package/src/duckdb/src/parser/statement/insert_statement.cpp CHANGED Viewed

@@ -30,7 +30,7 @@ InsertStatement::InsertStatement(const InsertStatement &other)
     : SQLStatement(other), select_statement(unique_ptr_cast<SQLStatement, SelectStatement>(
                                other.select_statement ? other.select_statement->Copy() : nullptr)),
       columns(other.columns), table(other.table), schema(other.schema), catalog(other.catalog),
-      default_values(other.default_values) {
+      default_values(other.default_values), column_order(other.column_order) {
 	cte_map = other.cte_map.Copy();
 	for (auto &expr : other.returning_list) {
 		returning_list.emplace_back(expr->Copy());
@@ -81,6 +81,9 @@ string InsertStatement::ToString() const {
 	if (table_ref && !table_ref->alias.empty()) {
 		result += StringUtil::Format(" AS %s", KeywordHelper::WriteOptionallyQuoted(table_ref->alias));
 	}
+	if (column_order == InsertColumnOrder::INSERT_BY_NAME) {
+		result += " BY NAME";
+	}
 	if (!columns.empty()) {
 		result += " (";
 		for (idx_t i = 0; i < columns.size(); i++) {

package/src/duckdb/src/parser/transform/statement/transform_insert.cpp CHANGED Viewed

@@ -67,6 +67,16 @@ unique_ptr<InsertStatement> Transformer::TransformInsert(duckdb_libpgquery::PGNo
 		result->on_conflict_info = DummyOnConflictClause(stmt->onConflictAlias, result->schema);
 		result->table_ref = TransformRangeVar(stmt->relation);
 	}
+	switch (stmt->insert_column_order) {
+	case duckdb_libpgquery::PG_INSERT_BY_POSITION:
+		result->column_order = InsertColumnOrder::INSERT_BY_POSITION;
+		break;
+	case duckdb_libpgquery::PG_INSERT_BY_NAME:
+		result->column_order = InsertColumnOrder::INSERT_BY_NAME;
+		break;
+	default:
+		throw InternalException("Unrecognized insert column order in TransformInsert");
+	}
 	result->catalog = qname.catalog;
 	return result;
 }

package/src/duckdb/src/planner/binder/statement/bind_insert.cpp CHANGED Viewed

@@ -25,6 +25,7 @@
 #include "duckdb/planner/tableref/bound_basetableref.hpp"
 #include "duckdb/planner/tableref/bound_dummytableref.hpp"
 #include "duckdb/parser/parsed_expression_iterator.hpp"
+#include "duckdb/storage/table_storage_info.hpp"
 namespace duckdb {
@@ -78,10 +79,10 @@ void ReplaceColumnBindings(Expression &expr, idx_t source, idx_t dest) {
 	    expr, [&](unique_ptr<Expression> &child) { ReplaceColumnBindings(*child, source, dest); });
 }
-void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert *insert, UpdateSetInfo &set_info,
-                                        TableCatalogEntry &table) {
-	D_ASSERT(insert->children.size() == 1);
-	D_ASSERT(insert->children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
+void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert &insert, UpdateSetInfo &set_info,
+                                        TableCatalogEntry &table, TableStorageInfo &storage_info) {
+	D_ASSERT(insert.children.size() == 1);
+	D_ASSERT(insert.children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
 	vector<column_t> logical_column_ids;
 	vector<string> column_names;
@@ -97,13 +98,13 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
 		if (column.Generated()) {
 			throw BinderException("Cant update column \"%s\" because it is a generated column!", column.Name());
 		}
-		if (std::find(insert->set_columns.begin(), insert->set_columns.end(), column.Physical()) !=
-		    insert->set_columns.end()) {
+		if (std::find(insert.set_columns.begin(), insert.set_columns.end(), column.Physical()) !=
+		    insert.set_columns.end()) {
 			throw BinderException("Multiple assignments to same column \"%s\"", colname);
 		}
-		insert->set_columns.push_back(column.Physical());
+		insert.set_columns.push_back(column.Physical());
 		logical_column_ids.push_back(column.Oid());
-		insert->set_types.push_back(column.Type());
+		insert.set_types.push_back(column.Type());
 		column_names.push_back(colname);
 		if (expr->type == ExpressionType::VALUE_DEFAULT) {
 			expr = ExpandDefaultExpression(column);
@@ -120,14 +121,13 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
 			throw BinderException("Expression in the DO UPDATE SET clause can not be a subquery");
 		}
-		insert->expressions.push_back(std::move(bound_expr));
+		insert.expressions.push_back(std::move(bound_expr));
 	}
 	// Figure out which columns are indexed on
 	unordered_set<column_t> indexed_columns;
-	auto &indexes = table.GetStorage().info->indexes.Indexes();
-	for (auto &index : indexes) {
-		for (auto &column_id : index->column_id_set) {
+	for (auto &index : storage_info.index_info) {
+		for (auto &column_id : index.column_set) {
 			indexed_columns.insert(column_id);
 		}
 	}
@@ -142,16 +142,16 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
 	}
 }
-unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert) {
+unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert,
+                                                  TableStorageInfo &storage_info) {
 	auto set_info = make_uniq<UpdateSetInfo>();
 	auto &columns = set_info->columns;
 	// Figure out which columns are indexed on
 	unordered_set<column_t> indexed_columns;
-	auto &indexes = table.GetStorage().info->indexes.Indexes();
-	for (auto &index : indexes) {
-		for (auto &column_id : index->column_id_set) {
+	for (auto &index : storage_info.index_info) {
+		for (auto &column_id : index.column_set) {
 			indexed_columns.insert(column_id);
 		}
 	}
@@ -190,9 +190,6 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
 		insert.action_type = OnConflictAction::THROW;
 		return;
 	}
-	if (!table.IsDuckTable()) {
-		throw BinderException("ON CONFLICT clause is not yet supported for non-DuckDB tables");
-	}
 	D_ASSERT(stmt.table_ref->type == TableReferenceType::BASE_TABLE);
 	// visit the table reference
@@ -208,6 +205,9 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
 	D_ASSERT(on_conflict.action_type != OnConflictAction::THROW);
 	insert.action_type = on_conflict.action_type;
+	// obtain the table storage info
+	auto storage_info = table.GetStorageInfo(context);
 	auto &columns = table.GetColumns();
 	if (!on_conflict.indexed_columns.empty()) {
 		// Bind the ON CONFLICT (<columns>)
@@ -232,18 +232,17 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
 				insert.on_conflict_filter.insert(col.Oid());
 			}
 		}
-		auto &indexes = table.GetStorage().info->indexes;
 		bool index_references_columns = false;
-		indexes.Scan([&](Index &index) {
-			if (!index.IsUnique()) {
-				return false;
+		for (auto &index : storage_info.index_info) {
+			if (!index.is_unique) {
+				continue;
 			}
-			bool index_matches = insert.on_conflict_filter == index.column_id_set;
+			bool index_matches = insert.on_conflict_filter == index.column_set;
 			if (index_matches) {
 				index_references_columns = true;
+				break;
 			}
-			return index_matches;
-		});
+		}
 		if (!index_references_columns) {
 			// Same as before, this is essentially a no-op, turning this into a DO THROW instead
 			// But since this makes no logical sense, it's probably better to throw an error
@@ -254,21 +253,19 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
 		// When omitting the conflict target, the ON CONFLICT applies to every UNIQUE/PRIMARY KEY on the table
 		// We check if there are any constraints on the table, if there aren't we throw an error.
-		auto &indexes = table.GetStorage().info->indexes;
 		idx_t found_matching_indexes = 0;
-		indexes.Scan([&](Index &index) {
-			if (!index.IsUnique()) {
-				return false;
+		for (auto &index : storage_info.index_info) {
+			if (!index.is_unique) {
+				continue;
 			}
 			// does this work with multi-column indexes?
-			auto &indexed_columns = index.column_id_set;
+			auto &indexed_columns = index.column_set;
 			for (auto &column : table.GetColumns().Physical()) {
 				if (indexed_columns.count(column.Physical().index)) {
 					found_matching_indexes++;
 				}
 			}
-			return false;
-		});
+		}
 		if (!found_matching_indexes) {
 			throw BinderException(
 			    "There are no UNIQUE/PRIMARY KEY Indexes that refer to this table, ON CONFLICT is a no-op");
@@ -338,7 +335,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
 	if (insert.action_type == OnConflictAction::REPLACE) {
 		D_ASSERT(on_conflict.set_info == nullptr);
-		on_conflict.set_info = CreateSetInfoForReplace(table, stmt);
+		on_conflict.set_info = CreateSetInfoForReplace(table, stmt, storage_info);
 		insert.action_type = OnConflictAction::UPDATE;
 	}
 	if (on_conflict.set_info && on_conflict.set_info->columns.empty()) {
@@ -374,7 +371,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
 		insert.do_update_condition = std::move(condition);
 	}
-	BindDoUpdateSetExpressions(table_alias, &insert, set_info, table);
+	BindDoUpdateSetExpressions(table_alias, insert, set_info, table, storage_info);
 	// Get the column_ids we need to fetch later on from the conflicting tuples
 	// of the original table, to execute the expressions
@@ -409,6 +406,26 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
 	// Add CTEs as bindable
 	AddCTEMap(stmt.cte_map);
+	auto values_list = stmt.GetValuesList();
+	// bind the root select node (if any)
+	BoundStatement root_select;
+	if (stmt.column_order == InsertColumnOrder::INSERT_BY_NAME) {
+		if (values_list) {
+			throw BinderException("INSERT BY NAME can only be used when inserting from a SELECT statement");
+		}
+		if (!stmt.columns.empty()) {
+			throw BinderException("INSERT BY NAME cannot be combined with an explicit column list");
+		}
+		D_ASSERT(stmt.select_statement);
+		// INSERT BY NAME - generate the columns from the names of the SELECT statement
+		auto select_binder = Binder::CreateBinder(context, this);
+		root_select = select_binder->Bind(*stmt.select_statement);
+		MoveCorrelatedExpressions(*select_binder);
+		stmt.columns = root_select.names;
+	}
 	vector<LogicalIndex> named_column_map;
 	if (!stmt.columns.empty() || stmt.default_values) {
 		// insertion statement specifies column list
@@ -416,6 +433,10 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
 		// create a mapping of (list index) -> (column index)
 		case_insensitive_map_t<idx_t> column_name_map;
 		for (idx_t i = 0; i < stmt.columns.size(); i++) {
+			auto entry = column_name_map.insert(make_pair(stmt.columns[i], i));
+			if (!entry.second) {
+				throw BinderException("Duplicate column name \"%s\" in INSERT", stmt.columns[i]);
+			}
 			column_name_map[stmt.columns[i]] = i;
 			auto column_index = table.GetColumnIndex(stmt.columns[i]);
 			if (column_index.index == COLUMN_IDENTIFIER_ROW_ID) {
@@ -439,8 +460,8 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
 			}
 		}
 	} else {
-		// No columns specified, assume insertion into all columns
-		// Intentionally don't populate 'column_index_map' as an indication of this
+		// insert by position and no columns specified - insertion into all columns of the table
+		// intentionally don't populate 'column_index_map' as an indication of this
 		for (auto &col : table.GetColumns().Physical()) {
 			named_column_map.push_back(col.Logical());
 			insert->expected_types.push_back(col.Type());
@@ -457,7 +478,6 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
 	idx_t expected_columns = stmt.columns.empty() ? table.GetColumns().PhysicalColumnCount() : stmt.columns.size();
 	// special case: check if we are inserting from a VALUES statement
-	auto values_list = stmt.GetValuesList();
 	if (values_list) {
 		auto &expr_list = values_list->Cast<ExpressionListRef>();
 		expr_list.expected_types.resize(expected_columns);
@@ -490,10 +510,12 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
 	// parse select statement and add to logical plan
 	unique_ptr<LogicalOperator> root;
 	if (stmt.select_statement) {
-		auto select_binder = Binder::CreateBinder(context, this);
-		auto root_select = select_binder->Bind(*stmt.select_statement);
-		MoveCorrelatedExpressions(*select_binder);
+		if (stmt.column_order == InsertColumnOrder::INSERT_BY_POSITION) {
+			auto select_binder = Binder::CreateBinder(context, this);
+			root_select = select_binder->Bind(*stmt.select_statement);
+			MoveCorrelatedExpressions(*select_binder);
+		}
+		// inserting from a select - check if the column count matches
 		CheckInsertColumnCountMismatch(expected_columns, root_select.types.size(), !stmt.columns.empty(),
 		                               table.name.c_str());

package/src/duckdb/src/storage/arena_allocator.cpp CHANGED Viewed

@@ -64,7 +64,7 @@ data_ptr_t ArenaAllocator::Allocate(idx_t len) {
 		do {
 			current_capacity *= 2;
 		} while (current_capacity < len);
-		auto new_chunk = make_uniq<ArenaChunk>(allocator, current_capacity);
+		auto new_chunk = make_unsafe_uniq<ArenaChunk>(allocator, current_capacity);
 		if (head) {
 			head->prev = new_chunk.get();
 			new_chunk->next = std::move(head);

package/src/duckdb/src/storage/buffer/buffer_handle.cpp CHANGED Viewed

@@ -7,7 +7,8 @@ namespace duckdb {
 BufferHandle::BufferHandle() : handle(nullptr), node(nullptr) {
 }
-BufferHandle::BufferHandle(shared_ptr<BlockHandle> handle, FileBuffer *node) : handle(std::move(handle)), node(node) {
+BufferHandle::BufferHandle(shared_ptr<BlockHandle> handle_p, FileBuffer *node_p)
+    : handle(std::move(handle_p)), node(node_p) {
 }
 BufferHandle::BufferHandle(BufferHandle &&other) noexcept {
@@ -29,16 +30,6 @@ bool BufferHandle::IsValid() const {
 	return node != nullptr;
 }
-data_ptr_t BufferHandle::Ptr() const {
-	D_ASSERT(IsValid());
-	return node->buffer;
-}
-data_ptr_t BufferHandle::Ptr() {
-	D_ASSERT(IsValid());
-	return node->buffer;
-}
 void BufferHandle::Destroy() {
 	if (!handle || !IsValid()) {
 		return;

package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp CHANGED Viewed

@@ -32,7 +32,7 @@ void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result
 	MiniZStream s;
 	size_t compressed_size = 0;
 	compressed_size = s.MaxCompressedLength(uncompressed_size);
-	auto compressed_buf = unique_ptr<data_t[]>(new data_t[compressed_size]);
+	auto compressed_buf = make_unsafe_array<data_t>(compressed_size);
 	s.Compress((const char *)string.GetData(), uncompressed_size, (char *)compressed_buf.get(), &compressed_size);
 	string_t compressed_string((const char *)compressed_buf.get(), compressed_size);

package/src/duckdb/src/storage/compression/string_uncompressed.cpp CHANGED Viewed

@@ -292,13 +292,13 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
 		offset += 2 * sizeof(uint32_t);
 		data_ptr_t decompression_ptr;
-		unique_ptr<data_t[]> decompression_buffer;
+		unsafe_array_ptr<data_t> decompression_buffer;
 		// If string is in single block we decompress straight from it, else we copy first
 		if (remaining <= Storage::BLOCK_SIZE - sizeof(block_id_t) - offset) {
 			decompression_ptr = handle.Ptr() + offset;
 		} else {
-			decompression_buffer = unique_ptr<data_t[]>(new data_t[compressed_size]);
+			decompression_buffer = make_unsafe_array<data_t>(compressed_size);
 			auto target_ptr = decompression_buffer.get();
 			// now append the string to the single buffer

package/src/duckdb/src/storage/statistics/list_stats.cpp CHANGED Viewed

@@ -7,7 +7,7 @@
 namespace duckdb {
 void ListStats::Construct(BaseStatistics &stats) {
-	stats.child_stats = unique_ptr<BaseStatistics[]>(new BaseStatistics[1]);
+	stats.child_stats = unsafe_array_ptr<BaseStatistics>(new BaseStatistics[1]);
 	BaseStatistics::Construct(stats.child_stats[0], ListType::GetChildType(stats.GetType()));
 }

package/src/duckdb/src/storage/statistics/struct_stats.cpp CHANGED Viewed

@@ -7,7 +7,7 @@ namespace duckdb {
 void StructStats::Construct(BaseStatistics &stats) {
 	auto &child_types = StructType::GetChildTypes(stats.GetType());
-	stats.child_stats = unique_ptr<BaseStatistics[]>(new BaseStatistics[child_types.size()]);
+	stats.child_stats = unsafe_array_ptr<BaseStatistics>(new BaseStatistics[child_types.size()]);
 	for (idx_t i = 0; i < child_types.size(); i++) {
 		BaseStatistics::Construct(stats.child_stats[i], child_types[i].second);
 	}

package/src/duckdb/src/storage/table/row_group.cpp CHANGED Viewed

@@ -155,7 +155,7 @@ void ColumnScanState::Initialize(const LogicalType &type) {
 void CollectionScanState::Initialize(const vector<LogicalType> &types) {
 	auto &column_ids = GetColumnIds();
-	column_scans = unique_ptr<ColumnScanState[]>(new ColumnScanState[column_ids.size()]);
+	column_scans = make_unsafe_array<ColumnScanState>(column_ids.size());
 	for (idx_t i = 0; i < column_ids.size(); i++) {
 		if (column_ids[i] == COLUMN_IDENTIFIER_ROW_ID) {
 			continue;
@@ -695,7 +695,7 @@ void RowGroup::InitializeAppend(RowGroupAppendState &append_state) {
 	append_state.row_group = this;
 	append_state.offset_in_row_group = this->count;
 	// for each column, initialize the append state
-	append_state.states = unique_ptr<ColumnAppendState[]>(new ColumnAppendState[GetColumnCount()]);
+	append_state.states = make_unsafe_array<ColumnAppendState>(GetColumnCount());
 	for (idx_t i = 0; i < GetColumnCount(); i++) {
 		auto &col_data = GetColumn(i);
 		col_data.InitializeAppend(append_state.states[i]);

package/src/duckdb/src/storage/table/row_group_collection.cpp CHANGED Viewed

@@ -526,43 +526,59 @@ void RowGroupCollection::Update(TransactionData transaction, row_t *ids, const v
 void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_identifiers, idx_t count) {
 	auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
-	// figure out which row_group to fetch from
-	auto row_group = row_groups->GetSegment(row_ids[0]);
-	auto row_group_vector_idx = (row_ids[0] - row_group->start) / STANDARD_VECTOR_SIZE;
-	auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
-	// create a selection vector from the row_ids
-	SelectionVector sel(STANDARD_VECTOR_SIZE);
-	for (idx_t i = 0; i < count; i++) {
-		auto row_in_vector = row_ids[i] - base_row_id;
-		D_ASSERT(row_in_vector < STANDARD_VECTOR_SIZE);
-		sel.set_index(i, row_in_vector);
-	}
-	// now fetch the columns from that row_group
-	TableScanState state;
-	state.table_state.max_row = row_start + total_rows;
+	// initialize the fetch state
 	// FIXME: we do not need to fetch all columns, only the columns required by the indices!
+	TableScanState state;
 	vector<column_t> column_ids;
 	column_ids.reserve(types.size());
 	for (idx_t i = 0; i < types.size(); i++) {
 		column_ids.push_back(i);
 	}
 	state.Initialize(std::move(column_ids));
+	state.table_state.max_row = row_start + total_rows;
+	// initialize the fetch chunk
 	DataChunk result;
 	result.Initialize(GetAllocator(), types);
-	state.table_state.Initialize(GetTypes());
-	row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
-	row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
-	result.Slice(sel, count);
+	SelectionVector sel(STANDARD_VECTOR_SIZE);
+	// now iterate over the row ids
+	for (idx_t r = 0; r < count;) {
+		result.Reset();
+		// figure out which row_group to fetch from
+		auto row_id = row_ids[r];
+		auto row_group = row_groups->GetSegment(row_id);
+		auto row_group_vector_idx = (row_id - row_group->start) / STANDARD_VECTOR_SIZE;
+		auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
+		// fetch the current vector
+		state.table_state.Initialize(GetTypes());
+		row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
+		row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
+		result.Verify();
+		// check for any remaining row ids if they also fall into this vector
+		// we try to handle as many rows as possible at the same time
+		idx_t sel_count = 0;
+		for (; r < count; r++) {
+			idx_t current_row = idx_t(row_ids[r]);
+			if (current_row < base_row_id || current_row >= base_row_id + result.size()) {
+				// this row-id does not fall into the current chunk - break
+				break;
+			}
+			auto row_in_vector = current_row - base_row_id;
+			D_ASSERT(row_in_vector < result.size());
+			sel.set_index(sel_count++, row_in_vector);
+		}
+		D_ASSERT(sel_count > 0);
+		// slice the vector with all rows that are present in this vector and erase from the index
+		result.Slice(sel, sel_count);
-	indexes.Scan([&](Index &index) {
-		index.Delete(result, row_identifiers);
-		return false;
-	});
+		indexes.Scan([&](Index &index) {
+			index.Delete(result, row_identifiers);
+			return false;
+		});
+	}
 }
 void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_ids, const vector<column_t> &column_path,

package/src/duckdb/src/storage/table/update_segment.cpp CHANGED Viewed

@@ -1042,8 +1042,9 @@ static idx_t SortSelectionVector(SelectionVector &sel, idx_t count, row_t *ids)
 	return pos;
 }
-UpdateInfo *CreateEmptyUpdateInfo(TransactionData transaction, idx_t type_size, idx_t count, unique_ptr<char[]> &data) {
-	data = unique_ptr<char[]>(new char[sizeof(UpdateInfo) + (sizeof(sel_t) + type_size) * STANDARD_VECTOR_SIZE]);
+UpdateInfo *CreateEmptyUpdateInfo(TransactionData transaction, idx_t type_size, idx_t count,
+                                  unsafe_array_ptr<char> &data) {
+	data = make_unsafe_array<char>(sizeof(UpdateInfo) + (sizeof(sel_t) + type_size) * STANDARD_VECTOR_SIZE);
 	auto update_info = (UpdateInfo *)data.get();
 	update_info->max = STANDARD_VECTOR_SIZE;
 	update_info->tuples = (sel_t *)(((data_ptr_t)update_info) + sizeof(UpdateInfo));
@@ -1109,7 +1110,7 @@ void UpdateSegment::Update(TransactionData transaction, idx_t column_index, Vect
 			}
 			node = node->next;
 		}
-		unique_ptr<char[]> update_info_data;
+		unsafe_array_ptr<char> update_info_data;
 		if (!node) {
 			// no updates made yet by this transaction: initially the update info to empty
 			if (transaction.transaction) {
@@ -1144,8 +1145,8 @@ void UpdateSegment::Update(TransactionData transaction, idx_t column_index, Vect
 		auto result = make_uniq<UpdateNodeData>();
 		result->info = make_uniq<UpdateInfo>();
-		result->tuples = unique_ptr<sel_t[]>(new sel_t[STANDARD_VECTOR_SIZE]);
-		result->tuple_data = unique_ptr<data_t[]>(new data_t[STANDARD_VECTOR_SIZE * type_size]);
+		result->tuples = make_unsafe_array<sel_t>(STANDARD_VECTOR_SIZE);
+		result->tuple_data = make_unsafe_array<data_t>(STANDARD_VECTOR_SIZE * type_size);
 		result->info->tuples = result->tuples.get();
 		result->info->tuple_data = result->tuple_data.get();
 		result->info->version_number = TRANSACTION_ID_START - 1;
@@ -1153,7 +1154,7 @@ void UpdateSegment::Update(TransactionData transaction, idx_t column_index, Vect
 		InitializeUpdateInfo(*result->info, ids, sel, count, vector_index, vector_offset);
 		// now create the transaction level update info in the undo log
-		unique_ptr<char[]> update_info_data;
+		unsafe_array_ptr<char> update_info_data;
 		UpdateInfo *transaction_node;
 		if (transaction.transaction) {
 			transaction_node = transaction.transaction->CreateUpdateInfo(type_size, count);

package/src/duckdb/third_party/fsst/libfsst.cpp CHANGED Viewed

@@ -381,8 +381,7 @@ static inline size_t compressBulk(SymbolTable &symbolTable, size_t nlines, size_
 	size_t curLine, suffixLim = symbolTable.suffixLim;
 	u8 byteLim = symbolTable.nSymbols + symbolTable.zeroTerminated - symbolTable.lenHisto[0];
-	u8 buf[512+7]; /* +7 sentinel is to avoid 8-byte unaligned-loads going beyond 511 out-of-bounds */
-	memset(buf+511, 0, 8); /* and initialize the sentinal bytes */
+	u8 buf[512+7] = {}; /* +7 sentinel is to avoid 8-byte unaligned-loads going beyond 511 out-of-bounds */
 	// three variants are possible. dead code falls away since the bool arguments are constants
 	auto compressVariant = [&](bool noSuffixOpt, bool avoidBranch) {

package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp CHANGED Viewed

@@ -814,4 +814,13 @@ typedef enum PGOnConflictActionAlias {
 	PG_ONCONFLICT_ALIAS_IGNORE   /* INSERT OR IGNORE */
 } PGOnConflictActionAlias;
+/*
+ * PGInsertColumnOrder
+ *    "INSERT BY [POSITION|NAME]"
+ */
+typedef enum PGInsertColumnOrder {
+	PG_INSERT_BY_POSITION,    /* INSERT BY POSITION (default behavior) */
+	PG_INSERT_BY_NAME,        /* INSERT BY NAME */
+} PGInsertColumnOrder;
 }

package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp CHANGED Viewed

@@ -1045,11 +1045,11 @@ typedef struct PGInferClause {
  */
 typedef struct PGOnConflictClause {
 	PGNodeTag type;
-	PGOnConflictAction action; /* DO NOTHING or UPDATE? */
-	PGInferClause *infer;      /* Optional index inference clause */
-	PGList *targetList;        /* the target list (of PGResTarget) */
-	PGNode *whereClause;       /* qualifications */
-	int location;              /* token location, or -1 if unknown */
+	PGOnConflictAction action;               /* DO NOTHING or UPDATE? */
+	PGInferClause *infer;                    /* Optional index inference clause */
+	PGList *targetList;                      /* the target list (of PGResTarget) */
+	PGNode *whereClause;                     /* qualifications */
+	int location;                            /* token location, or -1 if unknown */
 } PGOnConflictClause;
 /*
@@ -1125,14 +1125,15 @@ typedef struct PGRawStmt {
  */
 typedef struct PGInsertStmt {
 	PGNodeTag type;
-	PGRangeVar *relation;                 /* relation to insert into */
-	PGList *cols;                         /* optional: names of the target columns */
-	PGNode *selectStmt;                   /* the source SELECT/VALUES, or NULL */
+	PGRangeVar *relation;                    /* relation to insert into */
+	PGList *cols;                            /* optional: names of the target columns */
+	PGNode *selectStmt;                      /* the source SELECT/VALUES, or NULL */
 	PGOnConflictActionAlias onConflictAlias; /* the (optional) shorthand provided for the onConflictClause */
-	PGOnConflictClause *onConflictClause; /* ON CONFLICT clause */
-	PGList *returningList;                /* list of expressions to return */
-	PGWithClause *withClause;             /* WITH clause */
-	PGOverridingKind override;            /* OVERRIDING clause */
+	PGOnConflictClause *onConflictClause;    /* ON CONFLICT clause */
+	PGList *returningList;                   /* list of expressions to return */
+	PGWithClause *withClause;                /* WITH clause */
+	PGOverridingKind override;               /* OVERRIDING clause */
+	PGInsertColumnOrder insert_column_order; /* INSERT BY NAME or INSERT BY POSITION */
 } PGInsertStmt;
 /* ----------------------

package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp CHANGED Viewed

@@ -1065,9 +1065,10 @@ typedef union YYSTYPE
 	PGLockWaitPolicy lockwaitpolicy;
 	PGSubLinkType subquerytype;
 	PGViewCheckOption viewcheckoption;
+	PGInsertColumnOrder bynameorposition;
 }
 /* Line 1529 of yacc.c.  */
-#line 1071 "third_party/libpg_query/grammar/grammar_out.hpp"
+#line 1072 "third_party/libpg_query/grammar/grammar_out.hpp"
 	YYSTYPE;
 # define yystype YYSTYPE /* obsolescent; will be withdrawn */
 # define YYSTYPE_IS_DECLARED 1