npm - duckdb - Versions diffs - 0.6.1-dev86.0 → 0.6.2-dev13.0 - Mend

duckdb 0.6.1-dev86.0 → 0.6.2-dev13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/package.json +1 -1
package/src/connection.cpp +100 -99
package/src/duckdb.cpp +1567 -811
package/src/duckdb.hpp +100 -35
package/src/duckdb_node.hpp +0 -1
package/src/parquet-amalgamation.cpp +13204 -13194
package/test/arrow.test.js +36 -45

package/src/duckdb.cpp CHANGED Viewed

@@ -652,6 +652,7 @@ static constexpr ExtensionFunction EXTENSION_FUNCTIONS[] = {
     {"from_substrait", "substrait"},
     {"get_substrait", "substrait"},
     {"get_substrait_json", "substrait"},
+    {"from_substrait_json", "substrait"},
     {"icu_calendar_names", "icu"},
     {"icu_sort_key", "icu"},
     {"json", "json"},
@@ -1405,7 +1406,7 @@ CopyFunctionCatalogEntry::CopyFunctionCatalogEntry(Catalog *catalog, SchemaCatal
 //===----------------------------------------------------------------------===//
 //                         DuckDB
 //
-// duckdb/common/bit_operations.hpp
+// duckdb/common/radix.hpp
 //
 //
 //===----------------------------------------------------------------------===//
@@ -4121,6 +4122,20 @@ unique_ptr<CatalogEntry> TableCatalogEntry::AlterEntry(ClientContext &context, A
 	}
 }
+//! Reverts the side effects that an alter of this table left outside of the catalog entry itself.
+//! \param context The client context (unused here)
+//! \param info    The alter that is being undone; must be an AlterTableInfo (asserted)
+//! Only RENAME_TABLE needs work: the table name stored in the storage info is reset to this entry's name.
+//! All other alter types are a no-op.
+void TableCatalogEntry::UndoAlter(ClientContext &context, AlterInfo *info) {
+	D_ASSERT(!internal);
+	D_ASSERT(info->type == AlterType::ALTER_TABLE);
+	auto table_info = (AlterTableInfo *)info;
+	switch (table_info->alter_table_type) {
+	case AlterTableType::RENAME_TABLE: {
+		// point the storage info back at the name of this (pre-rename) entry
+		storage->info->table = this->name;
+		break;
+	}
+	default:
+		break;
+	}
+}
 static void RenameExpression(ParsedExpression &expr, RenameColumnInfo &info) {
 	if (expr.type == ExpressionType::COLUMN_REF) {
 		auto &colref = (ColumnRefExpression &)expr;
@@ -4219,6 +4234,8 @@ unique_ptr<CatalogEntry> TableCatalogEntry::AddColumn(ClientContext &context, Ad
 		create_info->constraints.push_back(constraint->Copy());
 	}
 	Binder::BindLogicalType(context, info.new_column.TypeMutable(), schema->name);
+	info.new_column.SetOid(columns.LogicalColumnCount());
+	info.new_column.SetStorageOid(columns.PhysicalColumnCount());
 	auto col = info.new_column.Copy();
 	create_info->columns.AddColumn(move(col));
@@ -4966,6 +4983,9 @@ unique_ptr<CatalogEntry> CatalogEntry::AlterEntry(ClientContext &context, AlterI
 	throw InternalException("Unsupported alter type for catalog entry!");
 }
+//! Base implementation of UndoAlter: intentionally does nothing.
+//! TableCatalogEntry defines its own UndoAlter that resets the stored table name after a rename.
+// NOTE(review): presumably this is a virtual hook overridden by entry types - confirm in the header.
+void CatalogEntry::UndoAlter(ClientContext &context, AlterInfo *info) {
+}
 unique_ptr<CatalogEntry> CatalogEntry::Copy(ClientContext &context) {
 	throw InternalException("Unsupported copy type for catalog entry!");
 }
@@ -5144,6 +5164,98 @@ private:
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// duckdb/catalog/mapping_value.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+namespace duckdb {
+struct AlterInfo;
+class ClientContext;
+//! Reference-counted handle to one slot of a CatalogSet's `entries` map.
+//! Constructing a handle on a slot increments that slot's reference_count; destroying the handle
+//! decrements it and erases the slot from the map once the count reaches zero.
+//! Handles are move-only: use Copy() to explicitly take an additional reference.
+struct EntryIndex {
+	//! Creates an empty handle that refers to no catalog set
+	EntryIndex() : catalog(nullptr), index(DConstants::INVALID_INDEX) {
+	}
+	//! Creates a handle to slot `index` of `catalog` and takes a reference on it.
+	//! Throws an InternalException if the slot does not exist.
+	EntryIndex(CatalogSet &catalog, idx_t index) : catalog(&catalog), index(index) {
+		auto entry = catalog.entries.find(index);
+		if (entry == catalog.entries.end()) {
+			throw InternalException("EntryIndex - Catalog entry not found in constructor!?");
+		}
+		// reuse the iterator from the lookup above instead of searching the map a second time
+		entry->second.reference_count++;
+	}
+	//! Releases the reference held by this handle (no-op for an empty handle)
+	~EntryIndex() {
+		if (!catalog) {
+			return;
+		}
+		auto entry = catalog->entries.find(index);
+		D_ASSERT(entry != catalog->entries.end());
+		auto remaining_ref = --entry->second.reference_count;
+		if (remaining_ref == 0) {
+			// last reference is gone: drop the slot through the iterator we already hold
+			catalog->entries.erase(entry);
+		}
+		catalog = nullptr;
+	}
+	// disable copy constructors
+	EntryIndex(const EntryIndex &other) = delete;
+	EntryIndex &operator=(const EntryIndex &) = delete;
+	//! enable move constructors
+	//! The moved-from handle is left empty, so it releases nothing when destroyed
+	EntryIndex(EntryIndex &&other) noexcept {
+		catalog = nullptr;
+		index = DConstants::INVALID_INDEX;
+		std::swap(catalog, other.catalog);
+		std::swap(index, other.index);
+	}
+	//! Move assignment swaps the two handles: the reference previously held by this handle
+	//! is released when `other` is destroyed
+	EntryIndex &operator=(EntryIndex &&other) noexcept {
+		std::swap(catalog, other.catalog);
+		std::swap(index, other.index);
+		return *this;
+	}
+	//! Returns the owning pointer stored in the referenced slot.
+	//! Throws an InternalException if the slot is no longer present in the map.
+	//! Must not be called on an empty handle (catalog is dereferenced unconditionally).
+	unique_ptr<CatalogEntry> &GetEntry() {
+		auto entry = catalog->entries.find(index);
+		if (entry == catalog->entries.end()) {
+			throw InternalException("EntryIndex - Catalog entry not found!?");
+		}
+		return entry->second.entry;
+	}
+	//! Returns the raw slot index (DConstants::INVALID_INDEX for an empty handle)
+	idx_t GetIndex() {
+		return index;
+	}
+	//! Returns a new handle to the same slot, taking an additional reference.
+	//! Copying an empty handle yields another empty handle.
+	EntryIndex Copy() {
+		if (catalog) {
+			return EntryIndex(*catalog, index);
+		} else {
+			return EntryIndex();
+		}
+	}
+private:
+	//! The catalog set whose slot is referenced, or nullptr for an empty handle
+	CatalogSet *catalog;
+	//! The key of the referenced slot in catalog->entries
+	idx_t index;
+};
+//! One version of a name mapping: ties a name to a slot of the catalog set through an EntryIndex.
+struct MappingValue {
+	//! Takes ownership of the given handle; the value starts with timestamp 0, not deleted and without a parent
+	explicit MappingValue(EntryIndex index_p) : index(move(index_p)), timestamp(0), deleted(false), parent(nullptr) {
+	}
+	//! Handle to the catalog entry slot this mapping refers to
+	EntryIndex index;
+	//! Initialized to 0 here; CatalogSet::PutMapping and DeleteMapping overwrite it with a transaction id
+	transaction_t timestamp;
+	//! True when this value marks the name as removed (set by CatalogSet::DeleteMapping)
+	bool deleted;
+	//! Owned older mapping value (DeleteMapping moves the previous mapping for the name in here)
+	unique_ptr<MappingValue> child;
+	//! Non-owning pointer, null on construction
+	// NOTE(review): presumably the newer value that owns this one through `child` - confirm against PutMapping
+	MappingValue *parent;
+};
+} // namespace duckdb
 namespace duckdb {
@@ -5157,27 +5269,44 @@ namespace duckdb {
 class EntryDropper {
 public:
 	//! Both constructor and destructor are privates because they should only be called by DropEntryDependencies
-	explicit EntryDropper(CatalogSet &catalog_set, idx_t entry_index)
-	    : catalog_set(catalog_set), entry_index(entry_index) {
-		old_deleted = catalog_set.entries[entry_index].get()->deleted;
+	explicit EntryDropper(EntryIndex &entry_index_p) : entry_index(entry_index_p) {
+		old_deleted = entry_index.GetEntry()->deleted;
 	}
 	~EntryDropper() {
-		catalog_set.entries[entry_index].get()->deleted = old_deleted;
+		entry_index.GetEntry()->deleted = old_deleted;
 	}
 private:
-	//! The current catalog_set
-	CatalogSet &catalog_set;
 	//! Keeps track of the state of the entry before starting the delete
 	bool old_deleted;
 	//! Index of entry to be deleted
-	idx_t entry_index;
+	EntryIndex &entry_index;
 };
 CatalogSet::CatalogSet(Catalog &catalog, unique_ptr<DefaultGenerator> defaults)
     : catalog(catalog), defaults(move(defaults)) {
 }
+//! Out-of-line destructor with an empty body; members are destroyed implicitly.
+// NOTE(review): presumably defined in the source file so the header does not need complete member types - confirm.
+CatalogSet::~CatalogSet() {
+}
+//! Registers `entry` under the not-yet-used slot `entry_index` and returns a handle to that slot.
+//! Throws an InternalException if the slot is already occupied.
+EntryIndex CatalogSet::PutEntry(idx_t entry_index, unique_ptr<CatalogEntry> entry) {
+	// a slot may only be created once: refuse to overwrite an existing one
+	const bool slot_taken = entries.count(entry_index) > 0;
+	if (slot_taken) {
+		throw InternalException("Entry with entry index \"%llu\" already exists", entry_index);
+	}
+	entries.emplace(entry_index, EntryValue(move(entry)));
+	// the returned handle takes the first reference on the new slot
+	return EntryIndex(*this, entry_index);
+}
+//! Installs `catalog_entry` as the new head of the version chain stored in the slot referenced by `index`.
+//! The entry that was in the slot becomes the child of the new entry.
+//! Throws an InternalException if the slot does not exist.
+void CatalogSet::PutEntry(EntryIndex index, unique_ptr<CatalogEntry> catalog_entry) {
+	auto slot = entries.find(index.GetIndex());
+	if (slot == entries.end()) {
+		throw InternalException("Entry with entry index \"%llu\" does not exist", index.GetIndex());
+	}
+	auto &slot_entry = slot->second.entry;
+	auto new_head = catalog_entry.get();
+	// chain the previous head below the new entry and link it back to its new parent
+	new_head->child = move(slot_entry);
+	new_head->child->parent = new_head;
+	// finally hand ownership of the new head to the slot
+	slot_entry = move(catalog_entry);
+}
 bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_ptr<CatalogEntry> value,
                              unordered_set<CatalogEntry *> &dependencies) {
@@ -5188,7 +5317,7 @@ bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_
 	unique_lock<mutex> read_lock(catalog_lock);
 	// first check if the entry exists in the unordered set
-	idx_t entry_index;
+	idx_t index;
 	auto mapping_value = GetMapping(context, name);
 	if (mapping_value == nullptr || mapping_value->deleted) {
 		// if it does not: entry has never been created
@@ -5202,17 +5331,17 @@ bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_
 		// first create a dummy deleted entry for this entry
 		// so transactions started before the commit of this transaction don't
 		// see it yet
-		entry_index = current_entry++;
 		auto dummy_node = make_unique<CatalogEntry>(CatalogType::INVALID, value->catalog, name);
 		dummy_node->timestamp = 0;
 		dummy_node->deleted = true;
 		dummy_node->set = this;
-		entries[entry_index] = move(dummy_node);
-		PutMapping(context, name, entry_index);
+		auto entry_index = PutEntry(current_entry++, move(dummy_node));
+		index = entry_index.GetIndex();
+		PutMapping(context, name, move(entry_index));
 	} else {
-		entry_index = mapping_value->index;
-		auto &current = *entries[entry_index];
+		index = mapping_value->index.GetIndex();
+		auto &current = *mapping_value->index.GetEntry();
 		// if it does, we have to check version numbers
 		if (HasConflict(context, current.timestamp)) {
 			// current version has been written to by a currently active
@@ -5234,16 +5363,16 @@ bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_
 	// now add the dependency set of this object to the dependency manager
 	catalog.dependency_manager->AddObject(context, value.get(), dependencies);
-	value->child = move(entries[entry_index]);
-	value->child->parent = value.get();
+	auto value_ptr = value.get();
+	EntryIndex entry_index(*this, index);
+	PutEntry(move(entry_index), move(value));
 	// push the old entry in the undo buffer for this transaction
-	transaction.PushCatalogEntry(value->child.get());
-	entries[entry_index] = move(value);
+	transaction.PushCatalogEntry(value_ptr->child.get());
 	return true;
 }
-bool CatalogSet::GetEntryInternal(ClientContext &context, idx_t entry_index, CatalogEntry *&catalog_entry) {
-	catalog_entry = entries[entry_index].get();
+bool CatalogSet::GetEntryInternal(ClientContext &context, EntryIndex &entry_index, CatalogEntry *&catalog_entry) {
+	catalog_entry = entry_index.GetEntry().get();
 	// if it does: we have to retrieve the entry and to check version numbers
 	if (HasConflict(context, catalog_entry->timestamp)) {
 		// current version has been written to by a currently active
@@ -5259,21 +5388,22 @@ bool CatalogSet::GetEntryInternal(ClientContext &context, idx_t entry_index, Cat
 	return true;
 }
-bool CatalogSet::GetEntryInternal(ClientContext &context, const string &name, idx_t &entry_index,
+bool CatalogSet::GetEntryInternal(ClientContext &context, const string &name, EntryIndex *entry_index,
                                   CatalogEntry *&catalog_entry) {
 	auto mapping_value = GetMapping(context, name);
 	if (mapping_value == nullptr || mapping_value->deleted) {
 		// the entry does not exist, check if we can create a default entry
 		return false;
 	}
-	entry_index = mapping_value->index;
-	return GetEntryInternal(context, entry_index, catalog_entry);
+	if (entry_index) {
+		*entry_index = mapping_value->index.Copy();
+	}
+	return GetEntryInternal(context, mapping_value->index, catalog_entry);
 }
 bool CatalogSet::AlterOwnership(ClientContext &context, ChangeOwnershipInfo *info) {
-	idx_t entry_index;
 	CatalogEntry *entry;
-	if (!GetEntryInternal(context, info->name, entry_index, entry)) {
+	if (!GetEntryInternal(context, info->name, nullptr, entry)) {
 		return false;
 	}
@@ -5293,9 +5423,9 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
 	lock_guard<mutex> write_lock(catalog.write_lock);
 	// first check if the entry exists in the unordered set
-	idx_t entry_index;
+	EntryIndex entry_index;
 	CatalogEntry *entry;
-	if (!GetEntryInternal(context, name, entry_index, entry)) {
+	if (!GetEntryInternal(context, name, &entry_index, entry)) {
 		return false;
 	}
 	if (entry->internal) {
@@ -5318,8 +5448,9 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
 	if (value->name != original_name) {
 		auto mapping_value = GetMapping(context, value->name);
 		if (mapping_value && !mapping_value->deleted) {
-			auto entry = GetEntryForTransaction(context, entries[mapping_value->index].get());
-			if (!entry->deleted) {
+			auto original_entry = GetEntryForTransaction(context, mapping_value->index.GetEntry().get());
+			if (!original_entry->deleted) {
+				entry->UndoAlter(context, alter_info);
 				string rename_err_msg =
 				    "Could not rename \"%s\" to \"%s\": another entry with this name already exists!";
 				throw CatalogException(rename_err_msg, original_name, value->name);
@@ -5329,25 +5460,22 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
 	if (value->name != original_name) {
 		// Do PutMapping and DeleteMapping after dependency check
-		PutMapping(context, value->name, entry_index);
+		PutMapping(context, value->name, entry_index.Copy());
 		DeleteMapping(context, original_name);
 	}
 	value->timestamp = transaction.transaction_id;
-	value->child = move(entries[entry_index]);
-	value->child->parent = value.get();
 	value->set = this;
+	auto new_entry = value.get();
+	PutEntry(move(entry_index), move(value));
 	// serialize the AlterInfo into a temporary buffer
 	BufferedSerializer serializer;
 	alter_info->Serialize(serializer);
 	BinaryData serialized_alter = serializer.GetData();
-	auto new_entry = value.get();
 	// push the old entry in the undo buffer for this transaction
-	transaction.PushCatalogEntry(value->child.get(), serialized_alter.data.get(), serialized_alter.size);
-	entries[entry_index] = move(value);
+	transaction.PushCatalogEntry(new_entry->child.get(), serialized_alter.data.get(), serialized_alter.size);
 	// Check the dependency manager to verify that there are no conflicting dependencies with this alter
 	// Note that we do this AFTER the new entry has been entirely set up in the catalog set
@@ -5358,13 +5486,13 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
 	return true;
 }
-void CatalogSet::DropEntryDependencies(ClientContext &context, idx_t entry_index, CatalogEntry &entry, bool cascade) {
+void CatalogSet::DropEntryDependencies(ClientContext &context, EntryIndex &entry_index, CatalogEntry &entry,
+                                       bool cascade) {
 	// Stores the deleted value of the entry before starting the process
-	EntryDropper dropper(*this, entry_index);
+	EntryDropper dropper(entry_index);
 	// To correctly delete the object and its dependencies, it temporarily is set to deleted.
-	entries[entry_index].get()->deleted = true;
+	entry_index.GetEntry()->deleted = true;
 	// check any dependencies of this object
 	entry.catalog->dependency_manager->DropObject(context, &entry, cascade);
@@ -5374,7 +5502,7 @@ void CatalogSet::DropEntryDependencies(ClientContext &context, idx_t entry_index
 	// dropper.~EntryDropper()
 }
-void CatalogSet::DropEntryInternal(ClientContext &context, idx_t entry_index, CatalogEntry &entry, bool cascade) {
+void CatalogSet::DropEntryInternal(ClientContext &context, EntryIndex entry_index, CatalogEntry &entry, bool cascade) {
 	auto &transaction = Transaction::GetTransaction(context);
 	DropEntryDependencies(context, entry_index, entry, cascade);
@@ -5384,31 +5512,30 @@ void CatalogSet::DropEntryInternal(ClientContext &context, idx_t entry_index, Ca
 	// and point it at the dummy node
 	auto value = make_unique<CatalogEntry>(CatalogType::DELETED_ENTRY, entry.catalog, entry.name);
 	value->timestamp = transaction.transaction_id;
-	value->child = move(entries[entry_index]);
-	value->child->parent = value.get();
 	value->set = this;
 	value->deleted = true;
+	auto value_ptr = value.get();
+	PutEntry(move(entry_index), move(value));
 	// push the old entry in the undo buffer for this transaction
-	transaction.PushCatalogEntry(value->child.get());
-	entries[entry_index] = move(value);
+	transaction.PushCatalogEntry(value_ptr->child.get());
 }
 bool CatalogSet::DropEntry(ClientContext &context, const string &name, bool cascade) {
 	// lock the catalog for writing
 	lock_guard<mutex> write_lock(catalog.write_lock);
 	// we can only delete an entry that exists
-	idx_t entry_index;
+	EntryIndex entry_index;
 	CatalogEntry *entry;
-	if (!GetEntryInternal(context, name, entry_index, entry)) {
+	if (!GetEntryInternal(context, name, &entry_index, entry)) {
 		return false;
 	}
 	if (entry->internal) {
 		throw CatalogException("Cannot drop entry \"%s\" because it is an internal system entry", entry->name);
 	}
-	DropEntryInternal(context, entry_index, *entry, cascade);
+	lock_guard<mutex> read_lock(catalog_lock);
+	DropEntryInternal(context, move(entry_index), *entry, cascade);
 	return true;
 }
@@ -5426,12 +5553,10 @@ void CatalogSet::CleanupEntry(CatalogEntry *catalog_entry) {
 		if (parent->deleted && !parent->child && !parent->parent) {
 			auto mapping_entry = mapping.find(parent->name);
 			D_ASSERT(mapping_entry != mapping.end());
-			auto index = mapping_entry->second->index;
-			auto entry = entries.find(index);
-			D_ASSERT(entry != entries.end());
-			if (entry->second.get() == parent) {
+			auto entry = mapping_entry->second->index.GetEntry().get();
+			D_ASSERT(entry);
+			if (entry == parent) {
 				mapping.erase(mapping_entry);
-				entries.erase(entry);
 			}
 		}
 	}
@@ -5465,9 +5590,9 @@ MappingValue *CatalogSet::GetMapping(ClientContext &context, const string &name,
 	return mapping_value;
 }
-void CatalogSet::PutMapping(ClientContext &context, const string &name, idx_t entry_index) {
+void CatalogSet::PutMapping(ClientContext &context, const string &name, EntryIndex entry_index) {
 	auto entry = mapping.find(name);
-	auto new_value = make_unique<MappingValue>(entry_index);
+	auto new_value = make_unique<MappingValue>(move(entry_index));
 	new_value->timestamp = Transaction::GetTransaction(context).transaction_id;
 	if (entry != mapping.end()) {
 		if (HasConflict(context, entry->second->timestamp)) {
@@ -5482,7 +5607,7 @@ void CatalogSet::PutMapping(ClientContext &context, const string &name, idx_t en
 void CatalogSet::DeleteMapping(ClientContext &context, const string &name) {
 	auto entry = mapping.find(name);
 	D_ASSERT(entry != mapping.end());
-	auto delete_marker = make_unique<MappingValue>(entry->second->index);
+	auto delete_marker = make_unique<MappingValue>(entry->second->index.Copy());
 	delete_marker->deleted = true;
 	delete_marker->timestamp = Transaction::GetTransaction(context).transaction_id;
 	delete_marker->child = move(entry->second);
@@ -5550,15 +5675,14 @@ CatalogEntry *CatalogSet::CreateEntryInternal(ClientContext &context, unique_ptr
 		return nullptr;
 	}
 	auto &name = entry->name;
-	auto entry_index = current_entry++;
 	auto catalog_entry = entry.get();
 	entry->set = this;
 	entry->timestamp = 0;
-	PutMapping(context, name, entry_index);
+	auto entry_index = PutEntry(current_entry++, move(entry));
+	PutMapping(context, name, move(entry_index));
 	mapping[name]->timestamp = 0;
-	entries[entry_index] = move(entry);
 	return catalog_entry;
 }
@@ -5597,7 +5721,7 @@ CatalogEntry *CatalogSet::GetEntry(ClientContext &context, const string &name) {
 		// we found an entry for this name
 		// check the version numbers
-		auto catalog_entry = entries[mapping_value->index].get();
+		auto catalog_entry = mapping_value->index.GetEntry().get();
 		CatalogEntry *current = GetEntryForTransaction(context, catalog_entry);
 		if (current->deleted || (current->name != name && !UseTimestamp(context, mapping_value->timestamp))) {
 			return nullptr;
@@ -5706,7 +5830,7 @@ void CatalogSet::Undo(CatalogEntry *entry) {
 		// otherwise we need to update the base entry tables
 		auto &name = entry->name;
 		to_be_removed_node->child->SetAsRoot();
-		entries[mapping[name]->index] = move(to_be_removed_node->child);
+		mapping[name]->index.GetEntry() = move(to_be_removed_node->child);
 		entry->parent = nullptr;
 	}
@@ -5721,7 +5845,7 @@ void CatalogSet::Undo(CatalogEntry *entry) {
 		}
 	}
 	// we mark the catalog as being modified, since this action can lead to e.g. tables being dropped
-	entry->catalog->ModifyCatalog();
+	catalog.ModifyCatalog();
 }
 void CatalogSet::CreateDefaultEntries(ClientContext &context, unique_lock<mutex> &lock) {
@@ -5754,7 +5878,7 @@ void CatalogSet::Scan(ClientContext &context, const std::function<void(CatalogEn
 	CreateDefaultEntries(context, lock);
 	for (auto &kv : entries) {
-		auto entry = kv.second.get();
+		auto entry = kv.second.entry.get();
 		entry = GetEntryForTransaction(context, entry);
 		if (!entry->deleted) {
 			callback(entry);
@@ -5766,7 +5890,7 @@ void CatalogSet::Scan(const std::function<void(CatalogEntry *)> &callback) {
 	// lock the catalog set
 	lock_guard<mutex> lock(catalog_lock);
 	for (auto &kv : entries) {
-		auto entry = kv.second.get();
+		auto entry = kv.second.entry.get();
 		entry = GetCommittedEntry(entry);
 		if (!entry->deleted) {
 			callback(entry);
@@ -6182,14 +6306,17 @@ static DefaultView internal_views[] = {
     {"pg_catalog", "pg_attrdef", "SELECT column_index oid, table_oid adrelid, column_index adnum, column_default adbin from duckdb_columns() where column_default is not null;"},
     {"pg_catalog", "pg_class", "SELECT table_oid oid, table_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, estimated_size::real reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, index_count > 0 relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'r' relkind, column_count relnatts, check_constraint_count relchecks, false relhasoids, has_primary_key relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_tables() UNION ALL SELECT view_oid oid, view_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'v' relkind, column_count relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_views() UNION ALL SELECT sequence_oid oid, sequence_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'S' relkind, 0 relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 
relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_sequences() UNION ALL SELECT index_oid oid, index_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, 't' relpersistence, 'i' relkind, NULL relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_indexes()"},
     {"pg_catalog", "pg_constraint", "SELECT table_oid*1000000+constraint_index oid, constraint_text conname, schema_oid connamespace, CASE constraint_type WHEN 'CHECK' then 'c' WHEN 'UNIQUE' then 'u' WHEN 'PRIMARY KEY' THEN 'p' WHEN 'FOREIGN KEY' THEN 'f' ELSE 'x' END contype, false condeferrable, false condeferred, true convalidated, table_oid conrelid, 0 contypid, 0 conindid, 0 conparentid, 0 confrelid, NULL confupdtype, NULL confdeltype, NULL confmatchtype, true conislocal, 0 coninhcount, false connoinherit, constraint_column_indexes conkey, NULL confkey, NULL conpfeqop, NULL conppeqop, NULL conffeqop, NULL conexclop, expression conbin FROM duckdb_constraints()"},
+	{"pg_catalog", "pg_database", "SELECT 0 oid, 'main' datname"},
     {"pg_catalog", "pg_depend", "SELECT * FROM duckdb_dependencies()"},
 	{"pg_catalog", "pg_description", "SELECT NULL objoid, NULL classoid, NULL objsubid, NULL description WHERE 1=0"},
     {"pg_catalog", "pg_enum", "SELECT NULL oid, NULL enumtypid, NULL enumsortorder, NULL enumlabel WHERE 1=0"},
     {"pg_catalog", "pg_index", "SELECT index_oid indexrelid, table_oid indrelid, 0 indnatts, 0 indnkeyatts, is_unique indisunique, is_primary indisprimary, false indisexclusion, true indimmediate, false indisclustered, true indisvalid, false indcheckxmin, true indisready, true indislive, false indisreplident, NULL::INT[] indkey, NULL::OID[] indcollation, NULL::OID[] indclass, NULL::INT[] indoption, expressions indexprs, NULL indpred FROM duckdb_indexes()"},
     {"pg_catalog", "pg_indexes", "SELECT schema_name schemaname, table_name tablename, index_name indexname, NULL \"tablespace\", sql indexdef FROM duckdb_indexes()"},
     {"pg_catalog", "pg_namespace", "SELECT oid, schema_name nspname, 0 nspowner, NULL nspacl FROM duckdb_schemas()"},
+	{"pg_catalog", "pg_proc", "SELECT f.function_oid oid, function_name proname, s.oid pronamespace FROM duckdb_functions() f LEFT JOIN duckdb_schemas() s USING (schema_name)"},
     {"pg_catalog", "pg_sequence", "SELECT sequence_oid seqrelid, 0 seqtypid, start_value seqstart, increment_by seqincrement, max_value seqmax, min_value seqmin, 0 seqcache, cycle seqcycle FROM duckdb_sequences()"},
 	{"pg_catalog", "pg_sequences", "SELECT schema_name schemaname, sequence_name sequencename, 'duckdb' sequenceowner, 0 data_type, start_value, min_value, max_value, increment_by, cycle, 0 cache_size, last_value FROM duckdb_sequences()"},
+	{"pg_catalog", "pg_settings", "SELECT name, value setting, description short_desc, CASE WHEN input_type = 'VARCHAR' THEN 'string' WHEN input_type = 'BOOLEAN' THEN 'bool' WHEN input_type IN ('BIGINT', 'UBIGINT') THEN 'integer' ELSE input_type END vartype FROM duckdb_settings()"},
     {"pg_catalog", "pg_tables", "SELECT schema_name schemaname, table_name tablename, 'duckdb' tableowner, NULL \"tablespace\", index_count > 0 hasindexes, false hasrules, false hastriggers FROM duckdb_tables()"},
     {"pg_catalog", "pg_tablespace", "SELECT 0 oid, 'pg_default' spcname, 0 spcowner, NULL spcacl, NULL spcoptions"},
     {"pg_catalog", "pg_type", "SELECT type_oid oid, format_pg_type(type_name) typname, schema_oid typnamespace, 0 typowner, type_size typlen, false typbyval, 'b' typtype, CASE WHEN type_category='NUMERIC' THEN 'N' WHEN type_category='STRING' THEN 'S' WHEN type_category='DATETIME' THEN 'D' WHEN type_category='BOOLEAN' THEN 'B' WHEN type_category='COMPOSITE' THEN 'C' WHEN type_category='USER' THEN 'U' ELSE 'X' END typcategory, false typispreferred, true typisdefined, NULL typdelim, NULL typrelid, NULL typsubscript, NULL typelem, NULL typarray, NULL typinput, NULL typoutput, NULL typreceive, NULL typsend, NULL typmodin, NULL typmodout, NULL typanalyze, 'd' typalign, 'p' typstorage, NULL typnotnull, NULL typbasetype, NULL typtypmod, NULL typndims, NULL typcollation, NULL typdefaultbin, NULL typdefault, NULL typacl FROM duckdb_types();"},
@@ -6256,6 +6383,7 @@ vector<string> DefaultViewGenerator::GetDefaultEntries() {
 namespace duckdb {
 DependencyManager::DependencyManager(Catalog &catalog) : catalog(catalog) {
@@ -6265,12 +6393,11 @@ void DependencyManager::AddObject(ClientContext &context, CatalogEntry *object,
                                   unordered_set<CatalogEntry *> &dependencies) {
 	// check for each object in the sources if they were not deleted yet
 	for (auto &dependency : dependencies) {
-		idx_t entry_index;
 		CatalogEntry *catalog_entry;
 		if (!dependency->set) {
 			throw InternalException("Dependency has no set");
 		}
-		if (!dependency->set->GetEntryInternal(context, dependency->name, entry_index, catalog_entry)) {
+		if (!dependency->set->GetEntryInternal(context, dependency->name, nullptr, catalog_entry)) {
 			throw InternalException("Dependency has already been deleted?");
 		}
 	}
@@ -6298,10 +6425,9 @@ void DependencyManager::DropObject(ClientContext &context, CatalogEntry *object,
 		if (mapping_value == nullptr) {
 			continue;
 		}
-		idx_t entry_index = mapping_value->index;
 		CatalogEntry *dependency_entry;
-		if (!catalog_set.GetEntryInternal(context, entry_index, dependency_entry)) {
+		if (!catalog_set.GetEntryInternal(context, mapping_value->index, dependency_entry)) {
 			// the dependent object was already deleted, no conflict
 			continue;
 		}
@@ -6309,7 +6435,7 @@ void DependencyManager::DropObject(ClientContext &context, CatalogEntry *object,
 		if (cascade || dep.dependency_type == DependencyType::DEPENDENCY_AUTOMATIC ||
 		    dep.dependency_type == DependencyType::DEPENDENCY_OWNS) {
 			// cascade: drop the dependent object
-			catalog_set.DropEntryInternal(context, entry_index, *dependency_entry, cascade);
+			catalog_set.DropEntryInternal(context, mapping_value->index.Copy(), *dependency_entry, cascade);
 		} else {
 			// no cascade and there are objects that depend on this object: throw error
 			throw DependencyException("Cannot drop entry \"%s\" because there are entries that "
@@ -6329,9 +6455,8 @@ void DependencyManager::AlterObject(ClientContext &context, CatalogEntry *old_ob
 	for (auto &dep : dependent_objects) {
 		// look up the entry in the catalog set
 		auto &catalog_set = *dep.entry->set;
-		idx_t entry_index;
 		CatalogEntry *dependency_entry;
-		if (!catalog_set.GetEntryInternal(context, dep.entry->name, entry_index, dependency_entry)) {
+		if (!catalog_set.GetEntryInternal(context, dep.entry->name, nullptr, dependency_entry)) {
 			// the dependent object was already deleted, no conflict
 			continue;
 		}
@@ -9213,6 +9338,13 @@ void BoxRenderer::Render(ClientContext &context, const vector<string> &names, co
 	// figure out how many/which rows to render
 	idx_t row_count = result.Count();
 	idx_t rows_to_render = MinValue<idx_t>(row_count, config.max_rows);
+	if (row_count <= config.max_rows + 3) {
+		// hiding rows adds 3 extra rows
+		// so hiding rows makes no sense if we are only slightly over the limit
+		// if we are 1 row over the limit hiding rows will actually increase the number of lines we display!
+		// in this case render all the rows
+		rows_to_render = row_count;
+	}
 	idx_t top_rows;
 	idx_t bottom_rows;
 	if (rows_to_render == row_count) {
@@ -30473,7 +30605,7 @@ public:
 private:
 	void AllocateEmptyBlock(idx_t size);
-	void AllocateBlock();
+	BufferHandle AllocateBlock();
 	BufferHandle Pin(uint32_t block_id);
 	BufferHandle PinInternal(uint32_t block_id);
@@ -30587,11 +30719,7 @@ protected:
 		return make_unique<ColumnDataCollection>(allocators->allocators[partition_index], types);
 	}
 	//! Create a DataChunk used for buffering appends to the partition
-	unique_ptr<DataChunk> CreatePartitionBuffer() const {
-		auto result = make_unique<DataChunk>();
-		result->Initialize(Allocator::Get(context), types, BufferSize());
-		return result;
-	}
+	unique_ptr<DataChunk> CreatePartitionBuffer() const;
 protected:
 	PartitionedColumnDataType type;
@@ -30968,6 +31096,9 @@ struct PartitionFunctor {
 		const auto row_width = layout.GetRowWidth();
 		const auto has_heap = !layout.AllConstant();
+		block_collection.VerifyBlockSizes();
+		string_heap.VerifyBlockSizes();
 		// Fixed-size data
 		RowDataBlock *partition_data_blocks[CONSTANTS::NUM_PARTITIONS];
 		vector<BufferHandle> partition_data_handles;
@@ -31102,6 +31233,10 @@ struct PartitionFunctor {
 #ifdef DEBUG
 		for (idx_t bin = 0; bin < CONSTANTS::NUM_PARTITIONS; bin++) {
 			auto &p_block_collection = *partition_block_collections[bin];
+			p_block_collection.VerifyBlockSizes();
+			if (!layout.AllConstant()) {
+				partition_string_heaps[bin]->VerifyBlockSizes();
+			}
 			idx_t p_count = 0;
 			for (idx_t b = 0; b < p_block_collection.blocks.size(); b++) {
 				auto &data_block = *p_block_collection.blocks[b];
@@ -39011,14 +39146,13 @@ static void SortTiedBlobs(BufferManager &buffer_manager, const data_ptr_t datapt
 		          return order * Comparators::CompareVal(left_ptr, right_ptr, logical_type) < 0;
 	          });
 	// Re-order
-	auto temp_block =
-	    buffer_manager.Allocate(MaxValue((end - start) * sort_layout.entry_size, (idx_t)Storage::BLOCK_SIZE));
-	data_ptr_t temp_ptr = temp_block.Ptr();
+	auto temp_block = buffer_manager.GetBufferAllocator().Allocate((end - start) * sort_layout.entry_size);
+	data_ptr_t temp_ptr = temp_block.get();
 	for (idx_t i = 0; i < end - start; i++) {
 		FastMemcpy(temp_ptr, entry_ptrs[i], sort_layout.entry_size);
 		temp_ptr += sort_layout.entry_size;
 	}
-	memcpy(dataptr + start * sort_layout.entry_size, temp_block.Ptr(), (end - start) * sort_layout.entry_size);
+	memcpy(dataptr + start * sort_layout.entry_size, temp_block.get(), (end - start) * sort_layout.entry_size);
 	// Determine if there are still ties (if this is not the last column)
 	if (tie_col < sort_layout.column_count - 1) {
 		data_ptr_t idx_ptr = dataptr + start * sort_layout.entry_size + sort_layout.comparison_size;
@@ -39083,7 +39217,7 @@ static void ComputeTies(data_ptr_t dataptr, const idx_t &count, const idx_t &col
 //! Textbook LSD radix sort
 void RadixSortLSD(BufferManager &buffer_manager, const data_ptr_t &dataptr, const idx_t &count, const idx_t &col_offset,
                   const idx_t &row_width, const idx_t &sorting_size) {
-	auto temp_block = buffer_manager.Allocate(MaxValue(count * row_width, (idx_t)Storage::BLOCK_SIZE));
+	auto temp_block = buffer_manager.GetBufferAllocator().Allocate(count * row_width);
 	bool swap = false;
 	idx_t counts[SortConstants::VALUES_PER_RADIX];
@@ -39091,8 +39225,8 @@ void RadixSortLSD(BufferManager &buffer_manager, const data_ptr_t &dataptr, cons
 		// Init counts to 0
 		memset(counts, 0, sizeof(counts));
 		// Const some values for convenience
-		const data_ptr_t source_ptr = swap ? temp_block.Ptr() : dataptr;
-		const data_ptr_t target_ptr = swap ? dataptr : temp_block.Ptr();
+		const data_ptr_t source_ptr = swap ? temp_block.get() : dataptr;
+		const data_ptr_t target_ptr = swap ? dataptr : temp_block.get();
 		const idx_t offset = col_offset + sorting_size - r;
 		// Collect counts
 		data_ptr_t offset_ptr = source_ptr + offset;
@@ -39120,7 +39254,7 @@ void RadixSortLSD(BufferManager &buffer_manager, const data_ptr_t &dataptr, cons
 	}
 	// Move data back to original buffer (if it was swapped)
 	if (swap) {
-		memcpy(dataptr, temp_block.Ptr(), count * row_width);
+		memcpy(dataptr, temp_block.get(), count * row_width);
 	}
 }
@@ -39468,6 +39602,9 @@ SortLayout SortLayout::GetPrefixComparisonLayout(idx_t num_prefix_cols) const {
 }
 LocalSortState::LocalSortState() : initialized(false) { // a new state starts out with initialized == false
+	if (!Radix::IsLittleEndian()) { // refuse construction when Radix reports a host that is not little endian
+		throw NotImplementedException("Sorting is not supported on big endian architectures");
+	}
 }
 void LocalSortState::Initialize(GlobalSortState &global_sort_state, BufferManager &buffer_manager_p) {
@@ -43143,13 +43280,14 @@ BufferHandle ColumnDataAllocator::PinInternal(uint32_t block_id) {
 	return alloc.buffer_manager->Pin(blocks[block_id].handle);
 }
-void ColumnDataAllocator::AllocateBlock() {
+BufferHandle ColumnDataAllocator::AllocateBlock() { // appends one block to `blocks` and returns the handle from Allocate
 	D_ASSERT(type == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR);
 	BlockMetaData data;
 	data.size = 0;
 	data.capacity = Storage::BLOCK_SIZE;
-	data.handle = alloc.buffer_manager->RegisterMemory(Storage::BLOCK_SIZE, false);
+	auto pin = alloc.buffer_manager->Allocate(Storage::BLOCK_SIZE, false, &data.handle); // data.handle is passed by address, presumably filled in by Allocate
 	blocks.push_back(move(data));
+	return pin; // the caller keeps this handle (AllocateBuffer stores it in chunk_state->handles)
 }
 void ColumnDataAllocator::AllocateEmptyBlock(idx_t size) {
@@ -43183,11 +43321,10 @@ void ColumnDataAllocator::AllocateBuffer(idx_t size, uint32_t &block_id, uint32_
                                          ChunkManagementState *chunk_state) {
 	D_ASSERT(allocated_data.empty());
 	if (blocks.empty() || blocks.back().Capacity() < size) {
-		AllocateBlock();
-		if (chunk_state && !blocks.empty()) {
-			auto &last_block = blocks.back();
+		auto pinned_block = AllocateBlock();
+		if (chunk_state) {
+			D_ASSERT(!blocks.empty());
 			auto new_block_id = blocks.size() - 1;
-			auto pinned_block = alloc.buffer_manager->Pin(last_block.handle);
 			chunk_state->handles[new_block_id] = move(pinned_block);
 		}
 	}
@@ -44132,7 +44269,7 @@ namespace duckdb {
 ColumnDataCollectionSegment::ColumnDataCollectionSegment(shared_ptr<ColumnDataAllocator> allocator_p,
                                                          vector<LogicalType> types_p)
-    : allocator(move(allocator_p)), types(move(types_p)), count(0) {
+    : allocator(move(allocator_p)), types(move(types_p)), count(0), heap(allocator->GetAllocator()) { // heap reads the member `allocator`, not allocator_p (already moved-from); NOTE(review): requires `allocator` to be declared before `heap` - confirm
 }
 idx_t ColumnDataCollectionSegment::GetDataSize(idx_t type_size) {
@@ -47530,6 +47667,12 @@ void PartitionedColumnData::InitializeAppendState(PartitionedColumnDataAppendSta
 	InitializeAppendStateInternal(state);
 }
+unique_ptr<DataChunk> PartitionedColumnData::CreatePartitionBuffer() const { // creates a DataChunk used for buffering appends to a partition
+	auto result = make_unique<DataChunk>();
+	result->Initialize(BufferManager::GetBufferManager(context).GetBufferAllocator(), types, BufferSize()); // uses the buffer manager's allocator, this object's types and BufferSize()
+	return result;
+}
 void PartitionedColumnData::Append(PartitionedColumnDataAppendState &state, DataChunk &input) {
 	// Compute partition indices and store them in state.partition_indices
 	ComputePartitionIndices(state, input);
@@ -48216,7 +48359,7 @@ buffer_ptr<SelectionData> SelectionVector::Slice(const SelectionVector &sel, idx
 namespace duckdb {
-StringHeap::StringHeap() : allocator(Allocator::DefaultAllocator()) {
+StringHeap::StringHeap(Allocator &allocator) : allocator(allocator) { // the member `allocator` is initialized from the caller-supplied Allocator
 }
 void StringHeap::Destroy() {
@@ -53780,7 +53923,7 @@ string LogicalType::ToString() const {
 		string ret = "UNION(";
 		size_t count = UnionType::GetMemberCount(*this);
 		for (size_t i = 0; i < count; i++) {
-			ret += UnionType::GetMemberType(*this, i).ToString();
+			ret += UnionType::GetMemberName(*this, i) + " " + UnionType::GetMemberType(*this, i).ToString();
 			if (i < count - 1) {
 				ret += ", ";
 			}
@@ -57240,7 +57383,9 @@ static inline void ListLoopHash(Vector &input, Vector &hashes, const SelectionVe
 	const auto child_count = ListVector::GetListSize(input);
 	Vector child_hashes(LogicalType::HASH, child_count);
-	VectorOperations::Hash(child, child_hashes, child_count);
+	if (child_count > 0) {
+		VectorOperations::Hash(child, child_hashes, child_count);
+	}
 	auto chdata = FlatVector::GetData<hash_t>(child_hashes);
 	// Reduce the number of entries to check to the non-empty ones
@@ -58640,11 +58785,13 @@ public:
 	ColumnBindingResolver();
 	void VisitOperator(LogicalOperator &op) override;
+	static void Verify(LogicalOperator &op);
 protected:
 	vector<ColumnBinding> bindings;
 	unique_ptr<Expression> VisitReplace(BoundColumnRefExpression &expr, unique_ptr<Expression> *expr_ptr) override;
+	static unordered_set<idx_t> VerifyInternal(LogicalOperator &op);
 };
 } // namespace duckdb
@@ -58986,6 +59133,35 @@ unique_ptr<Expression> ColumnBindingResolver::VisitReplace(BoundColumnRefExpress
 	// LCOV_EXCL_STOP
 }
+unordered_set<idx_t> ColumnBindingResolver::VerifyInternal(LogicalOperator &op) { // returns every table index found in op's subtree; throws on a duplicate
+	unordered_set<idx_t> result;
+	for (auto &child : op.children) { // merge the index sets of all child subtrees first
+		auto child_indexes = VerifyInternal(*child);
+		for (auto index : child_indexes) {
+			D_ASSERT(index != DConstants::INVALID_INDEX);
+			if (result.find(index) != result.end()) { // the same index was already collected from an earlier child
+				throw InternalException("Duplicate table index \"%lld\" found", index);
+			}
+			result.insert(index);
+		}
+	}
+	auto indexes = op.GetTableIndex(); // then add the indexes reported by this operator itself
+	for (auto index : indexes) {
+		D_ASSERT(index != DConstants::INVALID_INDEX);
+		if (result.find(index) != result.end()) { // already seen in a child, or reported twice by op
+			throw InternalException("Duplicate table index \"%lld\" found", index);
+		}
+		result.insert(index);
+	}
+	return result;
+}
+void ColumnBindingResolver::Verify(LogicalOperator &op) { // runs the duplicate-table-index check on op; the body is empty unless DEBUG is defined
+#ifdef DEBUG
+	VerifyInternal(op); // the returned index set is discarded; only the throw/assert side effects matter here
+#endif
+}
 } // namespace duckdb
@@ -60516,6 +60692,9 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
          DatabaseInstance &db, idx_t block_id, idx_t block_offset)
     : Index(IndexType::ART, table_io_manager, column_ids, unbound_expressions, constraint_type), db(db),
       estimated_art_size(0), estimated_key_size(16) {
+	if (!Radix::IsLittleEndian()) {
+		throw NotImplementedException("ART indexes are not supported on big endian architectures");
+	}
 	if (block_id != DConstants::INVALID_INDEX) {
 		tree = Node::Deserialize(*this, block_id, block_offset);
 	} else {
@@ -60799,7 +60978,7 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
 	auto payload_types = logical_types;
 	payload_types.emplace_back(LogicalType::ROW_TYPE);
-	ArenaAllocator arena_allocator(allocator);
+	ArenaAllocator arena_allocator(BufferAllocator::Get(db));
 	vector<Key> keys(STANDARD_VECTOR_SIZE);
 	auto temp_art = make_unique<ART>(this->column_ids, this->table_io_manager, this->unbound_expressions,
@@ -60856,7 +61035,7 @@ bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
 	D_ASSERT(logical_types[0] == input.data[0].GetType());
 	// generate the keys for the given input
-	ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
+	ArenaAllocator arena_allocator(BufferAllocator::Get(db));
 	vector<Key> keys(input.size());
 	GenerateKeys(arena_allocator, input, keys);
@@ -61016,7 +61195,7 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
 	estimated_art_size -= released_memory;
 	// then generate the keys for the given input
-	ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
+	ArenaAllocator arena_allocator(BufferAllocator::Get(db));
 	vector<Key> keys(expression.size());
 	GenerateKeys(arena_allocator, expression, keys);
@@ -61260,7 +61439,7 @@ bool ART::Scan(Transaction &transaction, DataTable &table, IndexScanState &table
 	// FIXME: the key directly owning the data for a single key might be more efficient
 	D_ASSERT(state->values[0].type().InternalType() == types[0]);
-	ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
+	ArenaAllocator arena_allocator(Allocator::Get(db));
 	auto key = CreateKey(arena_allocator, types[0], state->values[0]);
 	if (state->values[1].IsNull()) {
@@ -61335,7 +61514,7 @@ void ART::VerifyExistence(DataChunk &chunk, VerifyExistenceType verify_type, str
 	ExecuteExpressions(chunk, expression_chunk);
 	// generate the keys for the given input
-	ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
+	ArenaAllocator arena_allocator(BufferAllocator::Get(db));
 	vector<Key> keys(expression_chunk.size());
 	GenerateKeys(arena_allocator, expression_chunk, keys);
@@ -63513,7 +63692,7 @@ private:
 	mutex pinned_handles_lock;
 	vector<BufferHandle> pinned_handles;
 	//! The hash map of the HT, created after finalization
-	BufferHandle hash_map;
+	AllocatedData hash_map;
 	//! Whether or not NULL values are considered equal in each of the comparisons
 	vector<bool> null_values_are_equal;
@@ -63597,9 +63776,10 @@ public:
 	idx_t SwizzledSize() const {
 		return swizzled_block_collection->SizeInBytes() + swizzled_string_heap->SizeInBytes();
 	}
-	//! Capacity of the pointer table given the
+	//! Capacity of the pointer table for the given hash table count
+	//! (never below 1024, to reduce the chance of collisions in small hash tables)
 	static idx_t PointerTableCapacity(idx_t count) {
-		return NextPowerOfTwo(MaxValue<idx_t>(count * 2, (Storage::BLOCK_SIZE / sizeof(data_ptr_t)) + 1));
+		return MaxValue<idx_t>(NextPowerOfTwo(count * 2), 1 << 10); // NextPowerOfTwo of twice the count, raised to at least 1 << 10 (1024)
 	}
 	//! Swizzle the blocks in this HT (moves from block_collection and string_heap to swizzled_...)
@@ -63770,7 +63950,7 @@ void JoinHashTable::ApplyBitmask(Vector &hashes, const SelectionVector &sel, idx
 	auto hash_data = (hash_t *)hdata.data;
 	auto result_data = FlatVector::GetData<data_ptr_t *>(pointers);
-	auto main_ht = (data_ptr_t *)hash_map.Ptr();
+	auto main_ht = (data_ptr_t *)hash_map.get();
 	for (idx_t i = 0; i < count; i++) {
 		auto rindex = sel.get_index(i);
 		auto hindex = hdata.sel->get_index(rindex);
@@ -63952,7 +64132,7 @@ void JoinHashTable::InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_loc
 	hashes.Flatten(count);
 	D_ASSERT(hashes.GetVectorType() == VectorType::FLAT_VECTOR);
-	auto pointers = (atomic<data_ptr_t> *)hash_map.Ptr();
+	auto pointers = (atomic<data_ptr_t> *)hash_map.get();
 	auto indices = FlatVector::GetData<hash_t>(hashes);
 	if (parallel) {
@@ -63969,19 +64149,19 @@ void JoinHashTable::InitializePointerTable() {
 	D_ASSERT((capacity & (capacity - 1)) == 0);
 	bitmask = capacity - 1;
-	if (!hash_map.IsValid()) {
+	if (!hash_map.get()) {
 		// allocate the HT if not yet done
-		hash_map = buffer_manager.Allocate(capacity * sizeof(data_ptr_t));
+		hash_map = buffer_manager.GetBufferAllocator().Allocate(capacity * sizeof(data_ptr_t));
 	}
-	D_ASSERT(hash_map.GetFileBuffer().size >= capacity * sizeof(data_ptr_t));
+	D_ASSERT(hash_map.GetSize() == capacity * sizeof(data_ptr_t));
 	// initialize HT with all-zero entries
-	memset(hash_map.Ptr(), 0, capacity * sizeof(data_ptr_t));
+	memset(hash_map.get(), 0, capacity * sizeof(data_ptr_t));
 }
 void JoinHashTable::Finalize(idx_t block_idx_start, idx_t block_idx_end, bool parallel) {
 	// Pointer table should be allocated
-	D_ASSERT(hash_map.IsValid());
+	D_ASSERT(hash_map.get());
 	vector<BufferHandle> local_pinned_handles;
@@ -64863,7 +65043,8 @@ ProbeSpillLocalState ProbeSpill::RegisterThread() {
 		result.local_partition = local_partitions.back().get();
 		result.local_partition_append_state = local_partition_append_states.back().get();
 	} else {
-		local_spill_collections.emplace_back(make_unique<ColumnDataCollection>(context, probe_types));
+		local_spill_collections.emplace_back(
+		    make_unique<ColumnDataCollection>(BufferManager::GetBufferManager(context), probe_types));
 		local_spill_append_states.emplace_back(make_unique<ColumnDataAppendState>());
 		local_spill_collections.back()->InitializeAppend(*local_spill_append_states.back());
@@ -64894,7 +65075,8 @@ void ProbeSpill::Finalize() {
 		local_partition_append_states.clear();
 	} else {
 		if (local_spill_collections.empty()) {
-			global_spill_collection = make_unique<ColumnDataCollection>(context, probe_types);
+			global_spill_collection =
+			    make_unique<ColumnDataCollection>(BufferManager::GetBufferManager(context), probe_types);
 		} else {
 			global_spill_collection = move(local_spill_collections[0]);
 			for (idx_t i = 1; i < local_spill_collections.size(); i++) {
@@ -64911,7 +65093,8 @@ void ProbeSpill::PrepareNextProbe() {
 		auto &partitions = global_partitions->GetPartitions();
 		if (partitions.empty() || ht.partition_start == partitions.size()) {
 			// Can't probe, just make an empty one
-			global_spill_collection = make_unique<ColumnDataCollection>(context, probe_types);
+			global_spill_collection =
+			    make_unique<ColumnDataCollection>(BufferManager::GetBufferManager(context), probe_types);
 		} else {
 			// Move specific partitions to the global spill collection
 			global_spill_collection = move(partitions[ht.partition_start]);
@@ -65185,6 +65368,44 @@ static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t r
 	}
 }
+static void MarkJoinNested(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[],
+                           ExpressionType comparison_type) { // sets found_match[i] for each left row i whose comparison against right reports at least one hit
+	Vector left_reference(left.GetType());
+	SelectionVector true_sel(rcount); // NOTE(review): true_sel is never used in this function - candidate for removal
+	for (idx_t i = 0; i < lcount; i++) {
+		if (found_match[i]) { // already marked; skip the comparison
+			continue;
+		}
+		ConstantVector::Reference(left_reference, left, i, rcount); // point left_reference at row i of left before comparing it with right
+		idx_t count; // result of the comparison below; anything > 0 is treated as a match
+		switch (comparison_type) {
+		case ExpressionType::COMPARE_EQUAL:
+			count = VectorOperations::Equals(left_reference, right, nullptr, rcount, nullptr, nullptr);
+			break;
+		case ExpressionType::COMPARE_NOTEQUAL:
+			count = VectorOperations::NotEquals(left_reference, right, nullptr, rcount, nullptr, nullptr);
+			break;
+		case ExpressionType::COMPARE_LESSTHAN:
+			count = VectorOperations::LessThan(left_reference, right, nullptr, rcount, nullptr, nullptr);
+			break;
+		case ExpressionType::COMPARE_GREATERTHAN:
+			count = VectorOperations::GreaterThan(left_reference, right, nullptr, rcount, nullptr, nullptr);
+			break;
+		case ExpressionType::COMPARE_LESSTHANOREQUALTO:
+			count = VectorOperations::LessThanEquals(left_reference, right, nullptr, rcount, nullptr, nullptr);
+			break;
+		case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
+			count = VectorOperations::GreaterThanEquals(left_reference, right, nullptr, rcount, nullptr, nullptr);
+			break;
+		default: // any other comparison type is rejected
+			throw InternalException("Unsupported comparison type for MarkJoinNested");
+		}
+		if (count > 0) { // at least one hit among the rcount right rows
+			found_match[i] = true;
+		}
+	}
+}
 template <class OP>
 static void MarkJoinSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[]) {
 	switch (left.GetType().InternalType()) {
@@ -65220,6 +65441,13 @@ static void MarkJoinSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcou
 static void MarkJoinComparisonSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[],
                                      ExpressionType comparison_type) {
+	switch (left.GetType().InternalType()) {
+	case PhysicalType::STRUCT:
+	case PhysicalType::LIST:
+		return MarkJoinNested(left, right, lcount, rcount, found_match, comparison_type);
+	default:
+		break;
+	}
 	D_ASSERT(left.GetType() == right.GetType());
 	switch (comparison_type) {
 	case ExpressionType::COMPARE_EQUAL:
@@ -71250,6 +71478,7 @@ class LimitPercentOperatorState : public GlobalSourceState {
 public:
 	explicit LimitPercentOperatorState(const PhysicalLimitPercent &op)
 	    : limit(DConstants::INVALID_INDEX), current_offset(0) { // limit starts as INVALID_INDEX, current_offset at 0
+		D_ASSERT(op.sink_state); // sink_state is dereferenced on the next line, so it must be set
 		auto &gstate = (LimitPercentGlobalState &)*op.sink_state;
 		gstate.data.InitializeScan(scan_state); // set up scan_state for scanning the data held in the sink state
 	}
@@ -72271,7 +72500,12 @@ void PhysicalTransaction::GetData(ExecutionContext &context, DataChunk &chunk, G
                                   LocalSourceState &lstate) const {
 	auto &client = context.client;
-	switch (info->type) {
+	auto type = info->type;
+	if (type == TransactionType::COMMIT && ValidChecker::IsInvalidated(client.ActiveTransaction())) {
+		// transaction is invalidated - turn COMMIT into ROLLBACK
+		type = TransactionType::ROLLBACK;
+	}
+	switch (type) {
 	case TransactionType::BEGIN_TRANSACTION: {
 		if (client.transaction.IsAutoCommit()) {
 			// start the active transaction
@@ -72493,6 +72727,7 @@ public:
 public:
 	bool EmptyResultIfRHSIsEmpty() const;
+	static bool HasNullValues(DataChunk &chunk);
 	static void ConstructSemiJoinResult(DataChunk &left, DataChunk &result, bool found_match[]);
 	static void ConstructAntiJoinResult(DataChunk &left, DataChunk &result, bool found_match[]);
 	static void ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &left, DataChunk &result, bool found_match[],
@@ -74496,6 +74731,10 @@ public:
 };
 void HashJoinGlobalSinkState::ScheduleFinalize(Pipeline &pipeline, Event &event) {
+	if (hash_table->Count() == 0) {
+		hash_table->finalized = true;
+		return;
+	}
 	hash_table->InitializePointerTable();
 	auto new_event = make_shared<HashJoinFinalizeEvent>(pipeline, *this);
 	event.InsertEvent(move(new_event));
@@ -76494,7 +76733,7 @@ namespace duckdb {
 class IndexJoinOperatorState : public CachingOperatorState {
 public:
 	IndexJoinOperatorState(ClientContext &context, const PhysicalIndexJoin &op)
-	    : probe_executor(context), arena_allocator(Allocator::Get(context)), keys(STANDARD_VECTOR_SIZE) {
+	    : probe_executor(context), arena_allocator(BufferAllocator::Get(context)), keys(STANDARD_VECTOR_SIZE) {
 		auto &allocator = Allocator::Get(context);
 		rhs_rows.resize(STANDARD_VECTOR_SIZE);
 		result_sizes.resize(STANDARD_VECTOR_SIZE);
@@ -76862,7 +77101,7 @@ public:
 		return true;
 	}
-	static bool IsSupported(const vector<JoinCondition> &conditions);
+	static bool IsSupported(const vector<JoinCondition> &conditions, JoinType join_type);
 public:
 	//! Returns a list of the types of the join conditions
@@ -76896,7 +77135,7 @@ PhysicalNestedLoopJoin::PhysicalNestedLoopJoin(LogicalOperator &op, unique_ptr<P
 	children.push_back(move(right));
 }
-static bool HasNullValues(DataChunk &chunk) {
+bool PhysicalJoin::HasNullValues(DataChunk &chunk) {
 	for (idx_t col_idx = 0; col_idx < chunk.ColumnCount(); col_idx++) {
 		UnifiedVectorFormat vdata;
 		chunk.data[col_idx].ToUnifiedFormat(chunk.size(), vdata);
@@ -76985,7 +77224,10 @@ void PhysicalJoin::ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &left
 	}
 }
-bool PhysicalNestedLoopJoin::IsSupported(const vector<JoinCondition> &conditions) {
+bool PhysicalNestedLoopJoin::IsSupported(const vector<JoinCondition> &conditions, JoinType join_type) {
+	if (join_type == JoinType::MARK) {
+		return true;
+	}
 	for (auto &cond : conditions) {
 		if (cond.left->return_type.InternalType() == PhysicalType::STRUCT ||
 		    cond.left->return_type.InternalType() == PhysicalType::LIST) {
@@ -77029,7 +77271,7 @@ public:
 	//! Materialized join condition of the RHS
 	ColumnDataCollection right_condition_data;
 	//! Whether or not the RHS of the nested loop join has NULL values
-	bool has_null;
+	atomic<bool> has_null;
 	//! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
 	OuterJoinMarker right_outer;
 };
@@ -85827,15 +86069,14 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context)
 	for (auto &pipeline : pipelines) {
 		auto sink = pipeline->GetSink();
 		if (sink != this) {
-			// reset the sink state for any intermediate sinks
-			sink->sink_state = sink->GetGlobalSinkState(context.client);
+			sink->sink_state.reset();
 		}
 		for (auto &op : pipeline->GetOperators()) {
 			if (op) {
-				op->op_state = op->GetGlobalOperatorState(context.client);
+				op->op_state.reset();
 			}
 		}
-		pipeline->ResetSource(true);
+		pipeline->ClearSource();
 	}
 	// get the MetaPipelines in the recursive_meta_pipeline and reschedule them
@@ -86810,6 +87051,7 @@ public:
 	void Serialize(FieldWriter &writer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
 	idx_t EstimateCardinality(ClientContext &context) override;
+	vector<idx_t> GetTableIndex() const override;
 protected:
 	void ResolveTypes() override;
@@ -87092,6 +87334,7 @@ public:
 	void Serialize(FieldWriter &writer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
+	vector<idx_t> GetTableIndex() const override;
 protected:
 	void ResolveTypes() override {
@@ -87361,6 +87604,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparison
 		                                     op.estimated_cardinality, perfect_join_stats);
 	} else {
+		static constexpr const idx_t NESTED_LOOP_JOIN_THRESHOLD = 5;
 		bool can_merge = has_range > 0;
 		bool can_iejoin = has_range >= 2 && recursive_cte_tables.empty();
 		switch (op.join_type) {
@@ -87373,6 +87617,11 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparison
 		default:
 			break;
 		}
+		if (left->estimated_cardinality <= NESTED_LOOP_JOIN_THRESHOLD ||
+		    right->estimated_cardinality <= NESTED_LOOP_JOIN_THRESHOLD) {
+			can_iejoin = false;
+			can_merge = false;
+		}
 		if (can_iejoin) {
 			plan = make_unique<PhysicalIEJoin>(op, move(left), move(right), move(op.conditions), op.join_type,
 			                                   op.estimated_cardinality);
@@ -87380,7 +87629,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparison
 			// range join: use piecewise merge join
 			plan = make_unique<PhysicalPiecewiseMergeJoin>(op, move(left), move(right), move(op.conditions),
 			                                               op.join_type, op.estimated_cardinality);
-		} else if (PhysicalNestedLoopJoin::IsSupported(op.conditions)) {
+		} else if (PhysicalNestedLoopJoin::IsSupported(op.conditions, op.join_type)) {
 			// inequality join: use nested loop
 			plan = make_unique<PhysicalNestedLoopJoin>(op, move(left), move(right), move(op.conditions), op.join_type,
 			                                           op.estimated_cardinality);
@@ -87604,7 +87853,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
 //===----------------------------------------------------------------------===//
 //                         DuckDB
 //
@@ -87646,25 +87894,11 @@ protected:
-namespace duckdb {
-static void ExtractDependencies(Expression &expr, unordered_set<CatalogEntry *> &dependencies) {
-	if (expr.type == ExpressionType::BOUND_FUNCTION) {
-		auto &function = (BoundFunctionExpression &)expr;
-		if (function.function.dependency) {
-			function.function.dependency(function, dependencies);
-		}
-	}
-	ExpressionIterator::EnumerateChildren(expr, [&](Expression &child) { ExtractDependencies(child, dependencies); });
-}
+namespace duckdb {
 unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateTable &op) {
-	// extract dependencies from any default values
-	for (auto &default_value : op.info->bound_defaults) {
-		if (default_value) {
-			ExtractDependencies(*default_value, op.info->dependencies);
-		}
-	}
 	auto &create_info = (CreateTableInfo &)*op.info->base;
 	auto &catalog = Catalog::GetCatalog(context);
 	auto existing_entry =
@@ -87675,13 +87909,14 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateTabl
 		bool parallel_streaming_insert = !PreserveInsertionOrder(*plan);
 		bool use_batch_index = UseBatchIndex(*plan);
+		auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
 		unique_ptr<PhysicalOperator> create;
 		if (!parallel_streaming_insert && use_batch_index) {
 			create = make_unique<PhysicalBatchInsert>(op, op.schema, move(op.info), op.estimated_cardinality);
 		} else {
 			create = make_unique<PhysicalInsert>(op, op.schema, move(op.info), op.estimated_cardinality,
-			                                     parallel_streaming_insert);
+			                                     parallel_streaming_insert && num_threads > 1);
 		}
 		D_ASSERT(op.children.size() == 1);
@@ -87763,8 +87998,9 @@ namespace duckdb {
 class LogicalDelete : public LogicalOperator {
 public:
-	explicit LogicalDelete(TableCatalogEntry *table)
-	    : LogicalOperator(LogicalOperatorType::LOGICAL_DELETE), table(table), table_index(0), return_chunk(false) {
+	explicit LogicalDelete(TableCatalogEntry *table, idx_t table_index)
+	    : LogicalOperator(LogicalOperatorType::LOGICAL_DELETE), table(table), table_index(table_index),
+	      return_chunk(false) { // table and table_index come from the caller; return_chunk starts out false
 	}
 	TableCatalogEntry *table;
@@ -87775,6 +88011,7 @@ public:
 	void Serialize(FieldWriter &writer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
 	idx_t EstimateCardinality(ClientContext &context) override;
+	vector<idx_t> GetTableIndex() const override;
 protected:
 	vector<ColumnBinding> GetColumnBindings() override {
@@ -87851,6 +88088,7 @@ public:
 	}
 	void Serialize(FieldWriter &writer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
+	vector<idx_t> GetTableIndex() const override;
 protected:
 	void ResolveTypes() override {
@@ -88091,6 +88329,7 @@ public:
 	}
 	void Serialize(FieldWriter &writer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
+	vector<idx_t> GetTableIndex() const override;
 protected:
 	void ResolveTypes() override {
@@ -88474,6 +88713,7 @@ public:
 	idx_t EstimateCardinality(ClientContext &context) override {
 		return expressions.size();
 	}
+	vector<idx_t> GetTableIndex() const override;
 protected:
 	void ResolveTypes() override {
@@ -89145,8 +89385,9 @@ namespace duckdb {
 //! LogicalInsert represents an insertion of data into a base table
 class LogicalInsert : public LogicalOperator {
 public:
-	explicit LogicalInsert(TableCatalogEntry *table)
-	    : LogicalOperator(LogicalOperatorType::LOGICAL_INSERT), table(table), table_index(0), return_chunk(false) {
+	LogicalInsert(TableCatalogEntry *table, idx_t table_index)
+	    : LogicalOperator(LogicalOperatorType::LOGICAL_INSERT), table(table), table_index(table_index),
+	      return_chunk(false) { // table and table_index come from the caller; return_chunk starts out false
 	}
 	vector<vector<unique_ptr<Expression>>> insert_values;
@@ -89183,6 +89424,7 @@ protected:
 	}
 	idx_t EstimateCardinality(ClientContext &context) override;
+	vector<idx_t> GetTableIndex() const override;
 };
 } // namespace duckdb
@@ -89237,6 +89479,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalInsert &op
 	bool parallel_streaming_insert = !PreserveInsertionOrder(*plan);
 	bool use_batch_index = UseBatchIndex(*plan);
+	auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
 	if (op.return_chunk) {
 		// not supported for RETURNING (yet?)
 		parallel_streaming_insert = false;
@@ -89248,7 +89491,8 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalInsert &op
 		                                          op.estimated_cardinality);
 	} else {
 		insert = make_unique<PhysicalInsert>(op.types, op.table, op.column_index_map, move(op.bound_defaults),
-		                                     op.estimated_cardinality, op.return_chunk, parallel_streaming_insert);
+		                                     op.estimated_cardinality, op.return_chunk,
+		                                     parallel_streaming_insert && num_threads > 1);
 	}
 	if (plan) {
 		insert->children.push_back(move(plan));
@@ -89591,6 +89835,7 @@ public:
 	vector<ColumnBinding> GetColumnBindings() override;
 	void Serialize(FieldWriter &writer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
+	vector<idx_t> GetTableIndex() const override;
 protected:
 	void ResolveTypes() override;
@@ -89680,6 +89925,7 @@ public:
 	}
 	void Serialize(FieldWriter &writer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
+	vector<idx_t> GetTableIndex() const override;
 protected:
 	void ResolveTypes() override {
@@ -89727,6 +89973,7 @@ public:
 	}
 	void Serialize(FieldWriter &writer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
+	vector<idx_t> GetTableIndex() const override;
 protected:
 	void ResolveTypes() override {
@@ -89939,6 +90186,7 @@ public:
 	void Serialize(FieldWriter &writer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
+	vector<idx_t> GetTableIndex() const override;
 protected:
 	void ResolveTypes() override {
@@ -90243,6 +90491,7 @@ public:
 	vector<ColumnBinding> GetColumnBindings() override;
 	void Serialize(FieldWriter &writer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
+	vector<idx_t> GetTableIndex() const override;
 protected:
 	void ResolveTypes() override;
@@ -90370,6 +90619,7 @@ public:
 	vector<ColumnBinding> GetColumnBindings() override;
 	void Serialize(FieldWriter &writer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
+	vector<idx_t> GetTableIndex() const override;
 protected:
 	void ResolveTypes() override;
@@ -90528,6 +90778,8 @@ struct LogicalExtensionOperator : public LogicalOperator {
 	    : LogicalOperator(LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR, move(expressions)) {
 	}
+	static unique_ptr<LogicalExtensionOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
 	virtual unique_ptr<PhysicalOperator> CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) = 0;
 };
 } // namespace duckdb
@@ -98818,13 +99070,16 @@ struct LinkedList {
 // forward declarations
 struct WriteDataToSegment;
 struct ReadDataFromSegment;
+struct CopyDataFromSegment;
 typedef ListSegment *(*create_segment_t)(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
-                                         vector<AllocatedData> &owning_vector, uint16_t &capacity);
+                                         vector<AllocatedData> &owning_vector, const uint16_t &capacity); // allocates a new segment with room for `capacity` entries
 typedef void (*write_data_to_segment_t)(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
                                         vector<AllocatedData> &owning_vector, ListSegment *segment, Vector &input,
                                         idx_t &entry_idx, idx_t &count); // writes an entry of `input` into `segment`
-typedef void (*read_data_from_segment_t)(ReadDataFromSegment &read_data_from_segment, ListSegment *segment,
+typedef void (*read_data_from_segment_t)(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
                                          Vector &result, idx_t &total_count); // reads the contents of `segment` into `result`
+typedef ListSegment *(*copy_data_from_segment_t)(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
+                                                 Allocator &allocator, vector<AllocatedData> &owning_vector); // returns a newly allocated copy of `source`
 struct WriteDataToSegment {
 	create_segment_t create_segment;
@@ -98835,6 +99090,10 @@ struct ReadDataFromSegment {
 	read_data_from_segment_t segment_function;
 	vector<ReadDataFromSegment> child_functions;
 };
+struct CopyDataFromSegment { // per-type dispatch record used to copy a ListSegment together with its nested children
+	copy_data_from_segment_t segment_function; // copy routine for segments of this type
+	vector<CopyDataFromSegment> child_functions; // records for nested types: one for list/varchar, one per struct member, none for primitives
+};
 // forward declarations
 static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
@@ -98842,24 +99101,27 @@ static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allo
                       idx_t &count);
 static void BuildListVector(ReadDataFromSegment &read_data_from_segment, LinkedList *linked_list, Vector &result,
                             idx_t &initial_total_count);
+static void CopyLinkedList(CopyDataFromSegment &copy_data_from_segment, const LinkedList *source_list,
+                           LinkedList &target_list, Allocator &allocator, vector<AllocatedData> &owning_vector);
 template <class T> // T is the element type stored in the segment
 static data_ptr_t AllocatePrimitiveData(Allocator &allocator, vector<AllocatedData> &owning_vector,
-                                        uint16_t &capacity) {
+                                        const uint16_t &capacity) { // capacity: number of element slots to reserve
 	owning_vector.emplace_back(allocator.Allocate(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(T)))); // header + one bool and one T per slot
 	return owning_vector.back().get(); // the allocation is kept in owning_vector; the caller receives the pointer it holds
 }
-static data_ptr_t AllocateListData(Allocator &allocator, vector<AllocatedData> &owning_vector, uint16_t &capacity) {
+static data_ptr_t AllocateListData(Allocator &allocator, vector<AllocatedData> &owning_vector,
+                                   const uint16_t &capacity) { // capacity: number of list entries to reserve
 	owning_vector.emplace_back(
 	    allocator.Allocate(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList))); // header + one bool and one uint64_t length per slot + one trailing LinkedList
 	return owning_vector.back().get(); // the allocation is kept in owning_vector; the caller receives the pointer it holds
 }
-static data_ptr_t AllocateStructData(Allocator &allocator, vector<AllocatedData> &owning_vector, uint16_t &capacity,
-                                     idx_t child_count) {
+static data_ptr_t AllocateStructData(Allocator &allocator, vector<AllocatedData> &owning_vector,
+                                     const uint16_t &capacity, const idx_t &child_count) {
 	owning_vector.emplace_back(
 	    allocator.Allocate(sizeof(ListSegment) + capacity * sizeof(bool) + child_count * sizeof(ListSegment *)));
@@ -98867,28 +99129,28 @@ static data_ptr_t AllocateStructData(Allocator &allocator, vector<AllocatedData>
 }
 template <class T> // T is the element type stored in the segment
-static T *GetPrimitiveData(ListSegment *segment) {
+static T *GetPrimitiveData(const ListSegment *segment) { // NOTE: returns a mutable pointer; the C-style cast below drops the const qualifier
 	return (T *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool)); // values start after the header and the `capacity` bool flags
 }
-static uint64_t *GetListLengthData(ListSegment *segment) {
+static uint64_t *GetListLengthData(const ListSegment *segment) { // NOTE: returns a mutable pointer; the C-style cast below drops the const qualifier
 	return (uint64_t *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool)); // lengths start after the header and the `capacity` bool flags
 }
-static LinkedList *GetListChildData(ListSegment *segment) {
+static LinkedList *GetListChildData(const ListSegment *segment) { // NOTE: returns a mutable pointer; the C-style cast below drops the const qualifier
 	return (LinkedList *)(((char *)segment) + sizeof(ListSegment) +
 	                      segment->capacity * (sizeof(bool) + sizeof(uint64_t))); // the child LinkedList sits after the header, the bool flags and the uint64_t lengths
 }
-static ListSegment **GetStructData(ListSegment *segment) {
+static ListSegment **GetStructData(const ListSegment *segment) { // NOTE: returns a mutable pointer; the C-style cast below drops the const qualifier
 	return (ListSegment **)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool)); // child segment pointers start after the header and the `capacity` bool flags
 }
-static bool *GetNullMask(ListSegment *segment) {
+static bool *GetNullMask(const ListSegment *segment) { // NOTE: returns a mutable pointer; the C-style cast below drops the const qualifier
 	return (bool *)(((char *)segment) + sizeof(ListSegment)); // the bool flags are stored directly after the segment header
 }
-static uint16_t GetCapacityForNewSegment(LinkedList *linked_list) {
+static uint16_t GetCapacityForNewSegment(const LinkedList *linked_list) {
 	// consecutive segments grow by the power of two
 	uint16_t capacity = 4;
@@ -98901,7 +99163,7 @@ static uint16_t GetCapacityForNewSegment(LinkedList *linked_list) {
 template <class T>
 static ListSegment *CreatePrimitiveSegment(WriteDataToSegment &, Allocator &allocator,
-                                           vector<AllocatedData> &owning_vector, uint16_t &capacity) {
+                                           vector<AllocatedData> &owning_vector, const uint16_t &capacity) {
 	// allocate data and set the header
 	auto segment = (ListSegment *)AllocatePrimitiveData<T>(allocator, owning_vector, capacity);
@@ -98912,7 +99174,7 @@ static ListSegment *CreatePrimitiveSegment(WriteDataToSegment &, Allocator &allo
 }
 static ListSegment *CreateListSegment(WriteDataToSegment &, Allocator &allocator, vector<AllocatedData> &owning_vector,
-                                      uint16_t &capacity) {
+                                      const uint16_t &capacity) {
 	// allocate data and set the header
 	auto segment = (ListSegment *)AllocateListData(allocator, owning_vector, capacity);
@@ -98929,7 +99191,7 @@ static ListSegment *CreateListSegment(WriteDataToSegment &, Allocator &allocator
 }
 static ListSegment *CreateStructSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
-                                        vector<AllocatedData> &owning_vector, uint16_t &capacity) {
+                                        vector<AllocatedData> &owning_vector, const uint16_t &capacity) {
 	// allocate data and set header
 	auto segment = (ListSegment *)AllocateStructData(allocator, owning_vector, capacity,
@@ -99123,7 +99385,7 @@ static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allo
 }
 template <class T>
-static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, ListSegment *segment, Vector &result,
+static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, const ListSegment *segment, Vector &result,
                                          idx_t &total_count) {
 	auto &aggr_vector_validity = FlatVector::Validity(result);
@@ -99147,7 +99409,7 @@ static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, ListSegment *seg
 	}
 }
-static void ReadDataFromVarcharSegment(ReadDataFromSegment &, ListSegment *segment, Vector &result,
+static void ReadDataFromVarcharSegment(ReadDataFromSegment &, const ListSegment *segment, Vector &result,
                                        idx_t &total_count) {
 	auto &aggr_vector_validity = FlatVector::Validity(result);
@@ -99188,8 +99450,8 @@ static void ReadDataFromVarcharSegment(ReadDataFromSegment &, ListSegment *segme
 	}
 }
-static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment, ListSegment *segment, Vector &result,
-                                    idx_t &total_count) {
+static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
+                                    Vector &result, idx_t &total_count) {
 	auto &aggr_vector_validity = FlatVector::Validity(result);
@@ -99228,8 +99490,8 @@ static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment,
 	BuildListVector(read_data_from_segment.child_functions[0], &linked_child_list, child_vector, starting_offset);
 }
-static void ReadDataFromStructSegment(ReadDataFromSegment &read_data_from_segment, ListSegment *segment, Vector &result,
-                                      idx_t &total_count) {
+static void ReadDataFromStructSegment(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
+                                      Vector &result, idx_t &total_count) {
 	auto &aggr_vector_validity = FlatVector::Validity(result);
@@ -99268,6 +99530,86 @@ static void BuildListVector(ReadDataFromSegment &read_data_from_segment, LinkedL
 	linked_list->last_segment = nullptr;
 }
+template <class T> // T is the primitive element type held by the segment
+static ListSegment *CopyDataFromPrimitiveSegment(CopyDataFromSegment &, const ListSegment *source, Allocator &allocator,
+                                                 vector<AllocatedData> &owning_vector) { // returns a standalone copy of `source`; its memory is recorded in owning_vector
+	auto target = (ListSegment *)AllocatePrimitiveData<T>(allocator, owning_vector, source->capacity); // same capacity as the source segment
+	memcpy(target, source, sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(T))); // bytewise copy of the header, the bool flags and the values
+	target->next = nullptr; // the memcpy also copied source->next; the copy starts out unlinked
+	return target;
+}
+static ListSegment *CopyDataFromListSegment(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
+                                            Allocator &allocator, vector<AllocatedData> &owning_vector) {
+	// load the child linked list stored in the source segment (it is copied recursively below)
+	auto source_linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(source));
+	// create the target segment as a bytewise copy of the source (header, bool flags, lengths, child list)
+	auto target = (ListSegment *)AllocateListData(allocator, owning_vector, source->capacity);
+	memcpy(target, source,
+	       sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList));
+	target->next = nullptr; // the memcpy also copied source->next; the copy starts out unlinked
+	auto target_linked_list = GetListChildData(target);
+	LinkedList linked_list(source_linked_child_list.total_capacity, nullptr, nullptr); // same total_capacity, but no segments yet
+	Store<LinkedList>(linked_list, (data_ptr_t)target_linked_list); // overwrites the memcpy'd child list, whose pointers referred to the source's segments
+	// recurse to copy the linked child list
+	auto target_linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(target));
+	D_ASSERT(copy_data_from_segment.child_functions.size() == 1); // this copy routine expects exactly one child function
+	CopyLinkedList(copy_data_from_segment.child_functions[0], &source_linked_child_list, target_linked_child_list,
+	               allocator, owning_vector);
+	// store the updated linked list (now pointing at the copied child segments) back into the target
+	Store<LinkedList>(target_linked_child_list, (data_ptr_t)GetListChildData(target));
+	return target;
+}
+static ListSegment *CopyDataFromStructSegment(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
+                                              Allocator &allocator, vector<AllocatedData> &owning_vector) {
+	auto source_child_count = copy_data_from_segment.child_functions.size(); // the child count is taken from the dispatch record, not from the segment
+	auto target = (ListSegment *)AllocateStructData(allocator, owning_vector, source->capacity, source_child_count);
+	memcpy(target, source,
+	       sizeof(ListSegment) + source->capacity * sizeof(bool) + source_child_count * sizeof(ListSegment *)); // bytewise copy of the header, the bool flags and the child pointers
+	target->next = nullptr; // the memcpy also copied source->next; the copy starts out unlinked
+	// recurse and copy the children, replacing each memcpy'd child pointer with a pointer to the copy
+	auto source_child_segments = GetStructData(source);
+	auto target_child_segments = GetStructData(target);
+	for (idx_t i = 0; i < copy_data_from_segment.child_functions.size(); i++) {
+		auto child_function = copy_data_from_segment.child_functions[i]; // NOTE(review): by-value copy of the record (including its nested vector) each iteration; a reference would avoid it
+		auto source_child_segment = Load<ListSegment *>((data_ptr_t)(source_child_segments + i));
+		auto target_child_segment =
+		    child_function.segment_function(child_function, source_child_segment, allocator, owning_vector);
+		Store<ListSegment *>(target_child_segment, (data_ptr_t)(target_child_segments + i));
+	}
+	return target;
+}
+static void CopyLinkedList(CopyDataFromSegment &copy_data_from_segment, const LinkedList *source_list,
+                           LinkedList &target_list, Allocator &allocator, vector<AllocatedData> &owning_vector) { // appends a copy of every segment of source_list to target_list; target_list.total_capacity is not modified here
+	auto source_segment = source_list->first_segment;
+	while (source_segment) { // walk the source chain through the `next` pointers
+		auto target_segment =
+		    copy_data_from_segment.segment_function(copy_data_from_segment, source_segment, allocator, owning_vector);
+		source_segment = source_segment->next;
+		if (!target_list.first_segment) { // the first copied segment becomes the head of an empty target list
+			target_list.first_segment = target_segment;
+		}
+		if (target_list.last_segment) { // otherwise link the copy behind the current tail
+			target_list.last_segment->next = target_segment;
+		}
+		target_list.last_segment = target_segment; // the copy is the new tail
+	}
+}
 static void InitializeValidities(Vector &vector, idx_t &capacity) {
 	auto &validity_mask = FlatVector::Validity(vector);
@@ -99311,6 +99653,7 @@ struct ListBindData : public FunctionData {
 	LogicalType stype;
 	WriteDataToSegment write_data_to_segment;
 	ReadDataFromSegment read_data_from_segment;
+	CopyDataFromSegment copy_data_from_segment;
 	unique_ptr<FunctionData> Copy() const override {
 		return make_unique<ListBindData>(stype);
@@ -99323,7 +99666,8 @@ struct ListBindData : public FunctionData {
 };
 static void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment,
-                                    ReadDataFromSegment &read_data_from_segment, const LogicalType &type) {
+                                    ReadDataFromSegment &read_data_from_segment,
+                                    CopyDataFromSegment &copy_data_from_segment, const LogicalType &type) {
 	auto physical_type = type.InternalType();
 	switch (physical_type) {
@@ -99332,113 +99676,135 @@ static void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment,
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<bool>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<bool>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<bool>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<bool>;
 		break;
 	}
 	case PhysicalType::INT8: {
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<int8_t>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int8_t>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int8_t>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int8_t>;
 		break;
 	}
 	case PhysicalType::INT16: {
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<int16_t>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int16_t>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int16_t>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int16_t>;
 		break;
 	}
 	case PhysicalType::INT32: {
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<int32_t>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int32_t>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int32_t>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int32_t>;
 		break;
 	}
 	case PhysicalType::INT64: {
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<int64_t>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int64_t>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int64_t>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int64_t>;
 		break;
 	}
 	case PhysicalType::UINT8: {
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<uint8_t>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint8_t>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint8_t>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint8_t>;
 		break;
 	}
 	case PhysicalType::UINT16: {
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<uint16_t>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint16_t>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint16_t>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint16_t>;
 		break;
 	}
 	case PhysicalType::UINT32: {
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<uint32_t>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint32_t>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint32_t>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint32_t>;
 		break;
 	}
 	case PhysicalType::UINT64: {
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<uint64_t>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint64_t>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint64_t>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint64_t>;
 		break;
 	}
 	case PhysicalType::FLOAT: {
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<float>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<float>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<float>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<float>;
 		break;
 	}
 	case PhysicalType::DOUBLE: {
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<double>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<double>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<double>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<double>;
 		break;
 	}
 	case PhysicalType::INT128: {
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<hugeint_t>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<hugeint_t>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<hugeint_t>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<hugeint_t>;
 		break;
 	}
 	case PhysicalType::INTERVAL: {
 		write_data_to_segment.create_segment = CreatePrimitiveSegment<interval_t>;
 		write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<interval_t>;
 		read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<interval_t>;
+		copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<interval_t>;
 		break;
 	}
 	case PhysicalType::VARCHAR: {
 		write_data_to_segment.create_segment = CreateListSegment;
 		write_data_to_segment.segment_function = WriteDataToVarcharSegment;
 		read_data_from_segment.segment_function = ReadDataFromVarcharSegment;
+		copy_data_from_segment.segment_function = CopyDataFromListSegment;
 		write_data_to_segment.child_functions.emplace_back(WriteDataToSegment());
 		write_data_to_segment.child_functions.back().create_segment = CreatePrimitiveSegment<char>;
+		copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment());
+		copy_data_from_segment.child_functions.back().segment_function = CopyDataFromPrimitiveSegment<char>;
 		break;
 	}
 	case PhysicalType::LIST: {
 		write_data_to_segment.create_segment = CreateListSegment;
 		write_data_to_segment.segment_function = WriteDataToListSegment;
 		read_data_from_segment.segment_function = ReadDataFromListSegment;
+		copy_data_from_segment.segment_function = CopyDataFromListSegment;
 		// recurse
 		write_data_to_segment.child_functions.emplace_back(WriteDataToSegment());
 		read_data_from_segment.child_functions.emplace_back(ReadDataFromSegment());
+		copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment());
 		GetSegmentDataFunctions(write_data_to_segment.child_functions.back(),
-		                        read_data_from_segment.child_functions.back(), ListType::GetChildType(type));
+		                        read_data_from_segment.child_functions.back(),
+		                        copy_data_from_segment.child_functions.back(), ListType::GetChildType(type));
 		break;
 	}
 	case PhysicalType::STRUCT: {
 		write_data_to_segment.create_segment = CreateStructSegment;
 		write_data_to_segment.segment_function = WriteDataToStructSegment;
 		read_data_from_segment.segment_function = ReadDataFromStructSegment;
+		copy_data_from_segment.segment_function = CopyDataFromStructSegment;
 		// recurse
 		auto child_types = StructType::GetChildTypes(type);
 		for (idx_t i = 0; i < child_types.size(); i++) {
 			write_data_to_segment.child_functions.emplace_back(WriteDataToSegment());
 			read_data_from_segment.child_functions.emplace_back(ReadDataFromSegment());
+			copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment());
 			GetSegmentDataFunctions(write_data_to_segment.child_functions.back(),
-			                        read_data_from_segment.child_functions.back(), child_types[i].second);
+			                        read_data_from_segment.child_functions.back(),
+			                        copy_data_from_segment.child_functions.back(), child_types[i].second);
 		}
 		break;
 	}
@@ -99451,7 +99817,7 @@ ListBindData::ListBindData(const LogicalType &stype_p) : stype(stype_p) {
 	// always unnest once because the result vector is of type LIST
 	auto type = ListType::GetChildType(stype_p);
-	GetSegmentDataFunctions(write_data_to_segment, read_data_from_segment, type);
+	GetSegmentDataFunctions(write_data_to_segment, read_data_from_segment, copy_data_from_segment, type);
 }
 ListBindData::~ListBindData() {
@@ -99519,11 +99885,13 @@ static void ListUpdateFunction(Vector inputs[], AggregateInputData &aggr_input_d
 	}
 }
-static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputData &, idx_t count) {
+static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputData &aggr_input_data, idx_t count) {
 	UnifiedVectorFormat sdata;
 	state.ToUnifiedFormat(count, sdata);
 	auto states_ptr = (ListAggState **)sdata.data;
+	auto &list_bind_data = (ListBindData &)*aggr_input_data.bind_data;
 	auto combined_ptr = FlatVector::GetData<ListAggState *>(combined);
 	for (idx_t i = 0; i < count; i++) {
 		auto state = states_ptr[sdata.sel->get_index(i)];
@@ -99533,32 +99901,27 @@ static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputD
 		}
 		D_ASSERT(state->type);
 		D_ASSERT(state->owning_vector);
-		if (!combined_ptr[i]->linked_list) {
-			// copy the linked list
+		if (!combined_ptr[i]->linked_list) {
 			combined_ptr[i]->linked_list = new LinkedList(0, nullptr, nullptr);
-			combined_ptr[i]->linked_list->first_segment = state->linked_list->first_segment;
-			combined_ptr[i]->linked_list->last_segment = state->linked_list->last_segment;
-			combined_ptr[i]->linked_list->total_capacity = state->linked_list->total_capacity;
-			// copy the type
+			combined_ptr[i]->owning_vector = new vector<AllocatedData>;
 			combined_ptr[i]->type = new LogicalType(*state->type);
+		}
+		auto owning_vector = combined_ptr[i]->owning_vector;
-			// new owning_vector to hold the unique pointers
-			combined_ptr[i]->owning_vector = new vector<AllocatedData>;
+		// copy the linked list of the state
+		auto copied_linked_list = LinkedList(state->linked_list->total_capacity, nullptr, nullptr);
+		CopyLinkedList(list_bind_data.copy_data_from_segment, state->linked_list, copied_linked_list,
+		               aggr_input_data.allocator, *owning_vector);
+		// append the copied linked list to the combined state
+		if (combined_ptr[i]->linked_list->last_segment) {
+			combined_ptr[i]->linked_list->last_segment->next = copied_linked_list.first_segment;
 		} else {
-			combined_ptr[i]->linked_list->last_segment->next = state->linked_list->first_segment;
-			combined_ptr[i]->linked_list->last_segment = state->linked_list->last_segment;
-			combined_ptr[i]->linked_list->total_capacity += state->linked_list->total_capacity;
-		}
-		// copy the owning vector (and its unique pointers to the allocated data)
-		// FIXME: more efficient way of copying the unique pointers?
-		auto &owning_vector = *state->owning_vector;
-		for (idx_t j = 0; j < state->owning_vector->size(); j++) {
-			combined_ptr[i]->owning_vector->push_back(move(owning_vector[j]));
+			combined_ptr[i]->linked_list->first_segment = copied_linked_list.first_segment;
 		}
+		combined_ptr[i]->linked_list->last_segment = copied_linked_list.last_segment;
+		combined_ptr[i]->linked_list->total_capacity += copied_linked_list.total_capacity;
 	}
 }
@@ -99822,10 +100185,11 @@ struct RegrCountFunction {
 namespace duckdb {
 void RegrCountFun::RegisterFunction(BuiltinFunctions &set) {
-	AggregateFunctionSet corr("regr_count");
-	corr.AddFunction(AggregateFunction::BinaryAggregate<size_t, double, double, uint32_t, RegrCountFunction>(
-	    LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::UINTEGER));
-	set.AddFunction(corr);
+	auto regr_count = AggregateFunction::BinaryAggregate<size_t, double, double, uint32_t, RegrCountFunction>(
+	    LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::UINTEGER); // presumably (DOUBLE, DOUBLE) -> UINTEGER; confirm against BinaryAggregate's parameter order
+	regr_count.name = "regr_count"; // the name is set on the function itself; no AggregateFunctionSet wrapper is used any more
+	regr_count.null_handling = FunctionNullHandling::SPECIAL_HANDLING; // NOTE(review): presumably lets the aggregate deal with NULL inputs itself; confirm
+	set.AddFunction(regr_count); // registers the single aggregate with the builtin function set
 }
 } // namespace duckdb
@@ -102674,18 +103038,9 @@ unique_ptr<BoundCastData> BindUnionToUnionCast(BindCastInput &input, const Logic
 		for (idx_t target_idx = 0; target_idx < UnionType::GetMemberCount(target); target_idx++) {
 			auto &target_member_name = UnionType::GetMemberName(target, target_idx);
-			// found a matching member, check if the types are castable
+			// found a matching member
 			if (source_member_name == target_member_name) {
 				auto &target_member_type = UnionType::GetMemberType(target, target_idx);
-				if (input.function_set.ImplicitCastCost(source_member_type, target_member_type) < 0) {
-					auto message = StringUtil::Format(
-					    "Type %s can't be cast as %s. The member '%s' can't be implicitly cast from %s to %s",
-					    source.ToString(), target.ToString(), source_member_name, source_member_type.ToString(),
-					    target_member_type.ToString());
-					throw CastException(message);
-				}
 				tag_map[source_idx] = target_idx;
 				member_casts.push_back(input.GetCastFunction(source_member_type, target_member_type));
 				found = true;
@@ -102759,6 +103114,14 @@ static bool UnionToUnionCast(Vector &source, Vector &result, idx_t count, CastPa
 		}
 	} else {
 		// Otherwise, use the unified vector format to access the source vector.
+		// Ensure that all the result members are flat vectors
+		// This is not always the case, e.g. when a member is cast using the default TryNullCast function
+		// the resulting member vector will be a constant null vector.
+		for (idx_t target_member_idx = 0; target_member_idx < target_member_count; target_member_idx++) {
+			UnionVector::GetMember(result, target_member_idx).Flatten(count);
+		}
 		// We assume that a union tag vector validity matches the union vector validity.
 		UnifiedVectorFormat source_tag_format;
 		source_tag_vector.ToUnifiedFormat(count, source_tag_format);
@@ -102771,6 +103134,9 @@ static bool UnionToUnionCast(Vector &source, Vector &result, idx_t count, CastPa
 				auto target_tag = cast_data.tag_map[source_tag];
 				FlatVector::GetData<union_tag_t>(result_tag_vector)[row_idx] = target_tag;
 			} else {
+				// Issue: the members of the result are not always flat vectors.
+				// In the case of TryNullCast, the result member is a constant vector.
 				FlatVector::SetNull(result, row_idx, true);
 			}
 		}
@@ -108931,15 +109297,16 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF
 }
 struct StrfTimeBindData : public FunctionData {
-	explicit StrfTimeBindData(StrfTimeFormat format_p, string format_string_p)
-	    : format(move(format_p)), format_string(move(format_string_p)) {
+	explicit StrfTimeBindData(StrfTimeFormat format_p, string format_string_p, bool is_null)
+	    : format(move(format_p)), format_string(move(format_string_p)), is_null(is_null) {
 	}
 	StrfTimeFormat format;
 	string format_string;
+	bool is_null;
 	unique_ptr<FunctionData> Copy() const override {
-		return make_unique<StrfTimeBindData>(format, format_string);
+		return make_unique<StrfTimeBindData>(format, format_string, is_null);
 	}
 	bool Equals(const FunctionData &other_p) const override {
@@ -108962,13 +109329,14 @@ static unique_ptr<FunctionData> StrfTimeBindFunction(ClientContext &context, Sca
 	Value options_str = ExpressionExecutor::EvaluateScalar(context, *format_arg);
 	auto format_string = options_str.GetValue<string>();
 	StrfTimeFormat format;
-	if (!options_str.IsNull()) {
+	bool is_null = options_str.IsNull();
+	if (!is_null) {
 		string error = StrTimeFormat::ParseFormatSpecifier(format_string, format);
 		if (!error.empty()) {
 			throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error);
 		}
 	}
-	return make_unique<StrfTimeBindData>(format, format_string);
+	return make_unique<StrfTimeBindData>(format, format_string, is_null);
 }
 void StrfTimeFormat::ConvertDateVector(Vector &input, Vector &result, idx_t count) {
@@ -108995,7 +109363,7 @@ static void StrfTimeFunctionDate(DataChunk &args, ExpressionState &state, Vector
 	auto &func_expr = (BoundFunctionExpression &)state.expr;
 	auto &info = (StrfTimeBindData &)*func_expr.bind_info;
-	if (ConstantVector::IsNull(args.data[REVERSED ? 0 : 1])) {
+	if (info.is_null) {
 		result.SetVectorType(VectorType::CONSTANT_VECTOR);
 		ConstantVector::SetNull(result, true);
 		return;
@@ -109029,7 +109397,7 @@ static void StrfTimeFunctionTimestamp(DataChunk &args, ExpressionState &state, V
 	auto &func_expr = (BoundFunctionExpression &)state.expr;
 	auto &info = (StrfTimeBindData &)*func_expr.bind_info;
-	if (ConstantVector::IsNull(args.data[REVERSED ? 0 : 1])) {
+	if (info.is_null) {
 		result.SetVectorType(VectorType::CONSTANT_VECTOR);
 		ConstantVector::SetNull(result, true);
 		return;
@@ -122527,7 +122895,9 @@ static unique_ptr<FunctionData> StructInsertBind(ClientContext &context, ScalarF
 unique_ptr<BaseStatistics> StructInsertStats(ClientContext &context, FunctionStatisticsInput &input) {
 	auto &child_stats = input.child_stats;
 	auto &expr = input.expr;
+	if (child_stats.empty() || !child_stats[0]) {
+		return nullptr;
+	}
 	auto &existing_struct_stats = (StructStatistics &)*child_stats[0];
 	auto new_struct_stats = make_unique<StructStatistics>(expr.return_type);
@@ -127954,6 +128324,9 @@ static unique_ptr<FunctionData> DuckDBFunctionsBind(ClientContext &context, Tabl
 	names.emplace_back("has_side_effects");
 	return_types.emplace_back(LogicalType::BOOLEAN);
+	names.emplace_back("function_oid");
+	return_types.emplace_back(LogicalType::BIGINT);
 	return nullptr;
 }
@@ -128340,6 +128713,9 @@ bool ExtractFunctionData(StandardEntry *entry, idx_t function_idx, DataChunk &ou
 	// has_side_effects, LogicalType::BOOLEAN
 	output.SetValue(9, output_offset, OP::HasSideEffects(function, function_idx));
+	// function_oid, LogicalType::BIGINT
+	output.SetValue(10, output_offset, Value::BIGINT(entry->oid));
 	return function_idx + 1 == OP::FunctionCount(function);
 }
@@ -135347,6 +135723,10 @@ public:
 private:
 	void RunOptimizer(OptimizerType type, const std::function<void()> &callback);
+	void Verify(LogicalOperator &op);
+private:
+	unique_ptr<LogicalOperator> plan;
 };
 } // namespace duckdb
@@ -136051,6 +136431,7 @@ unique_ptr<LogicalOperator> ClientContext::ExtractPlan(const string &query) {
 		}
 		ColumnBindingResolver resolver;
+		resolver.Verify(*plan);
 		resolver.VisitOperator(*plan);
 		plan->ResolveOperatorTypes();
@@ -137142,6 +137523,14 @@ struct MaximumMemorySetting {
 	static Value GetSetting(ClientContext &context);
 };
+struct PasswordSetting {
+	static constexpr const char *Name = "password";
+	static constexpr const char *Description = "The password to use. Ignored for legacy compatibility.";
+	static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR;
+	static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
+	static Value GetSetting(ClientContext &context);
+};
 struct PerfectHashThresholdSetting {
 	static constexpr const char *Name = "perfect_ht_threshold";
 	static constexpr const char *Description = "Threshold in bytes for when to use a perfect hash table (default: 12)";
@@ -137237,6 +137626,14 @@ struct ThreadsSetting {
 	static Value GetSetting(ClientContext &context);
 };
+struct UsernameSetting {
+	static constexpr const char *Name = "username";
+	static constexpr const char *Description = "The username to use. Ignored for legacy compatibility.";
+	static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR;
+	static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
+	static Value GetSetting(ClientContext &context);
+};
 } // namespace duckdb
@@ -137286,6 +137683,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
                                                  DUCKDB_GLOBAL(MaximumMemorySetting),
                                                  DUCKDB_GLOBAL_ALIAS("memory_limit", MaximumMemorySetting),
                                                  DUCKDB_GLOBAL_ALIAS("null_order", DefaultNullOrderSetting),
+                                                 DUCKDB_GLOBAL(PasswordSetting),
                                                  DUCKDB_LOCAL(PerfectHashThresholdSetting),
                                                  DUCKDB_LOCAL(PreserveIdentifierCase),
                                                  DUCKDB_GLOBAL(PreserveInsertionOrder),
@@ -137298,6 +137696,8 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
                                                  DUCKDB_LOCAL(SearchPathSetting),
                                                  DUCKDB_GLOBAL(TempDirectorySetting),
                                                  DUCKDB_GLOBAL(ThreadsSetting),
+                                                 DUCKDB_GLOBAL(UsernameSetting),
+                                                 DUCKDB_GLOBAL_ALIAS("user", UsernameSetting),
                                                  DUCKDB_GLOBAL_ALIAS("wal_autocheckpoint", CheckpointThresholdSetting),
                                                  DUCKDB_GLOBAL_ALIAS("worker_threads", ThreadsSetting),
                                                  FINAL_SETTING};
@@ -137791,6 +138191,34 @@ unique_ptr<MaterializedQueryResult> Connection::Query(const string &query) {
 	return unique_ptr_cast<QueryResult, MaterializedQueryResult>(move(result));
 }
+DUCKDB_API string Connection::GetSubstrait(const string &query) {
+	vector<Value> params;
+	params.emplace_back(query);
+	auto result = TableFunction("get_substrait", params)->Execute();
+	auto protobuf = result->FetchRaw()->GetValue(0, 0);
+	return protobuf.GetValueUnsafe<string_t>().GetString();
+}
+DUCKDB_API unique_ptr<QueryResult> Connection::FromSubstrait(const string &proto) {
+	vector<Value> params;
+	params.emplace_back(Value::BLOB_RAW(proto));
+	return TableFunction("from_substrait", params)->Execute();
+}
+DUCKDB_API string Connection::GetSubstraitJSON(const string &query) {
+	vector<Value> params;
+	params.emplace_back(query);
+	auto result = TableFunction("get_substrait_json", params)->Execute();
+	auto protobuf = result->FetchRaw()->GetValue(0, 0);
+	return protobuf.GetValueUnsafe<string_t>().GetString();
+}
+DUCKDB_API unique_ptr<QueryResult> Connection::FromSubstraitJSON(const string &json) {
+	vector<Value> params;
+	params.emplace_back(json);
+	return TableFunction("from_substrait_json", params)->Execute();
+}
 unique_ptr<MaterializedQueryResult> Connection::Query(unique_ptr<SQLStatement> statement) {
 	auto result = context->Query(move(statement), false);
 	D_ASSERT(result->type == QueryResultType::MATERIALIZED_RESULT);
@@ -151761,6 +152189,17 @@ Value MaximumMemorySetting::GetSetting(ClientContext &context) {
 	return Value(StringUtil::BytesToHumanReadableString(config.options.maximum_memory));
 }
+//===--------------------------------------------------------------------===//
+// Password Setting
+//===--------------------------------------------------------------------===//
+void PasswordSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
+	// nop
+}
+Value PasswordSetting::GetSetting(ClientContext &context) {
+	return Value();
+}
 //===--------------------------------------------------------------------===//
 // Perfect Hash Threshold
 //===--------------------------------------------------------------------===//
@@ -151927,6 +152366,17 @@ Value ThreadsSetting::GetSetting(ClientContext &context) {
 	return Value::BIGINT(config.options.maximum_threads);
 }
+//===--------------------------------------------------------------------===//
+// Username Setting
+//===--------------------------------------------------------------------===//
+void UsernameSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
+	// nop
+}
+Value UsernameSetting::GetSetting(ClientContext &context) {
+	return Value();
+}
 } // namespace duckdb
@@ -153807,7 +154257,8 @@ bool Deliminator::RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, u
 			}
 			parent_expr =
 			    make_unique<BoundColumnRefExpression>(parent_expr->alias, parent_expr->return_type, it->first);
-			parent_cond.comparison = child_cond.comparison;
+			parent_cond.comparison =
+			    parent_delim_get_side == 0 ? child_cond.comparison : FlipComparisionExpression(child_cond.comparison);
 			break;
 		}
 	}
@@ -154266,6 +154717,9 @@ idx_t FilterCombiner::GetEquivalenceSet(Expression *expr) {
 FilterResult FilterCombiner::AddConstantComparison(vector<ExpressionValueInformation> &info_list,
                                                    ExpressionValueInformation info) {
+	if (info.constant.IsNull()) {
+		return FilterResult::UNSATISFIABLE;
+	}
 	for (idx_t i = 0; i < info_list.size(); i++) {
 		auto comparison = CompareValueInformation(info_list[i], info);
 		switch (comparison) {
@@ -155730,7 +156184,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownJoin(unique_ptr<LogicalOpera
 void FilterPushdown::PushFilters() {
 	for (auto &f : filters) {
 		auto result = combiner.AddFilter(move(f->filter));
-		D_ASSERT(result == FilterResult::SUCCESS);
+		D_ASSERT(result != FilterResult::UNSUPPORTED);
 		(void)result;
 	}
 	filters.clear();
@@ -157927,6 +158381,7 @@ public:
 namespace duckdb {
 Optimizer::Optimizer(Binder &binder, ClientContext &context) : context(context), binder(binder), rewriter(context) {
@@ -157963,9 +158418,18 @@ void Optimizer::RunOptimizer(OptimizerType type, const std::function<void()> &ca
 	profiler.StartPhase(OptimizerTypeToString(type));
 	callback();
 	profiler.EndPhase();
+	if (plan) {
+		Verify(*plan);
+	}
+}
+void Optimizer::Verify(LogicalOperator &op) {
+	ColumnBindingResolver::Verify(op);
 }
-unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan) {
+unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan_p) {
+	Verify(*plan_p);
+	this->plan = move(plan_p);
 	// first we perform expression rewrites using the ExpressionRewriter
 	// this does not change the logical plan structure, but only simplifies the expression trees
 	RunOptimizer(OptimizerType::EXPRESSION_REWRITER, [&]() { rewriter.VisitOperator(*plan); });
@@ -158052,7 +158516,7 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
 	Planner::VerifyPlan(context, plan);
-	return plan;
+	return move(plan);
 }
 } // namespace duckdb
@@ -158091,7 +158555,8 @@ namespace duckdb {
 unique_ptr<LogicalOperator> FilterPullup::PullupFilter(unique_ptr<LogicalOperator> op) {
 	D_ASSERT(op->type == LogicalOperatorType::LOGICAL_FILTER);
-	if (can_pullup) {
+	auto &filter = (LogicalFilter &)*op;
+	if (can_pullup && filter.projection_map.empty()) {
 		unique_ptr<LogicalOperator> child = move(op->children[0]);
 		child = Rewrite(move(child));
 		// moving filter's expressions
@@ -158398,6 +158863,9 @@ using Filter = FilterPushdown::Filter;
 unique_ptr<LogicalOperator> FilterPushdown::PushdownFilter(unique_ptr<LogicalOperator> op) {
 	D_ASSERT(op->type == LogicalOperatorType::LOGICAL_FILTER);
 	auto &filter = (LogicalFilter &)*op;
+	if (!filter.projection_map.empty()) {
+		return FinishPushdown(move(op));
+	}
 	// filter: gather the filters and remove the filter from the set of operations
 	for (auto &expression : filter.expressions) {
 		if (AddFilter(move(expression)) == FilterResult::UNSATISFIABLE) {
@@ -158659,7 +159127,16 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownLeftJoin(unique_ptr<LogicalO
 	right_pushdown.GenerateFilters();
 	op->children[0] = left_pushdown.Rewrite(move(op->children[0]));
 	op->children[1] = right_pushdown.Rewrite(move(op->children[1]));
-	return FinishPushdown(move(op));
+	if (filters.empty()) {
+		// no filters to push
+		return op;
+	}
+	auto filter = make_unique<LogicalFilter>();
+	for (auto &f : filters) {
+		filter->expressions.push_back(move(f->filter));
+	}
+	filter->children.push_back(move(op));
+	return move(filter);
 }
 } // namespace duckdb
@@ -158701,8 +159178,8 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownMarkJoin(unique_ptr<LogicalO
 	right_bindings.insert(comp_join.mark_index);
 	FilterPushdown left_pushdown(optimizer), right_pushdown(optimizer);
-#ifndef NDEBUG
-	bool found_mark_reference = false;
+#ifdef DEBUG
+	bool simplified_mark_join = false;
 #endif
 	// now check the set of filters
 	for (idx_t i = 0; i < filters.size(); i++) {
@@ -158714,15 +159191,16 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownMarkJoin(unique_ptr<LogicalO
 			filters.erase(filters.begin() + i);
 			i--;
 		} else if (side == JoinSide::RIGHT) {
-			// there can only be at most one filter referencing the marker
-#ifndef NDEBUG
-			D_ASSERT(!found_mark_reference);
-			found_mark_reference = true;
+#ifdef DEBUG
+			D_ASSERT(!simplified_mark_join);
 #endif
 			// this filter references the marker
 			// we can turn this into a SEMI join if the filter is on only the marker
 			if (filters[i]->filter->type == ExpressionType::BOUND_COLUMN_REF) {
 				// filter just references the marker: turn into semi join
+#ifdef DEBUG
+				simplified_mark_join = true;
+#endif
 				join.join_type = JoinType::SEMI;
 				filters.erase(filters.begin() + i);
 				i--;
@@ -158745,6 +159223,9 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownMarkJoin(unique_ptr<LogicalO
 						}
 					}
 					if (all_null_values_are_equal) {
+#ifdef DEBUG
+						simplified_mark_join = true;
+#endif
 						// all null values are equal, convert to ANTI join
 						join.join_type = JoinType::ANTI;
 						filters.erase(filters.begin() + i);
@@ -159507,7 +159988,8 @@ unique_ptr<Expression> ComparisonSimplificationRule::Apply(LogicalOperator &op,
 		}
 		// Is the constant cast invertible?
-		if (!BoundCastExpression::CastIsInvertible(cast_expression->return_type, target_type)) {
+		if (!cast_constant.IsNull() &&
+		    !BoundCastExpression::CastIsInvertible(cast_expression->return_type, target_type)) {
 			// Is it actually invertible?
 			Value uncast_constant;
 			if (!cast_constant.DefaultTryCastAs(constant_value.type(), uncast_constant, &error_message, true) ||
@@ -188507,13 +188989,14 @@ protected:
 namespace duckdb {
 //! The HAVING binder is responsible for binding an expression within the HAVING clause of a SQL statement
 class HavingBinder : public SelectBinder {
 public:
 	HavingBinder(Binder &binder, ClientContext &context, BoundSelectNode &node, BoundGroupInformation &info,
-	             case_insensitive_map_t<idx_t> &alias_map);
+	             case_insensitive_map_t<idx_t> &alias_map, AggregateHandling aggregate_handling);
 protected:
 	BindResult BindExpression(unique_ptr<ParsedExpression> *expr_ptr, idx_t depth,
@@ -188523,6 +189006,7 @@ private:
 	BindResult BindColumnRef(unique_ptr<ParsedExpression> *expr_ptr, idx_t depth, bool root_expression);
 	ColumnAliasBinder column_alias_binder;
+	AggregateHandling aggregate_handling;
 };
 } // namespace duckdb
@@ -188653,6 +189137,9 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
 		delimiter_value = ExpressionExecutor::EvaluateScalar(context, *expr).CastAs(context, type);
 		return nullptr;
 	}
+	if (!new_binder->correlated_columns.empty()) {
+		throw BinderException("Correlated columns not supported in LIMIT/OFFSET");
+	}
 	// move any correlated columns to this binder
 	MoveCorrelatedExpressions(*new_binder);
 	return expr;
@@ -189018,16 +189505,22 @@ unique_ptr<BoundQueryNode> Binder::BindNode(SelectNode &statement) {
 	// bind the HAVING clause, if any
 	if (statement.having) {
-		HavingBinder having_binder(*this, context, *result, info, alias_map);
+		HavingBinder having_binder(*this, context, *result, info, alias_map, statement.aggregate_handling);
 		ExpressionBinder::QualifyColumnNames(*this, statement.having);
 		result->having = having_binder.Bind(statement.having);
 	}
 	// bind the QUALIFY clause, if any
 	if (statement.qualify) {
+		if (statement.aggregate_handling == AggregateHandling::FORCE_AGGREGATES) {
+			throw BinderException("Combining QUALIFY with GROUP BY ALL is not supported yet");
+		}
 		QualifyBinder qualify_binder(*this, context, *result, info, alias_map);
 		ExpressionBinder::QualifyColumnNames(*this, statement.qualify);
 		result->qualify = qualify_binder.Bind(statement.qualify);
+		if (qualify_binder.HasBoundColumns() && qualify_binder.BoundAggregates()) {
+			throw BinderException("Cannot mix aggregates with non-aggregated columns!");
+		}
 	}
 	// after that, we bind to the SELECT list
@@ -190230,7 +190723,7 @@ unique_ptr<Expression> Binder::PlanSubquery(BoundSubqueryExpression &expr, uniqu
 	D_ASSERT(root);
 	// first we translate the QueryNode of the subquery into a logical plan
 	// note that we do not plan nested subqueries yet
-	auto sub_binder = Binder::CreateBinder(context);
+	auto sub_binder = Binder::CreateBinder(context, this);
 	sub_binder->plan_subquery = false;
 	auto subquery_root = sub_binder->CreatePlan(*expr.subquery);
 	D_ASSERT(subquery_root);
@@ -190446,8 +190939,8 @@ BoundStatement Binder::BindCopyFrom(CopyStatement &stmt) {
 	auto function_data =
 	    copy_function->function.copy_from_bind(context, *stmt.info, expected_names, bound_insert.expected_types);
-	auto get = make_unique<LogicalGet>(0, copy_function->function.copy_from_function, move(function_data),
-	                                   bound_insert.expected_types, expected_names);
+	auto get = make_unique<LogicalGet>(GenerateTableIndex(), copy_function->function.copy_from_function,
+	                                   move(function_data), bound_insert.expected_types, expected_names);
 	for (idx_t i = 0; i < bound_insert.expected_types.size(); i++) {
 		get->column_ids.push_back(i);
 	}
@@ -191126,6 +191619,7 @@ protected:
 #include <algorithm>
 namespace duckdb {
@@ -191326,6 +191820,31 @@ void Binder::BindDefaultValues(ColumnList &columns, vector<unique_ptr<Expression
 	}
 }
+static void ExtractExpressionDependencies(Expression &expr, unordered_set<CatalogEntry *> &dependencies) {
+	if (expr.type == ExpressionType::BOUND_FUNCTION) {
+		auto &function = (BoundFunctionExpression &)expr;
+		if (function.function.dependency) {
+			function.function.dependency(function, dependencies);
+		}
+	}
+	ExpressionIterator::EnumerateChildren(
+	    expr, [&](Expression &child) { ExtractExpressionDependencies(child, dependencies); });
+}
+static void ExtractDependencies(BoundCreateTableInfo &info) {
+	for (auto &default_value : info.bound_defaults) {
+		if (default_value) {
+			ExtractExpressionDependencies(*default_value, info.dependencies);
+		}
+	}
+	for (auto &constraint : info.bound_constraints) {
+		if (constraint->type == ConstraintType::CHECK) {
+			auto &bound_check = (BoundCheckConstraint &)*constraint;
+			ExtractExpressionDependencies(*bound_check.expression, info.dependencies);
+		}
+	}
+}
 unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateInfo> info) {
 	auto &base = (CreateTableInfo &)*info;
@@ -191356,6 +191875,8 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
 		// bind the default values
 		BindDefaultValues(base.columns, result->bound_defaults);
 	}
+	// extract dependencies from any default values or CHECK constraints
+	ExtractDependencies(*result);
 	if (base.columns.PhysicalColumnCount() == 0) {
 		throw BinderException("Creating a table without physical (non-generated) columns is not supported");
@@ -191449,7 +191970,8 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) {
 		unique_ptr<LogicalOperator> child_operator;
 		for (auto &using_clause : stmt.using_clauses) {
 			// bind the using clause
-			auto bound_node = Bind(*using_clause);
+			auto using_binder = Binder::CreateBinder(context, this);
+			auto bound_node = using_binder->Bind(*using_clause);
 			auto op = CreatePlan(*bound_node);
 			if (child_operator) {
 				// already bound a child: create a cross product to unify the two
@@ -191457,6 +191979,7 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) {
 			} else {
 				child_operator = move(op);
 			}
+			bind_context.AddContext(move(using_binder->bind_context));
 		}
 		if (child_operator) {
 			root = LogicalCrossProduct::Create(move(root), move(child_operator));
@@ -191475,7 +191998,7 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) {
 		root = move(filter);
 	}
 	// create the delete node
-	auto del = make_unique<LogicalDelete>(table);
+	auto del = make_unique<LogicalDelete>(table, GenerateTableIndex());
 	del->AddChild(move(root));
 	// set up the delete expression
@@ -191603,6 +192126,7 @@ BoundStatement Binder::Bind(ExecuteStatement &stmt) {
 		prepared = prepared_planner.PrepareSQLStatement(entry->second->unbound_statement->Copy());
 		rebound_plan = move(prepared_planner.plan);
 		D_ASSERT(prepared->properties.bound_all_parameters);
+		this->bound_tables = prepared_planner.binder->bound_tables;
 	}
 	// copy the properties of the prepared statement into the planner
 	this->properties = prepared->properties;
@@ -191824,7 +192348,7 @@ BoundStatement Binder::Bind(ExportStatement &stmt) {
 		CopyStatement copy_stmt;
 		copy_stmt.info = move(info);
-		auto copy_binder = Binder::CreateBinder(context);
+		auto copy_binder = Binder::CreateBinder(context, this);
 		auto bound_statement = copy_binder->Bind(copy_stmt);
 		if (child_operator) {
 			// use UNION ALL to combine the individual copy statements into a single node
@@ -191954,7 +192478,7 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
 		properties.read_only = false;
 	}
-	auto insert = make_unique<LogicalInsert>(table);
+	auto insert = make_unique<LogicalInsert>(table, GenerateTableIndex());
 	// Add CTEs as bindable
 	AddCTEMap(stmt.cte_map);
@@ -192158,6 +192682,7 @@ namespace duckdb {
 BoundStatement Binder::Bind(PrepareStatement &stmt) {
 	Planner prepared_planner(context);
 	auto prepared_data = prepared_planner.PrepareSQLStatement(move(stmt.statement));
+	this->bound_tables = prepared_planner.binder->bound_tables;
 	auto prepare = make_unique<LogicalPrepare>(stmt.name, move(prepared_data), move(prepared_planner.plan));
 	// we can prepare in read-only mode: prepared statements are not written to the catalog
@@ -192284,7 +192809,7 @@ BoundStatement Binder::Bind(AlterStatement &stmt) {
 BoundStatement Binder::Bind(TransactionStatement &stmt) {
 	// transaction statements do not require a valid transaction
-	properties.requires_valid_transaction = false;
+	properties.requires_valid_transaction = stmt.info->type == TransactionType::BEGIN_TRANSACTION;
 	BoundStatement result;
 	result.names = {"Success"};
@@ -192578,6 +193103,13 @@ static void BindUpdateConstraints(TableCatalogEntry &table, LogicalGet &get, Log
 			BindExtraColumns(table, get, proj, update, check.bound_columns);
 		}
 	}
+	if (update.return_chunk) {
+		physical_index_set_t all_columns;
+		for (idx_t i = 0; i < table.storage->column_definitions.size(); i++) {
+			all_columns.insert(PhysicalIndex(i));
+		}
+		BindExtraColumns(table, get, proj, update, all_columns);
+	}
 	// for index updates we always turn any update into an insert and a delete
 	// we thus need all the columns to be available, hence we check if the update touches any index columns
 	// If the returning keyword is used, we need access to the whole row in case the user requests it.
@@ -192600,7 +193132,7 @@ static void BindUpdateConstraints(TableCatalogEntry &table, LogicalGet &get, Log
 		}
 	}
-	if (update.update_is_del_and_insert || update.return_chunk) {
+	if (update.update_is_del_and_insert) {
 		// the update updates a column required by an index or requires returning the updated rows,
 		// push projections for all columns
 		physical_index_set_t all_columns;
@@ -192711,16 +193243,15 @@ BoundStatement Binder::Bind(UpdateStatement &stmt) {
 	// set the projection as child of the update node and finalize the result
 	update->AddChild(move(proj));
+	auto update_table_index = GenerateTableIndex();
+	update->table_index = update_table_index;
 	if (!stmt.returning_list.empty()) {
-		auto update_table_index = GenerateTableIndex();
-		update->table_index = update_table_index;
 		unique_ptr<LogicalOperator> update_as_logicaloperator = move(update);
 		return BindReturning(move(stmt.returning_list), table, update_table_index, move(update_as_logicaloperator),
 		                     move(result));
 	}
-	update->table_index = 0;
 	result.names = {"Count"};
 	result.types = {LogicalType::BIGINT};
 	result.plan = move(update);
@@ -193046,6 +193577,9 @@ unique_ptr<BoundTableRef> Binder::Bind(BaseTableRef &ref) {
 		// bind the child subquery
 		view_binder->AddBoundView(view_catalog_entry);
 		auto bound_child = view_binder->Bind(subquery);
+		if (!view_binder->correlated_columns.empty()) {
+			throw BinderException("Contents of view were altered - view bound correlated columns");
+		}
 		D_ASSERT(bound_child->type == TableReferenceType::SUBQUERY);
 		// verify that the types and names match up with the expected types and names
@@ -193557,6 +194091,33 @@ unique_ptr<BoundTableRef> Binder::Bind(SubqueryRef &ref, CommonTableExpressionIn
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// duckdb/planner/expression_binder/table_function_binder.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+namespace duckdb {
+//! The Table function binder can bind standard table function parameters (i.e. non-table-in-out functions)
+class TableFunctionBinder : public ExpressionBinder {
+public:
+	TableFunctionBinder(Binder &binder, ClientContext &context);
+protected:
+	BindResult BindColumnReference(ColumnRefExpression &expr);
+	BindResult BindExpression(unique_ptr<ParsedExpression> *expr, idx_t depth, bool root_expression = false) override;
+	string UnsupportedAggregateMessage() override;
+};
+} // namespace duckdb
@@ -193633,17 +194194,17 @@ bool Binder::BindTableFunctionParameters(TableFunctionCatalogEntry &table_functi
 			continue;
 		}
-		ConstantBinder binder(*this, context, "TABLE FUNCTION parameter");
+		TableFunctionBinder binder(*this, context);
 		LogicalType sql_type;
 		auto expr = binder.Bind(child, &sql_type);
 		if (expr->HasParameter()) {
 			throw ParameterNotResolvedException();
 		}
-		if (!expr->IsFoldable()) {
+		if (!expr->IsScalar()) {
 			error = "Table function requires a constant parameter";
 			return false;
 		}
-		auto constant = ExpressionExecutor::EvaluateScalar(context, *expr);
+		auto constant = ExpressionExecutor::EvaluateScalar(context, *expr, true);
 		if (parameter_name.empty()) {
 			// unnamed parameter
 			if (!named_parameters.empty()) {
@@ -196596,8 +197157,9 @@ BindResult GroupBinder::BindColumnRef(ColumnRefExpression &colref) {
 namespace duckdb {
 HavingBinder::HavingBinder(Binder &binder, ClientContext &context, BoundSelectNode &node, BoundGroupInformation &info,
-                           case_insensitive_map_t<idx_t> &alias_map)
-    : SelectBinder(binder, context, node, info), column_alias_binder(node, alias_map) {
+                           case_insensitive_map_t<idx_t> &alias_map, AggregateHandling aggregate_handling)
+    : SelectBinder(binder, context, node, info), column_alias_binder(node, alias_map),
+      aggregate_handling(aggregate_handling) {
 	target_type = LogicalType(LogicalTypeId::BOOLEAN);
 }
@@ -196607,7 +197169,16 @@ BindResult HavingBinder::BindColumnRef(unique_ptr<ParsedExpression> *expr_ptr, i
 	if (!alias_result.HasError()) {
 		return alias_result;
 	}
+	if (aggregate_handling == AggregateHandling::FORCE_AGGREGATES) {
+		auto expr = duckdb::SelectBinder::BindExpression(expr_ptr, depth);
+		if (expr.HasError()) {
+			return expr;
+		}
+		auto group_ref = make_unique<BoundColumnRefExpression>(
+		    expr.expression->return_type, ColumnBinding(node.group_index, node.groups.group_expressions.size()));
+		node.groups.group_expressions.push_back(move(expr.expression));
+		return BindResult(move(group_ref));
+	}
 	return BindResult(StringUtil::Format(
 	    "column %s must appear in the GROUP BY clause or be used in an aggregate function", expr.ToString()));
 }
@@ -197089,6 +197660,42 @@ BindResult SelectBinder::BindGroup(ParsedExpression &expr, idx_t depth, idx_t gr
 } // namespace duckdb
+namespace duckdb {
+TableFunctionBinder::TableFunctionBinder(Binder &binder, ClientContext &context) : ExpressionBinder(binder, context) {
+}
+BindResult TableFunctionBinder::BindColumnReference(ColumnRefExpression &expr) {
+	auto result_name = StringUtil::Join(expr.column_names, ".");
+	return BindResult(make_unique<BoundConstantExpression>(Value(result_name)));
+}
+BindResult TableFunctionBinder::BindExpression(unique_ptr<ParsedExpression> *expr_ptr, idx_t depth,
+                                               bool root_expression) {
+	auto &expr = **expr_ptr;
+	switch (expr.GetExpressionClass()) {
+	case ExpressionClass::COLUMN_REF:
+		return BindColumnReference((ColumnRefExpression &)expr);
+	case ExpressionClass::SUBQUERY:
+		throw BinderException("Table function cannot contain subqueries");
+	case ExpressionClass::DEFAULT:
+		return BindResult("Table function cannot contain DEFAULT clause");
+	case ExpressionClass::WINDOW:
+		return BindResult("Table function cannot contain window functions!");
+	default:
+		return ExpressionBinder::BindExpression(expr_ptr, depth);
+	}
+}
+string TableFunctionBinder::UnsupportedAggregateMessage() {
+	return "Table function cannot contain aggregates!";
+}
+} // namespace duckdb
 namespace duckdb {
 UpdateBinder::UpdateBinder(Binder &binder, ClientContext &context) : ExpressionBinder(binder, context) {
@@ -198023,6 +198630,7 @@ JoinSide JoinSide::GetJoinSide(const unordered_set<idx_t> &bindings, unordered_s
 namespace duckdb {
 const uint64_t PLAN_SERIALIZATION_VERSION = 1;
@@ -198354,7 +198962,8 @@ unique_ptr<LogicalOperator> LogicalOperator::Deserialize(Deserializer &deseriali
 		result = LogicalSimple::Deserialize(state, reader);
 		break;
 	case LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR:
-		throw SerializationException("Invalid type for operator deserialization");
+		result = LogicalExtensionOperator::Deserialize(state, reader);
+		break;
 	case LogicalOperatorType::LOGICAL_INVALID:
 		/* no default here to trigger a warning if we forget to implement deserialize for a new operator */
 		throw SerializationException("Invalid type for operator deserialization");
@@ -198366,6 +198975,10 @@ unique_ptr<LogicalOperator> LogicalOperator::Deserialize(Deserializer &deseriali
 	return result;
 }
+vector<idx_t> LogicalOperator::GetTableIndex() const {
+	return vector<idx_t> {};
+}
 unique_ptr<LogicalOperator> LogicalOperator::Copy(ClientContext &context) const {
 	BufferedSerializer logical_op_serializer;
 	try {
@@ -198757,6 +199370,14 @@ idx_t LogicalAggregate::EstimateCardinality(ClientContext &context) {
 	return LogicalOperator::EstimateCardinality(context);
 }
+vector<idx_t> LogicalAggregate::GetTableIndex() const {
+	vector<idx_t> result {group_index, aggregate_index};
+	if (groupings_index != DConstants::INVALID_INDEX) {
+		result.push_back(groupings_index);
+	}
+	return result;
+}
 } // namespace duckdb
@@ -198823,6 +199444,10 @@ unique_ptr<LogicalOperator> LogicalColumnDataGet::Deserialize(LogicalDeserializa
 	return make_unique<LogicalColumnDataGet>(table_index, move(chunk_types), move(collection));
 }
+vector<idx_t> LogicalColumnDataGet::GetTableIndex() const {
+	return vector<idx_t> {table_index};
+}
 } // namespace duckdb
@@ -199087,6 +199712,10 @@ unique_ptr<LogicalOperator> LogicalCTERef::Deserialize(LogicalDeserializationSta
 	return make_unique<LogicalCTERef>(table_index, cte_index, chunk_types, bound_columns);
 }
+vector<idx_t> LogicalCTERef::GetTableIndex() const {
+	return vector<idx_t> {table_index};
+}
 } // namespace duckdb
@@ -199107,8 +199736,8 @@ unique_ptr<LogicalOperator> LogicalDelete::Deserialize(LogicalDeserializationSta
 	TableCatalogEntry *table_catalog_entry = catalog.GetEntry<TableCatalogEntry>(context, info->schema, info->table);
-	auto result = make_unique<LogicalDelete>(table_catalog_entry);
-	result->table_index = reader.ReadRequired<idx_t>();
+	auto table_index = reader.ReadRequired<idx_t>();
+	auto result = make_unique<LogicalDelete>(table_catalog_entry, table_index);
 	result->return_chunk = reader.ReadRequired<bool>();
 	return move(result);
 }
@@ -199117,6 +199746,10 @@ idx_t LogicalDelete::EstimateCardinality(ClientContext &context) {
 	return return_chunk ? LogicalOperator::EstimateCardinality(context) : 1;
 }
+vector<idx_t> LogicalDelete::GetTableIndex() const {
+	return vector<idx_t> {table_index};
+}
 } // namespace duckdb
@@ -199134,6 +199767,10 @@ unique_ptr<LogicalOperator> LogicalDelimGet::Deserialize(LogicalDeserializationS
 	return make_unique<LogicalDelimGet>(table_index, chunk_types);
 }
+vector<idx_t> LogicalDelimGet::GetTableIndex() const {
+	return vector<idx_t> {table_index};
+}
 } // namespace duckdb
@@ -199201,6 +199838,10 @@ unique_ptr<LogicalOperator> LogicalDummyScan::Deserialize(LogicalDeserialization
 	return make_unique<LogicalDummyScan>(table_index);
 }
+vector<idx_t> LogicalDummyScan::GetTableIndex() const {
+	return vector<idx_t> {table_index};
+}
 } // namespace duckdb
@@ -199309,6 +199950,28 @@ unique_ptr<LogicalOperator> LogicalExpressionGet::Deserialize(LogicalDeserializa
 	return make_unique<LogicalExpressionGet>(table_index, expr_types, move(expressions));
 }
+vector<idx_t> LogicalExpressionGet::GetTableIndex() const {
+	return vector<idx_t> {table_index};
+}
+} // namespace duckdb
+namespace duckdb {
+unique_ptr<LogicalExtensionOperator> LogicalExtensionOperator::Deserialize(LogicalDeserializationState &state,
+                                                                           FieldReader &reader) {
+	auto &config = DBConfig::GetConfig(state.gstate.context);
+	auto extension_name = reader.ReadRequired<std::string>();
+	for (auto &extension : config.operator_extensions) {
+		if (extension->GetName() == extension_name) {
+			return extension->Deserialize(state, reader);
+		}
+	}
+	throw SerializationException("No serialization method exists for extension: " + extension_name);
+}
 } // namespace duckdb
@@ -199546,6 +200209,10 @@ unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState
 	return move(result);
 }
+vector<idx_t> LogicalGet::GetTableIndex() const { // Reports the table index associated with this get operator.
+	return vector<idx_t> {table_index}; // one-element vector holding the table_index member
+}
 } // namespace duckdb
@@ -199591,10 +200258,9 @@ unique_ptr<LogicalOperator> LogicalInsert::Deserialize(LogicalDeserializationSta
 		throw InternalException("Cant find catalog entry for table %s", info->table);
 	}
-	auto result = make_unique<LogicalInsert>(table_catalog_entry);
+	auto result = make_unique<LogicalInsert>(table_catalog_entry, table_index);
 	result->type = state.type;
 	result->table = table_catalog_entry;
-	result->table_index = table_index;
 	result->return_chunk = return_chunk;
 	result->insert_values = move(insert_values);
 	result->column_index_map = column_index_map;
@@ -199607,6 +200273,10 @@ idx_t LogicalInsert::EstimateCardinality(ClientContext &context) {
 	return return_chunk ? LogicalOperator::EstimateCardinality(context) : 1;
 }
+vector<idx_t> LogicalInsert::GetTableIndex() const { // Reports the table index associated with this insert operator.
+	return vector<idx_t> {table_index}; // one-element vector holding the table_index member
+}
 } // namespace duckdb
@@ -199843,6 +200513,10 @@ unique_ptr<LogicalOperator> LogicalProjection::Deserialize(LogicalDeserializatio
 	return make_unique<LogicalProjection>(table_index, move(expressions));
 }
+vector<idx_t> LogicalProjection::GetTableIndex() const { // Reports the table index associated with this projection.
+	return vector<idx_t> {table_index}; // one-element vector holding the table_index member
+}
 } // namespace duckdb
@@ -199863,6 +200537,10 @@ unique_ptr<LogicalOperator> LogicalRecursiveCTE::Deserialize(LogicalDeserializat
 	return unique_ptr<LogicalRecursiveCTE>(new LogicalRecursiveCTE(table_index, column_count, union_all, state.type));
 }
+vector<idx_t> LogicalRecursiveCTE::GetTableIndex() const { // Reports the table index associated with this recursive CTE operator.
+	return vector<idx_t> {table_index}; // one-element vector holding the table_index member
+}
 } // namespace duckdb
@@ -199881,7 +200559,12 @@ vector<ColumnBinding> LogicalSample::GetColumnBindings() {
 idx_t LogicalSample::EstimateCardinality(ClientContext &context) {
 	auto child_cardinality = children[0]->EstimateCardinality(context);
 	if (sample_options->is_percentage) {
-		return idx_t(child_cardinality * sample_options->sample_size.GetValue<double>());
+		double sample_cardinality =
+		    double(child_cardinality) * (sample_options->sample_size.GetValue<double>() / 100.0);
+		if (sample_cardinality > double(child_cardinality)) {
+			return child_cardinality;
+		}
+		return idx_t(sample_cardinality);
 	} else {
 		auto sample_size = sample_options->sample_size.GetValue<uint64_t>();
 		if (sample_size < child_cardinality) {
@@ -199945,6 +200628,11 @@ unique_ptr<LogicalOperator> LogicalSetOperation::Deserialize(LogicalDeserializat
 	// TODO(stephwang): review if unique_ptr<LogicalOperator> plan is needed
 	return unique_ptr<LogicalSetOperation>(new LogicalSetOperation(table_index, column_count, state.type));
 }
+vector<idx_t> LogicalSetOperation::GetTableIndex() const { // Reports the table index associated with this set operation.
+	return vector<idx_t> {table_index}; // one-element vector holding the table_index member
+}
 } // namespace duckdb
@@ -200043,6 +200731,11 @@ unique_ptr<LogicalOperator> LogicalUnnest::Deserialize(LogicalDeserializationSta
 	result->expressions = move(expressions);
 	return move(result);
 }
+vector<idx_t> LogicalUnnest::GetTableIndex() const { // Reports the index associated with this unnest operator.
+	return vector<idx_t> {unnest_index}; // this operator stores its index in unnest_index rather than table_index
+}
 } // namespace duckdb
@@ -200117,6 +200810,10 @@ unique_ptr<LogicalOperator> LogicalWindow::Deserialize(LogicalDeserializationSta
 	return move(result);
 }
+vector<idx_t> LogicalWindow::GetTableIndex() const { // Reports the index associated with this window operator.
+	return vector<idx_t> {window_index}; // this operator stores its index in window_index rather than table_index
+}
 } // namespace duckdb
@@ -200249,7 +200946,7 @@ void Planner::CreatePlan(SQLStatement &statement) {
 		this->plan = nullptr;
 		for (auto &extension_op : config.operator_extensions) {
 			auto bound_statement =
-			    extension_op.Bind(context, *this->binder, extension_op.operator_info.get(), statement);
+			    extension_op->Bind(context, *this->binder, extension_op->operator_info.get(), statement);
 			if (bound_statement.plan != nullptr) {
 				this->names = bound_statement.names;
 				this->types = bound_statement.types;
@@ -200648,10 +201345,13 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
 		// we reached a node without correlated expressions
 		// we can eliminate the dependent join now and create a simple cross product
 		// now create the duplicate eliminated scan for this node
+		auto left_columns = plan->GetColumnBindings().size();
 		auto delim_index = binder.GenerateTableIndex();
 		this->base_binding = ColumnBinding(delim_index, 0);
+		this->delim_offset = 0;
+		this->data_offset = left_columns;
 		auto delim_scan = make_unique<LogicalDelimGet>(delim_index, delim_types);
-		return LogicalCrossProduct::Create(move(delim_scan), move(plan));
+		return LogicalCrossProduct::Create(move(plan), move(delim_scan));
 	}
 	switch (plan->type) {
 	case LogicalOperatorType::LOGICAL_UNNEST:
@@ -201015,8 +201715,19 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
 	case LogicalOperatorType::LOGICAL_UNION: {
 		auto &setop = (LogicalSetOperation &)*plan;
 		// set operator, push into both children
+#ifdef DEBUG
+		plan->children[0]->ResolveOperatorTypes();
+		plan->children[1]->ResolveOperatorTypes();
+		D_ASSERT(plan->children[0]->types == plan->children[1]->types);
+#endif
 		plan->children[0] = PushDownDependentJoin(move(plan->children[0]));
 		plan->children[1] = PushDownDependentJoin(move(plan->children[1]));
+#ifdef DEBUG
+		D_ASSERT(plan->children[0]->GetColumnBindings().size() == plan->children[1]->GetColumnBindings().size());
+		plan->children[0]->ResolveOperatorTypes();
+		plan->children[1]->ResolveOperatorTypes();
+		D_ASSERT(plan->children[0]->types == plan->children[1]->types);
+#endif
 		// we have to refer to the setop index now
 		base_binding.table_index = setop.table_index;
 		base_binding.column_index = setop.column_count;
@@ -201924,6 +202635,7 @@ BlockHandle::~BlockHandle() {
 	} else {
 		D_ASSERT(memory_charge.size == 0);
 	}
+	buffer_manager.PurgeQueue();
 	block_manager.UnregisterBlock(block_id, can_destroy);
 }
@@ -201950,7 +202662,7 @@ unique_ptr<FileBuffer> BufferManager::ConstructManagedBuffer(idx_t size, unique_
                                                              FileBufferType type) {
 	if (source) {
 		auto tmp = move(source);
-		D_ASSERT(tmp->size == size);
+		D_ASSERT(tmp->AllocSize() == BufferManager::GetAllocSize(size));
 		return make_unique<FileBuffer>(*tmp, type);
 	} else {
 		// no re-usable buffer: allocate a new buffer
@@ -202085,7 +202797,7 @@ void BufferManager::SetTemporaryDirectory(string new_dir) {
 BufferManager::BufferManager(DatabaseInstance &db, string tmp, idx_t maximum_memory)
     : db(db), current_memory(0), maximum_memory(maximum_memory), temp_directory(move(tmp)),
-      queue(make_unique<EvictionQueue>()), temporary_id(MAXIMUM_BLOCK),
+      queue(make_unique<EvictionQueue>()), temporary_id(MAXIMUM_BLOCK), queue_insertions(0),
       buffer_allocator(BufferAllocatorAllocate, BufferAllocatorFree, BufferAllocatorRealloc,
                        make_unique<BufferAllocatorData>(*this)) {
 	temp_block_manager = make_unique<InMemoryBlockManager>(*this);
@@ -202161,6 +202873,7 @@ TempBufferPoolReservation BufferManager::EvictBlocksOrThrow(idx_t memory_delta,
 }
 shared_ptr<BlockHandle> BufferManager::RegisterSmallMemory(idx_t block_size) {
+	D_ASSERT(block_size < Storage::BLOCK_SIZE);
 	auto res = EvictBlocksOrThrow(block_size, maximum_memory, nullptr,
 	                              "could not allocate block of %lld bytes (%lld/%lld used) %s", block_size,
 	                              GetUsedMemory(), GetMaxMemory());
@@ -202173,7 +202886,7 @@ shared_ptr<BlockHandle> BufferManager::RegisterSmallMemory(idx_t block_size) {
 shared_ptr<BlockHandle> BufferManager::RegisterMemory(idx_t block_size, bool can_destroy) {
 	D_ASSERT(block_size >= Storage::BLOCK_SIZE);
-	auto alloc_size = AlignValue<idx_t, Storage::SECTOR_SIZE>(block_size + Storage::BLOCK_HEADER_SIZE);
+	auto alloc_size = GetAllocSize(block_size);
 	// first evict blocks until we have enough memory to store this buffer
 	unique_ptr<FileBuffer> reusable_buffer;
 	auto res = EvictBlocksOrThrow(alloc_size, maximum_memory, &reusable_buffer,
@@ -202187,9 +202900,11 @@ shared_ptr<BlockHandle> BufferManager::RegisterMemory(idx_t block_size, bool can
 	                                move(res));
 }
-BufferHandle BufferManager::Allocate(idx_t block_size) {
-	auto block = RegisterMemory(block_size, true);
-	return Pin(block);
+BufferHandle BufferManager::Allocate(idx_t block_size, bool can_destroy, shared_ptr<BlockHandle> *block) { // Registers a memory block of block_size and returns the handle obtained by pinning it.
+	shared_ptr<BlockHandle> local_block; // used only when the caller passes a null block pointer
+	auto block_ptr = block ? block : &local_block; // write the block handle to the caller's pointer if one was given
+	*block_ptr = RegisterMemory(block_size, can_destroy); // RegisterMemory D_ASSERTs block_size >= Storage::BLOCK_SIZE
+	return Pin(*block_ptr); // the previous version always registered with can_destroy = true and did not expose the block
 }
 void BufferManager::ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size) {
@@ -202219,6 +202934,7 @@ void BufferManager::ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size
 	// resize and adjust current memory
 	handle->buffer->Resize(block_size);
 	handle->memory_usage += memory_delta;
+	D_ASSERT(handle->memory_usage == handle->buffer->AllocSize());
 }
 BufferHandle BufferManager::Pin(shared_ptr<BlockHandle> &handle) {
@@ -202259,6 +202975,7 @@ BufferHandle BufferManager::Pin(shared_ptr<BlockHandle> &handle) {
 		handle->memory_usage += delta;
 		handle->memory_charge.Resize(current_memory, handle->memory_usage);
 	}
+	D_ASSERT(handle->memory_usage == handle->buffer->AllocSize());
 	return buf;
 }
@@ -202551,7 +203268,9 @@ private:
 			// as a result we can truncate the file
 			auto max_index = index_manager.GetMaxIndex();
 			auto &fs = FileSystem::GetFileSystem(db);
+#ifndef WIN32 // this ended up causing issues when sorting
 			fs.Truncate(*handle, GetPositionInFile(max_index + 1));
+#endif
 		}
 	}
@@ -202827,6 +203546,9 @@ void BufferManager::BufferAllocatorFree(PrivateAllocatorData *private_data, data
 data_ptr_t BufferManager::BufferAllocatorRealloc(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t old_size,
                                                  idx_t size) {
+	if (old_size == size) {
+		return pointer;
+	}
 	auto &data = (BufferAllocatorData &)*private_data;
 	BufferPoolReservation r;
 	r.size = old_size;
@@ -202840,6 +203562,10 @@ Allocator &BufferAllocator::Get(ClientContext &context) {
 	return manager.GetBufferAllocator();
 }
+Allocator &BufferAllocator::Get(DatabaseInstance &db) { // Overload taking a database instance instead of a client context.
+	return BufferManager::GetBufferManager(db).GetBufferAllocator(); // buffer allocator of the buffer manager looked up for db
+}
 Allocator &BufferManager::GetBufferAllocator() { // Accessor for this buffer manager's allocator.
 	return buffer_allocator; // returned by reference; the member is initialised in the BufferManager constructor
 }
@@ -208409,11 +209135,15 @@ public:
 				new_string = !LookupString(data[idx]);
 			}
-			bool fits = HasEnoughSpace(new_string, string_size);
+			bool fits = CalculateSpaceRequirements(new_string, string_size);
 			if (!fits) {
 				Flush();
 				new_string = true;
-				D_ASSERT(HasEnoughSpace(new_string, string_size));
+				fits = CalculateSpaceRequirements(new_string, string_size);
+				if (!fits) {
+					throw InternalException("Dictionary compression could not write to new segment");
+				}
 			}
 			if (!row_is_valid) {
@@ -208441,8 +209171,8 @@ protected:
 	virtual void AddNewString(string_t str) = 0;
 	// Add a null value to the compression state
 	virtual void AddNull() = 0;
-	// Check if we have enough space to add a string
-	virtual bool HasEnoughSpace(bool new_string, size_t string_size) = 0;
+	// Needs to be called before adding a value. Will return false if a flush is required first.
+	virtual bool CalculateSpaceRequirements(bool new_string, size_t string_size) = 0;
 	// Flush the segment to disk if compressing or reset the counters if analyzing
 	virtual void Flush(bool final = false) = 0;
 };
@@ -208499,7 +209229,8 @@ struct DictionaryCompressionStorage {
 // scanning the whole dictionary at once and then scanning the selection buffer for each emitted vector. Secondly, it
 // allows for efficient bitpacking compression as the selection values should remain relatively small.
 struct DictionaryCompressionCompressState : public DictionaryCompressionState {
-	explicit DictionaryCompressionCompressState(ColumnDataCheckpointer &checkpointer) : checkpointer(checkpointer) {
+	explicit DictionaryCompressionCompressState(ColumnDataCheckpointer &checkpointer)
+	    : checkpointer(checkpointer), heap(BufferAllocator::Get(checkpointer.GetDatabase())) {
 		auto &db = checkpointer.GetDatabase();
 		auto &config = DBConfig::GetConfig(db);
 		function = config.GetCompressionFunction(CompressionType::COMPRESSION_DICTIONARY, PhysicalType::VARCHAR);
@@ -208605,7 +209336,7 @@ public:
 		current_segment->count++;
 	}
-	bool HasEnoughSpace(bool new_string, size_t string_size) override {
+	bool CalculateSpaceRequirements(bool new_string, size_t string_size) override {
 		if (new_string) {
 			next_width = BitpackingPrimitives::MinimumBitWidth(index_buffer.size() - 1 + new_string);
 			return DictionaryCompressionStorage::HasEnoughSpace(current_segment->count.load() + 1,
@@ -208726,7 +209457,7 @@ struct DictionaryAnalyzeState : public DictionaryCompressionState {
 		current_tuple_count++;
 	}
-	bool HasEnoughSpace(bool new_string, size_t string_size) override {
+	bool CalculateSpaceRequirements(bool new_string, size_t string_size) override {
 		if (new_string) {
 			next_width =
 			    BitpackingPrimitives::MinimumBitWidth(current_unique_count + 2); // 1 for null, one for new string
@@ -211977,8 +212708,7 @@ void UncompressedStringStorage::WriteStringMemory(ColumnSegment &segment, string
 		new_block->offset = 0;
 		new_block->size = alloc_size;
 		// allocate an in-memory buffer for it
-		block = buffer_manager.RegisterMemory(alloc_size, false);
-		handle = buffer_manager.Pin(block);
+		handle = buffer_manager.Allocate(alloc_size, false, &block);
 		state.overflow_blocks[block->BlockId()] = new_block.get();
 		new_block->block = move(block);
 		new_block->next = move(state.head);
@@ -213413,7 +214143,12 @@ bool DataTable::AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t
 	bool append_failed = false;
 	// now append the entries to the indices
 	indexes.Scan([&](Index &index) {
-		if (!index.Append(chunk, row_identifiers)) {
+		try {
+			if (!index.Append(chunk, row_identifiers)) {
+				append_failed = true;
+				return true;
+			}
+		} catch (...) {
 			append_failed = true;
 			return true;
 		}
@@ -213427,7 +214162,6 @@ bool DataTable::AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t
 		for (auto *index : already_appended) {
 			index->Delete(chunk, row_identifiers);
 		}
 		return false;
 	}
 	return true;
@@ -214070,12 +214804,21 @@ void LocalTableStorage::AppendToIndexes(Transaction &transaction, TableAppendSta
 		                                       append_state.current_row);
 	}
 	if (constraint_violated) {
+		PreservedError error;
 		// need to revert the append
 		row_t current_row = append_state.row_start;
 		// remove the data from the indexes, if there are any indexes
 		row_groups->Scan(transaction, [&](DataChunk &chunk) -> bool {
 			// append this chunk to the indexes of the table
-			table->RemoveFromIndexes(append_state, chunk, current_row);
+			try {
+				table->RemoveFromIndexes(append_state, chunk, current_row);
+			} catch (Exception &ex) {
+				error = PreservedError(ex);
+				return false;
+			} catch (std::exception &ex) {
+				error = PreservedError(ex);
+				return false;
+			}
 			current_row += chunk.size();
 			if (current_row >= append_state.current_row) {
@@ -214087,6 +214830,9 @@ void LocalTableStorage::AppendToIndexes(Transaction &transaction, TableAppendSta
 		if (append_to_table) {
 			table->RevertAppendInternal(append_state.row_start, append_count);
 		}
+		if (error) {
+			error.Throw();
+		}
 		throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
 	}
 }
@@ -214218,7 +214964,7 @@ void LocalStorage::InitializeAppend(LocalAppendState &state, DataTable *table) {
 void LocalStorage::Append(LocalAppendState &state, DataChunk &chunk) {
 	// append to unique indices (if any)
 	auto storage = state.storage;
-	idx_t base_id = MAX_ROW_ID + storage->row_groups->GetTotalRows();
+	idx_t base_id = MAX_ROW_ID + storage->row_groups->GetTotalRows() + state.append_state.total_append_count;
 	if (!DataTable::AppendToIndexes(storage->indexes, chunk, base_id)) {
 		throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
 	}
@@ -215000,6 +215746,7 @@ block_id_t SingleFileBlockManager::GetFreeBlockId() {
 void SingleFileBlockManager::MarkBlockAsFree(block_id_t block_id) {
 	lock_guard<mutex> lock(block_lock);
 	D_ASSERT(block_id >= 0);
+	D_ASSERT(block_id < max_block);
 	D_ASSERT(free_list.find(block_id) == free_list.end());
 	multi_use_blocks.erase(block_id);
 	free_list.insert(block_id);
@@ -215008,6 +215755,7 @@ void SingleFileBlockManager::MarkBlockAsFree(block_id_t block_id) {
 void SingleFileBlockManager::MarkBlockAsModified(block_id_t block_id) {
 	lock_guard<mutex> lock(block_lock);
 	D_ASSERT(block_id >= 0);
+	D_ASSERT(block_id < max_block);
 	// check if the block is a multi-use block
 	auto entry = multi_use_blocks.find(block_id);
@@ -215030,6 +215778,8 @@ void SingleFileBlockManager::MarkBlockAsModified(block_id_t block_id) {
 void SingleFileBlockManager::IncreaseBlockReferenceCount(block_id_t block_id) {
 	lock_guard<mutex> lock(block_lock);
+	D_ASSERT(block_id >= 0);
+	D_ASSERT(block_id < max_block);
 	D_ASSERT(free_list.find(block_id) == free_list.end());
 	auto entry = multi_use_blocks.find(block_id);
 	if (entry != multi_use_blocks.end()) {
@@ -218111,7 +218861,7 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc
 		block = block_manager.RegisterBlock(block_id);
 	}
 	auto segment_size = Storage::BLOCK_SIZE;
-	return make_unique<ColumnSegment>(db, block, type, ColumnSegmentType::PERSISTENT, start, count, function,
+	return make_unique<ColumnSegment>(db, move(block), type, ColumnSegmentType::PERSISTENT, start, count, function,
 	                                  move(statistics), block_id, offset, segment_size);
 }
@@ -218125,9 +218875,9 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateTransientSegment(DatabaseInstance
 	if (segment_size < Storage::BLOCK_SIZE) {
 		block = buffer_manager.RegisterSmallMemory(segment_size);
 	} else {
-		block = buffer_manager.RegisterMemory(segment_size, false);
+		buffer_manager.Allocate(segment_size, false, &block);
 	}
-	return make_unique<ColumnSegment>(db, block, type, ColumnSegmentType::TRANSIENT, start, 0, function, nullptr,
+	return make_unique<ColumnSegment>(db, move(block), type, ColumnSegmentType::TRANSIENT, start, 0, function, nullptr,
 	                                  INVALID_BLOCK, 0, segment_size);
 }
@@ -218208,9 +218958,9 @@ void ColumnSegment::Resize(idx_t new_size) {
 	D_ASSERT(new_size > this->segment_size);
 	D_ASSERT(offset == 0);
 	auto &buffer_manager = BufferManager::GetBufferManager(db);
-	auto new_block = buffer_manager.RegisterMemory(Storage::BLOCK_SIZE, false);
 	auto old_handle = buffer_manager.Pin(block);
-	auto new_handle = buffer_manager.Pin(new_block);
+	shared_ptr<BlockHandle> new_block;
+	auto new_handle = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &new_block);
 	memcpy(new_handle.Ptr(), old_handle.Ptr(), segment_size);
 	this->block_id = new_block->BlockId();
 	this->block = move(new_block);
@@ -221658,7 +222408,8 @@ static UpdateSegment::rollback_update_function_t GetRollbackUpdateFunction(Physi
 static UpdateSegment::statistics_update_function_t GetStatisticsUpdateFunction(PhysicalType type);
 static UpdateSegment::fetch_row_function_t GetFetchRowFunction(PhysicalType type);
-UpdateSegment::UpdateSegment(ColumnData &column_data) : column_data(column_data), stats(column_data.type) {
+UpdateSegment::UpdateSegment(ColumnData &column_data)
+    : column_data(column_data), stats(column_data.type), heap(BufferAllocator::Get(column_data.GetDatabase())) {
 	auto physical_type = column_data.type.InternalType();
 	this->type_size = GetTypeIdSize(physical_type);
@@ -223877,7 +224628,10 @@ void CleanupState::Flush() {
 	Vector row_identifiers(LogicalType::ROW_TYPE, (data_ptr_t)row_numbers);
 	// delete the tuples from all the indexes
-	current_table->RemoveFromIndexes(row_identifiers, count);
+	try {
+		current_table->RemoveFromIndexes(row_identifiers, count);
+	} catch (...) {
+	}
 	count = 0;
 }
@@ -228169,626 +228923,628 @@ size_t duckdb_fsst_compressAVX512(SymbolTable &symbolTable, u8* codeBase, u8* sy
 Symbol concat(Symbol a, Symbol b) { // Builds a new symbol from a and b, with the combined length capped at Symbol::maxLength.
-   Symbol s;
-   u32 length = a.length()+b.length();
-   if (length > Symbol::maxLength) length = Symbol::maxLength;
-   s.set_code_len(FSST_CODE_MASK, length);
-   s.val.num = (b.val.num << (8*a.length())) | a.val.num;
-   return s;
+	Symbol s;
+	u32 length = a.length()+b.length();
+	if (length > Symbol::maxLength) length = Symbol::maxLength; // clamp: anything of b beyond the maximum length is dropped
+	s.set_code_len(FSST_CODE_MASK, length); // the code field is set to FSST_CODE_MASK together with the clamped length
+	s.val.num = (b.val.num << (8*a.length())) | a.val.num; // a stays in the low-order bits, b is shifted up by a.length() bytes. NOTE(review): the shift is undefined if 8*a.length() reaches the bit width of val.num; makeTable skips symbols with length == Symbol::maxLength before calling concat - confirm that covers every caller
+	return s;
 }
 namespace std {
 template <>
 class hash<QSymbol> { // std::hash specialisation for QSymbol (QSymbol is stored in an unordered_set by makeTable).
-   public:
-   size_t operator()(const QSymbol& q) const {
-      uint64_t k = q.symbol.val.num;
-      const uint64_t m = 0xc6a4a7935bd1e995;
-      const int r = 47;
-      uint64_t h = 0x8445d61a4e774912 ^ (8*m);
-      k *= m;
-      k ^= k >> r;
-      k *= m;
-      h ^= k;
-      h *= m;
-      h ^= h >> r;
-      h *= m;
-      h ^= h >> r;
-      return h;
-   }
+public:
+	size_t operator()(const QSymbol& q) const { // the hash depends only on q.symbol.val.num; q.gain is not read
+		uint64_t k = q.symbol.val.num;
+		const uint64_t m = 0xc6a4a7935bd1e995; // multiplier used in every mixing step
+		const int r = 47; // distance of the xor-shift steps
+		uint64_t h = 0x8445d61a4e774912 ^ (8*m); // fixed initial state: constant seed xor 8*m
+		k *= m; // mix the key: multiply, xor-shift, multiply
+		k ^= k >> r;
+		k *= m;
+		h ^= k; // fold the mixed key into the state
+		h *= m;
+		h ^= h >> r; // final mixing of the state
+		h *= m;
+		h ^= h >> r;
+		return h; // uint64_t value converted to size_t on return
+	}
 };
 }
 bool isEscapeCode(u16 pos) { return pos < FSST_CODE_BASE; } // true for codes below FSST_CODE_BASE; compressCount adds the bool result as 0/1 in its gain arithmetic
 std::ostream& operator<<(std::ostream& out, const Symbol& s) { // Streams the first s.length() entries of s.val.str to out.
-   for (u32 i=0; i<s.length(); i++)
-      out << s.val.str[i];
-   return out;
+	for (u32 i=0; i<s.length(); i++) // one element of val.str per iteration, no separator
+		out << s.val.str[i];
+	return out; // returns the stream so calls can be chained
 }
-//static u64 iter = 0;
 SymbolTable *buildSymbolTable(Counters& counters, vector<u8*> line, size_t len[], bool zeroTerminated=false) {
-   SymbolTable *st = new SymbolTable(), *bestTable = new SymbolTable();
-   int bestGain = (int) -FSST_SAMPLEMAXSZ; // worst case (everything exception)
-   size_t sampleFrac = 128;
-   // start by determining the terminator. We use the (lowest) most infrequent byte as terminator
-   st->zeroTerminated = zeroTerminated;
-   if (zeroTerminated) {
-      st->terminator = 0; // except in case of zeroTerminated mode, then byte 0 is terminator regardless frequency
-   } else {
-      u16 byteHisto[256];
-      memset(byteHisto, 0, sizeof(byteHisto));
-      for(size_t i=0; i<line.size(); i++) {
-         u8* cur = line[i];
-         u8* end = cur + len[i];
-         while(cur < end) byteHisto[*cur++]++;
-      }
-      u32 minSize = FSST_SAMPLEMAXSZ, i = st->terminator = 256;
-      while(i-- > 0) {
-         if (byteHisto[i] > minSize) continue;
-         st->terminator = i;
-         minSize = byteHisto[i];
-      }
-   }
-   assert(st->terminator != 256);
+	SymbolTable *st = new SymbolTable(), *bestTable = new SymbolTable();
+	int bestGain = (int) -FSST_SAMPLEMAXSZ; // worst case (everything exception)
+	size_t sampleFrac = 128;
+	// start by determining the terminator. We use the (lowest) most infrequent byte as terminator
+	st->zeroTerminated = zeroTerminated;
+	if (zeroTerminated) {
+		st->terminator = 0; // except in case of zeroTerminated mode, then byte 0 is terminator regardless frequency
+	} else {
+		u16 byteHisto[256];
+		memset(byteHisto, 0, sizeof(byteHisto));
+		for(size_t i=0; i<line.size(); i++) {
+			u8* cur = line[i];
+			u8* end = cur + len[i];
+			while(cur < end) byteHisto[*cur++]++;
+		}
+		u32 minSize = FSST_SAMPLEMAXSZ, i = st->terminator = 256;
+		while(i-- > 0) {
+			if (byteHisto[i] > minSize) continue;
+			st->terminator = i;
+			minSize = byteHisto[i];
+		}
+	}
+	assert(st->terminator != 256);
+	// a random number between 0 and 128
+	auto rnd128 = [&](size_t i) { return 1 + (FSST_HASH((i+1UL)*sampleFrac)&127); };
+	// compress sample, and compute (pair-)frequencies
+	auto compressCount = [&](SymbolTable *st, Counters &counters) { // returns gain
+		int gain = 0;
+		for(size_t i=0; i<line.size(); i++) {
+			u8* cur = line[i];
+			u8* end = cur + len[i];
+			if (sampleFrac < 128) {
+				// in earlier rounds (sampleFrac < 128) we skip data in the sample (reduces overall work ~2x)
+				if (rnd128(i) > sampleFrac) continue;
+			}
+			if (cur < end) {
+				u8* start = cur;
+				u16 code2 = 255, code1 = st->findLongestSymbol(cur, end);
+				cur += st->symbols[code1].length();
+				gain += (int) (st->symbols[code1].length()-(1+isEscapeCode(code1)));
+				while (true) {
+					// count single symbol (i.e. an option is not extending it)
+					counters.count1Inc(code1);
-   // a random number between 0 and 128
-   auto rnd128 = [&](size_t i) { return 1 + (FSST_HASH((i+1UL)*sampleFrac)&127); };
+					// as an alternative, consider just using the next byte..
+					if (st->symbols[code1].length() != 1) // .. but do not count single byte symbols doubly
+						counters.count1Inc(*start);
-   // compress sample, and compute (pair-)frequencies
-   auto compressCount = [&](SymbolTable *st, Counters &counters) { // returns gain
-      int gain = 0;
+					if (cur==end) {
+						break;
+					}
-      for(size_t i=0; i<line.size(); i++) {
-         u8* cur = line[i];
-         u8* end = cur + len[i];
+					// now match a new symbol
+					start = cur;
+					if (cur<end-7) {
+						u64 word = fsst_unaligned_load(cur);
+						size_t code = word & 0xFFFFFF;
+						size_t idx = FSST_HASH(code)&(st->hashTabSize-1);
+						Symbol s = st->hashTab[idx];
+						code2 = st->shortCodes[word & 0xFFFF] & FSST_CODE_MASK;
+						word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
+						if ((s.icl < FSST_ICL_FREE) & (s.val.num == word)) {
+							code2 = s.code();
+							cur += s.length();
+						} else if (code2 >= FSST_CODE_BASE) {
+							cur += 2;
+						} else {
+							code2 = st->byteCodes[word & 0xFF] & FSST_CODE_MASK;
+							cur += 1;
+						}
+					} else {
+						code2 = st->findLongestSymbol(cur, end);
+						cur += st->symbols[code2].length();
+					}
-         if (sampleFrac < 128) {
-            // in earlier rounds (sampleFrac < 128) we skip data in the sample (reduces overall work ~2x)
-            if (rnd128(i) > sampleFrac) continue;
-         }
-         if (cur < end) {
-            u16 pos2 = 255, pos1 = st->findLongestSymbol(cur, end);
-            cur += st->symbols[pos1].length();
-            gain += (int) (st->symbols[pos1].length()-(1+isEscapeCode(pos1)));
-            while (true) {
-	       u8* old = cur;
-               counters.count1Inc(pos1);
-               // count single symbol (i.e. an option is not extending it)
-			   if (cur>=end)
-				   break;
-               if (st->symbols[pos1].length() != 1)
-                  counters.count1Inc(*cur);
-               if (cur<end-7) {
-                  u64 word = fsst_unaligned_load(cur);
-                  size_t pos = word & 0xFFFFFF;
-                  size_t idx = FSST_HASH(pos)&(st->hashTabSize-1);
-                  Symbol s = st->hashTab[idx];
-                  pos2 = st->shortCodes[word & 0xFFFF] & FSST_CODE_MASK;
-                  word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
-                  if ((s.icl < FSST_ICL_FREE) & (s.val.num == word)) {
-                     pos2 = s.code();
-		     cur += s.length();
-                  } else if (pos2 >= FSST_CODE_BASE) {
-                     cur += 2;
-                  } else {
-                     pos2 = st->byteCodes[word & 0xFF] & FSST_CODE_MASK;
-                     cur += 1;
-                  }
-               } else {
-                  assert(cur<end);
-                  pos2 = st->findLongestSymbol(cur, end);
-                  cur += st->symbols[pos2].length();
-               }
-               // compute compressed output size
-               gain += ((int) (cur-old))-(1+isEscapeCode(pos2));
-               // now count the subsequent two symbols we encode as an extension possibility
-               if (sampleFrac < 128) { // no need to count pairs in final round
-                  counters.count2Inc(pos1, pos2);
-                  if ((cur-old) > 1)  // do not count escaped bytes doubly
-                     counters.count2Inc(pos1, *old);
-               }
-               pos1 = pos2;
-            }
-         }
-      }
-      return gain;
-   };
+					// compute compressed output size
+					gain += ((int) (cur-start))-(1+isEscapeCode(code2));
-   auto makeTable = [&](SymbolTable *st, Counters &counters) {
-      // hashmap of c (needed because we can generate duplicate candidates)
-      unordered_set<QSymbol> cands;
-      // artificially make terminater the most frequent symbol so it gets included
-      u16 terminator = st->nSymbols?FSST_CODE_BASE:st->terminator;
-      counters.count1Set(terminator,65535);
-      auto addOrInc = [&](unordered_set<QSymbol> &cands, Symbol s, u64 count) {
-         if (count < (5*sampleFrac)/128) return; // improves both compression speed (less candidates), but also quality!!
-         QSymbol q;
-         q.symbol = s;
-         q.gain = count * s.length();
-         auto it = cands.find(q);
-         if (it != cands.end()) {
-            q.gain += (*it).gain;
-            cands.erase(*it);
-         }
-         cands.insert(q);
-      };
+					// now count the subsequent two symbols we encode as an extension possibility
+					if (sampleFrac < 128) { // no need to count pairs in final round
+						                    // consider the symbol that is the concatenation of the two last symbols
+						counters.count2Inc(code1, code2);
-      // add candidate symbols based on counted frequency
-      for (u32 pos1=0; pos1<FSST_CODE_BASE+(size_t) st->nSymbols; pos1++) {
-         u32 cnt1 = counters.count1GetNext(pos1); // may advance pos1!!
-         if (!cnt1) continue;
+						// as an alternative, consider just extending with the next byte..
+						if ((cur-start) > 1)  // ..but do not count single byte extensions doubly
+							counters.count2Inc(code1, *start);
+					}
+					code1 = code2;
+				}
+			}
+		}
+		return gain;
+	};
-         // heuristic: promoting single-byte symbols (*8) helps reduce exception rates and increases [de]compression speed
-         Symbol s1 = st->symbols[pos1];
-         addOrInc(cands, s1, ((s1.length()==1)?8LL:1LL)*cnt1);
+	auto makeTable = [&](SymbolTable *st, Counters &counters) {
+		// hashmap of c (needed because we can generate duplicate candidates)
+		unordered_set<QSymbol> cands;
+		// artificially make terminater the most frequent symbol so it gets included
+		u16 terminator = st->nSymbols?FSST_CODE_BASE:st->terminator;
+		counters.count1Set(terminator,65535);
+		auto addOrInc = [&](unordered_set<QSymbol> &cands, Symbol s, u64 count) {
+			if (count < (5*sampleFrac)/128) return; // improves both compression speed (less candidates), but also quality!!
+			QSymbol q;
+			q.symbol = s;
+			q.gain = count * s.length();
+			auto it = cands.find(q);
+			if (it != cands.end()) {
+				q.gain += (*it).gain;
+				cands.erase(*it);
+			}
+			cands.insert(q);
+		};
-         if (sampleFrac >= 128 || // last round we do not create new (combined) symbols
-             s1.length() == Symbol::maxLength || // symbol cannot be extended
-             s1.val.str[0] == st->terminator) { // multi-byte symbols cannot contain the terminator byte
-            continue;
-         }
-         for (u32 pos2=0; pos2<FSST_CODE_BASE+(size_t)st->nSymbols; pos2++) {
-            u32 cnt2 = counters.count2GetNext(pos1, pos2); // may advance pos2!!
-            if (!cnt2) continue;
-            // create a new symbol
-            Symbol s2 = st->symbols[pos2];
-            Symbol s3 = concat(s1, s2);
-            if (s2.val.str[0] != st->terminator) // multi-byte symbols cannot contain the terminator byte
-               addOrInc(cands, s3, cnt2);
-         }
-      }
+		// add candidate symbols based on counted frequency
+		for (u32 pos1=0; pos1<FSST_CODE_BASE+(size_t) st->nSymbols; pos1++) {
+			u32 cnt1 = counters.count1GetNext(pos1); // may advance pos1!!
+			if (!cnt1) continue;
-      // insert candidates into priority queue (by gain)
-      auto cmpGn = [](const QSymbol& q1, const QSymbol& q2) { return (q1.gain < q2.gain) || (q1.gain == q2.gain && q1.symbol.val.num > q2.symbol.val.num); };
-      priority_queue<QSymbol,vector<QSymbol>,decltype(cmpGn)> pq(cmpGn);
-      for (auto& q : cands)
-         pq.push(q);
-      // Create new symbol map using best candidates
-      st->clear();
-      while (st->nSymbols < 255 && !pq.empty()) {
-         QSymbol q = pq.top();
-         pq.pop();
-         st->add(q.symbol);
-      }
-   };
+			// heuristic: promoting single-byte symbols (*8) helps reduce exception rates and increases [de]compression speed
+			Symbol s1 = st->symbols[pos1];
+			addOrInc(cands, s1, ((s1.length()==1)?8LL:1LL)*cnt1);
+			if (sampleFrac >= 128 || // last round we do not create new (combined) symbols
+			    s1.length() == Symbol::maxLength || // symbol cannot be extended
+			    s1.val.str[0] == st->terminator) { // multi-byte symbols cannot contain the terminator byte
+				continue;
+			}
+			for (u32 pos2=0; pos2<FSST_CODE_BASE+(size_t)st->nSymbols; pos2++) {
+				u32 cnt2 = counters.count2GetNext(pos1, pos2); // may advance pos2!!
+				if (!cnt2) continue;
+				// create a new symbol
+				Symbol s2 = st->symbols[pos2];
+				Symbol s3 = concat(s1, s2);
+				if (s2.val.str[0] != st->terminator) // multi-byte symbols cannot contain the terminator byte
+					addOrInc(cands, s3, cnt2);
+			}
+		}
+		// insert candidates into priority queue (by gain)
+		auto cmpGn = [](const QSymbol& q1, const QSymbol& q2) { return (q1.gain < q2.gain) || (q1.gain == q2.gain && q1.symbol.val.num > q2.symbol.val.num); };
+		priority_queue<QSymbol,vector<QSymbol>,decltype(cmpGn)> pq(cmpGn);
+		for (auto& q : cands)
+			pq.push(q);
-   u8 bestCounters[512*sizeof(u16)];
+		// Create new symbol map using best candidates
+		st->clear();
+		while (st->nSymbols < 255 && !pq.empty()) {
+			QSymbol q = pq.top();
+			pq.pop();
+			st->add(q.symbol);
+		}
+	};
+	u8 bestCounters[512*sizeof(u16)];
 #ifdef NONOPT_FSST
-   for(size_t frac : {127, 127, 127, 127, 127, 127, 127, 127, 127, 128}) {
-      sampleFrac = frac;
+	for(size_t frac : {127, 127, 127, 127, 127, 127, 127, 127, 127, 128}) {
+		sampleFrac = frac;
 #else
-   for(sampleFrac=8; true; sampleFrac += 30) {
+	for(sampleFrac=8; true; sampleFrac += 30) {
 #endif
-      memset(&counters, 0, sizeof(Counters));
-      long gain = compressCount(st, counters);
-      if (gain >= bestGain) { // a new best solution!
-         counters.backup1(bestCounters);
-         *bestTable = *st; bestGain = gain;
-      }
-      if (sampleFrac >= 128) break; // we do 5 rounds (sampleFrac=8,38,68,98,128)
-      makeTable(st, counters);
-   }
-   delete st;
-   counters.restore1(bestCounters);
-   makeTable(bestTable, counters);
-   bestTable->finalize(zeroTerminated); // renumber codes for more efficient compression
-   return bestTable;
+		memset(&counters, 0, sizeof(Counters));
+		long gain = compressCount(st, counters);
+		if (gain >= bestGain) { // a new best solution!
+			counters.backup1(bestCounters);
+			*bestTable = *st; bestGain = gain;
+		}
+		if (sampleFrac >= 128) break; // we do 5 rounds (sampleFrac=8,38,68,98,128)
+		makeTable(st, counters);
+	}
+	delete st;
+	counters.restore1(bestCounters);
+	makeTable(bestTable, counters);
+	bestTable->finalize(zeroTerminated); // renumber codes for more efficient compression
+	return bestTable;
 }
 // Compress a sequence of strings, using the AVX512 kernel where there is enough work for it.
 // Each string is cut into chunks of at most 511 bytes; every chunk becomes one job and up to 512 jobs form a batch.
 // A batch with at least 32 non-empty jobs is sorted on length (longest first) and handed to duckdb_fsst_compressAVX512;
 // every job that still has input left (cur < end) afterwards is finished by the scalar loop.
 // The encoded chunks are then copied to dst in input order; strOut[i] / lenOut[i] receive the start and total compressed length of line i.
 // Returns curLine, the number of input lines consumed; the outer loop stops early when the remaining budget cannot hold the next string.
 static inline size_t compressSIMD(SymbolTable &symbolTable, u8* symbolBase, size_t nlines, size_t len[], u8* line[], size_t size, u8* dst, size_t lenOut[], u8* strOut[], int unroll) {
-   size_t curLine = 0, inOff = 0, outOff = 0, batchPos = 0, empty = 0, budget = size;
-   u8 *lim = dst + size, *codeBase = symbolBase + (1<<18); // 512KB temp space for compressing 512 strings
-   SIMDjob input[512];  // combined offsets of input strings (cur,end), and string #id (pos) and output (dst) pointer
-   SIMDjob output[512]; // output are (pos:9,dst:19) end pointers (compute compressed length from this)
-   size_t jobLine[512]; // for which line in the input sequence was this job (needed because we may split a line into multiple jobs)
-   while (curLine < nlines && outOff <= (1<<19)) {
-      size_t prevLine = curLine, chunk, curOff = 0;
-      // bail out if the output buffer cannot hold the compressed next string fully
-      if (((len[curLine]-curOff)*2 + 7) > budget) break; // see below for the +7
-      else budget -= (len[curLine]-curOff)*2;
-      strOut[curLine] = (u8*) 0;
-      lenOut[curLine] = 0;
+	size_t curLine = 0, inOff = 0, outOff = 0, batchPos = 0, empty = 0, budget = size;
+	u8 *lim = dst + size, *codeBase = symbolBase + (1<<18); // 512KB temp space for compressing 512 strings
+	SIMDjob input[512];  // combined offsets of input strings (cur,end), and string #id (pos) and output (dst) pointer
+	SIMDjob output[512]; // output are (pos:9,dst:19) end pointers (compute compressed length from this)
+	size_t jobLine[512]; // for which line in the input sequence was this job (needed because we may split a line into multiple jobs)
-      do {
-         do {
-            chunk = len[curLine] - curOff;
-            if (chunk > 511) {
-               chunk = 511; // large strings need to be chopped up into segments of 511 bytes
-            }
-            // create a job in this batch
-            SIMDjob job;
-            job.cur = inOff;
-            job.end = job.cur + chunk;
-            job.pos = batchPos;
-            job.out = outOff;
-            // worst case estimate for compressed size (+7 is for the scatter that writes extra 7 zeros)
-            outOff += 7 + 2*(size_t)(job.end - job.cur); // note, total size needed is 512*(511*2+7) bytes.
-            if (outOff > (1<<19)) break; // simdbuf may get full, stop before this chunk
-            // register job in this batch
-            input[batchPos] = job;
-            jobLine[batchPos] = curLine;
-            if (chunk == 0) {
-               empty++; // detect empty chunks -- SIMD code cannot handle empty strings, so they need to be filtered out
-            } else {
-               // copy string chunk into temp buffer
-               memcpy(symbolBase + inOff, line[curLine] + curOff, chunk);
-               inOff += chunk;
-               curOff += chunk;
-               symbolBase[inOff++] = (u8) symbolTable.terminator; // write an extra char at the end that will not be encoded
-            }
-            if (++batchPos == 512) break;
-         } while(curOff < len[curLine]);
-         if ((batchPos == 512) || (outOff > (1<<19)) || (++curLine >= nlines)) { // cannot accumulate more?
-            if (batchPos-empty >= 32) { // if we have enough work, fire off duckdb_fsst_compressAVX512 (32 is due to max 4x8 unrolling)
-               // radix-sort jobs on length (longest string first)
-               // -- this provides best load balancing and allows to skip empty jobs at the end
-               u16 sortpos[513];
-               memset(sortpos, 0, sizeof(sortpos));
-               // calculate length histo
-               for(size_t i=0; i<batchPos; i++) {
-                  size_t len = input[i].end - input[i].cur;
-                  sortpos[512UL - len]++;
-               }
-               // calculate running sum
-               for(size_t i=1; i<=512; i++)
-                  sortpos[i] += sortpos[i-1];
-               // move jobs to their final destination
-               SIMDjob inputOrdered[512];
-               for(size_t i=0; i<batchPos; i++) {
-                  size_t len = input[i].end - input[i].cur;
-                  size_t pos = sortpos[511UL - len]++;
-                  inputOrdered[pos] = input[i];
-                }
-               // finally.. SIMD compress max 256KB of simdbuf into (max) 512KB of simdbuf (but presumably much less..)
-               for(size_t done = duckdb_fsst_compressAVX512(symbolTable, codeBase, symbolBase, inputOrdered, output, batchPos-empty, unroll);
-                   done < batchPos; done++) output[done] = inputOrdered[done];
-            } else {
-               memcpy(output, input, batchPos*sizeof(SIMDjob));
-            }
-            // finish encoding (unfinished strings in process, plus the few last strings not yet processed)
-            for(size_t i=0; i<batchPos; i++) {
-               SIMDjob job = output[i];
-               if (job.cur < job.end) { // finish encoding this string with scalar code
-                  u8* cur = symbolBase + job.cur;
-                  u8* end = symbolBase + job.end;
-                  u8* out = codeBase + job.out;
-                  while (cur < end) {
-                     u64 word = fsst_unaligned_load(cur);
-                     size_t code = symbolTable.shortCodes[word & 0xFFFF];
-                     size_t pos = word & 0xFFFFFF;
-                     size_t idx = FSST_HASH(pos)&(symbolTable.hashTabSize-1);
-                     Symbol s = symbolTable.hashTab[idx];
-                     out[1] = (u8) word; // speculatively write out escaped byte
-                     word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
-                     if ((s.icl < FSST_ICL_FREE) && s.val.num == word) {
-                        *out++ = (u8) s.code(); cur += s.length();
-                     } else {
-                        // could be a 2-byte or 1-byte code, or miss
-                        // handle everything with predication
-                        *out = (u8) code;
-                        out += 1+((code&FSST_CODE_BASE)>>8);
-                        cur += (code>>FSST_LEN_BITS);
-                    }
-                  }
-                  job.out = out - codeBase;
-               }
-               // postprocess job info
-               job.cur = 0;
-               job.end = job.out - input[job.pos].out; // misuse .end field as compressed size
-               job.out = input[job.pos].out; // reset offset to start of encoded string
-               input[job.pos] = job;
-            }
-            // copy out the result data
-            for(size_t i=0; i<batchPos; i++) {
-               size_t lineNr = jobLine[i]; // the sort must be order-preserving, as we concatenate results string in order
-               size_t sz = input[i].end; // had stored compressed lengths here
-               if (!strOut[lineNr]) strOut[lineNr] = dst; // first segment will be the strOut pointer
-               lenOut[lineNr] += sz; // add segment (lenOut starts at 0 for this reason)
-               memcpy(dst, codeBase+input[i].out, sz);
-               dst += sz;
-            }
-            // go for the next batch of 512 chunks
-            inOff = outOff = batchPos = empty = 0;
-            budget = (size_t) (lim - dst);
-         }
-      } while (curLine == prevLine && outOff <= (1<<19));
-   }
-   return curLine;
+	while (curLine < nlines && outOff <= (1<<19)) {
+		size_t prevLine = curLine, chunk, curOff = 0;
+		// bail out if the output buffer cannot hold the compressed next string fully
+		if (((len[curLine]-curOff)*2 + 7) > budget) break; // see below for the +7
+		else budget -= (len[curLine]-curOff)*2;
+		strOut[curLine] = (u8*) 0;
+		lenOut[curLine] = 0;
+		do {
+			do {
+				chunk = len[curLine] - curOff;
+				if (chunk > 511) {
+					chunk = 511; // large strings need to be chopped up into segments of 511 bytes
+				}
+				// create a job in this batch
+				SIMDjob job;
+				job.cur = inOff;
+				job.end = job.cur + chunk;
+				job.pos = batchPos;
+				job.out = outOff;
+				// worst case estimate for compressed size (+7 is for the scatter that writes extra 7 zeros)
+				outOff += 7 + 2*(size_t)(job.end - job.cur); // note, total size needed is 512*(511*2+7) bytes.
+				if (outOff > (1<<19)) break; // simdbuf may get full, stop before this chunk
+				// register job in this batch
+				input[batchPos] = job;
+				jobLine[batchPos] = curLine;
+				if (chunk == 0) {
+					empty++; // detect empty chunks -- SIMD code cannot handle empty strings, so they need to be filtered out
+				} else {
+					// copy string chunk into temp buffer
+					memcpy(symbolBase + inOff, line[curLine] + curOff, chunk);
+					inOff += chunk;
+					curOff += chunk;
+					symbolBase[inOff++] = (u8) symbolTable.terminator; // write an extra char at the end that will not be encoded
+				}
+				if (++batchPos == 512) break;
+			} while(curOff < len[curLine]);
+			if ((batchPos == 512) || (outOff > (1<<19)) || (++curLine >= nlines)) { // cannot accumulate more?
+				if (batchPos-empty >= 32) { // if we have enough work, fire off fsst_compressAVX512 (32 is due to max 4x8 unrolling)
+					// radix-sort jobs on length (longest string first)
+					// -- this provides best load balancing and allows to skip empty jobs at the end
+					u16 sortpos[513];
+					memset(sortpos, 0, sizeof(sortpos));
+					// calculate length histo
+					for(size_t i=0; i<batchPos; i++) {
+						size_t len = input[i].end - input[i].cur;
+						sortpos[512UL - len]++; // counted one slot "late": after the running sum, sortpos[511-len] is the number of strictly longer jobs
+					}
+					// calculate running sum
+					for(size_t i=1; i<=512; i++)
+						sortpos[i] += sortpos[i-1];
+					// move jobs to their final destination
+					SIMDjob inputOrdered[512];
+					for(size_t i=0; i<batchPos; i++) {
+						size_t len = input[i].end - input[i].cur;
+						size_t pos = sortpos[511UL - len]++;
+						inputOrdered[pos] = input[i];
+					}
+					// finally.. SIMD compress max 256KB of simdbuf into (max) 512KB of simdbuf (but presumably much less..)
+					for(size_t done = duckdb_fsst_compressAVX512(symbolTable, codeBase, symbolBase, inputOrdered, output, batchPos-empty, unroll);
+					     done < batchPos; done++) output[done] = inputOrdered[done]; // presumably the kernel returns how many output[] entries it filled; the rest are passed through untouched -- TODO confirm
+				} else {
+					memcpy(output, input, batchPos*sizeof(SIMDjob));
+				}
+				// finish encoding (unfinished strings in process, plus the few last strings not yet processed)
+				for(size_t i=0; i<batchPos; i++) {
+					SIMDjob job = output[i];
+					if (job.cur < job.end) { // finish encoding this string with scalar code
+						u8* cur = symbolBase + job.cur;
+						u8* end = symbolBase + job.end;
+						u8* out = codeBase + job.out;
+						while (cur < end) {
+							u64 word = fsst_unaligned_load(cur);
+							size_t code = symbolTable.shortCodes[word & 0xFFFF];
+							size_t pos = word & 0xFFFFFF;
+							size_t idx = FSST_HASH(pos)&(symbolTable.hashTabSize-1);
+							Symbol s = symbolTable.hashTab[idx];
+							out[1] = (u8) word; // speculatively write out escaped byte
+							word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
+							if ((s.icl < FSST_ICL_FREE) && s.val.num == word) {
+								*out++ = (u8) s.code(); cur += s.length();
+							} else {
+								// could be a 2-byte or 1-byte code, or miss
+								// handle everything with predication
+								*out = (u8) code;
+								out += 1+((code&FSST_CODE_BASE)>>8);
+								cur += (code>>FSST_LEN_BITS);
+							}
+						}
+						job.out = out - codeBase;
+					}
+					// postprocess job info
+					job.cur = 0;
+					job.end = job.out - input[job.pos].out; // misuse .end field as compressed size
+					job.out = input[job.pos].out; // reset offset to start of encoded string
+					input[job.pos] = job;
+				}
+				// copy out the result data
+				for(size_t i=0; i<batchPos; i++) {
+					size_t lineNr = jobLine[i]; // the sort must be order-preserving, as we concatenate results string in order
+					size_t sz = input[i].end; // had stored compressed lengths here
+					if (!strOut[lineNr]) strOut[lineNr] = dst; // first segment will be the strOut pointer
+					lenOut[lineNr] += sz; // add segment (lenOut starts at 0 for this reason)
+					memcpy(dst, codeBase+input[i].out, sz);
+					dst += sz;
+				}
+				// go for the next batch of 512 chunks
+				inOff = outOff = batchPos = empty = 0;
+				budget = (size_t) (lim - dst);
+			}
+		} while (curLine == prevLine && outOff <= (1<<19));
+	}
+	return curLine;
 }
 // optimized adaptive *scalar* compression method
 // Compresses nlines strings (strIn / lenIn) into out, which has room for size bytes.
 // Every string is processed in chunks of at most 511 bytes; a chunk is copied into a local buffer and followed by the terminator byte.
 // strOut[i] / lenOut[i] receive the start pointer and length of the compressed form of line i.
 // Returns the number of lines compressed; returns early (with that line index) when fewer than 2*chunk+7 output bytes remain.
 static inline size_t compressBulk(SymbolTable &symbolTable, size_t nlines, size_t lenIn[], u8* strIn[], size_t size, u8* out, size_t lenOut[], u8* strOut[], bool noSuffixOpt, bool avoidBranch) {
-   // TODO: PR this fix into main fsst REPO?
-   // - the issue is that for strings over the 512 buf size, the unaligned load will read past the end of the buf
-   //   due to the unaligned_load loading 64 bits, simply increasing the buffer size should be ok since the read word
-   //   is masked with 0xFFFF anyway
-   u8 buf[512 + 8];
-   u8 *cur = NULL, *end =  NULL, *lim = out + size;
-   size_t curLine, suffixLim = symbolTable.suffixLim;
-   u8 byteLim = symbolTable.nSymbols + symbolTable.zeroTerminated - symbolTable.lenHisto[0];
-   // three variants are possible. dead code falls away since the bool arguments are constants
-   auto compressVariant = [&](bool noSuffixOpt, bool avoidBranch) {
-      while (cur < end) {
-         u64 word = fsst_unaligned_load(cur);
-         size_t code = symbolTable.shortCodes[word & 0xFFFF];
-         if (noSuffixOpt && ((u8) code) < suffixLim) {
-            // 2 byte code without having to worry about longer matches
-            *out++ = (u8) code; cur += 2;
-         } else {
-            size_t pos = word & 0xFFFFFF;
-            size_t idx = FSST_HASH(pos)&(symbolTable.hashTabSize-1);
-            Symbol s = symbolTable.hashTab[idx];
-            out[1] = (u8) word; // speculatively write out escaped byte
-            word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
-            if ((s.icl < FSST_ICL_FREE) && s.val.num == word) {
-               *out++ = (u8) s.code(); cur += s.length();
-            } else if (avoidBranch) {
-               // could be a 2-byte or 1-byte code, or miss
-               // handle everything with predication
-               *out = (u8) code;
-               out += 1+((code&FSST_CODE_BASE)>>8);
-               cur += (code>>FSST_LEN_BITS);
-            } else if ((u8) code < byteLim) {
-               // 2 byte code after checking there is no longer pattern
-               *out++ = (u8) code; cur += 2;
-            } else {
-               // 1 byte code or miss.
-               *out = (u8) code;
-               out += 1+((code&FSST_CODE_BASE)>>8); // predicated - tested with a branch, that was always worse
-               cur++;
-            }
-         }
-      }
-   };
+	u8 *cur = NULL, *end =  NULL, *lim = out + size;
+	size_t curLine, suffixLim = symbolTable.suffixLim;
+	u8 byteLim = symbolTable.nSymbols + symbolTable.zeroTerminated - symbolTable.lenHisto[0];
+	u8 buf[512+7]; /* +7 sentinel is to avoid 8-byte unaligned-loads going beyond 511 out-of-bounds */
+	memset(buf+511, 0, 8); /* and initialize the sentinel bytes (buf[511..518], the last 8 bytes of the buffer) */
+	// three variants are possible. dead code falls away since the bool arguments are constants
+	auto compressVariant = [&](bool noSuffixOpt, bool avoidBranch) {
+		while (cur < end) {
+			u64 word = fsst_unaligned_load(cur);
+			size_t code = symbolTable.shortCodes[word & 0xFFFF];
+			if (noSuffixOpt && ((u8) code) < suffixLim) {
+				// 2 byte code without having to worry about longer matches
+				*out++ = (u8) code; cur += 2;
+			} else {
+				size_t pos = word & 0xFFFFFF;
+				size_t idx = FSST_HASH(pos)&(symbolTable.hashTabSize-1);
+				Symbol s = symbolTable.hashTab[idx];
+				out[1] = (u8) word; // speculatively write out escaped byte
+				word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
+				if ((s.icl < FSST_ICL_FREE) && s.val.num == word) {
+					*out++ = (u8) s.code(); cur += s.length();
+				} else if (avoidBranch) {
+					// could be a 2-byte or 1-byte code, or miss
+					// handle everything with predication
+					*out = (u8) code;
+					out += 1+((code&FSST_CODE_BASE)>>8);
+					cur += (code>>FSST_LEN_BITS);
+				} else if ((u8) code < byteLim) {
+					// 2 byte code after checking there is no longer pattern
+					*out++ = (u8) code; cur += 2;
+				} else {
+					// 1 byte code or miss.
+					*out = (u8) code;
+					out += 1+((code&FSST_CODE_BASE)>>8); // predicated - tested with a branch, that was always worse
+					cur++;
+				}
+			}
+		}
+	};
-   for(curLine=0; curLine<nlines; curLine++) {
-      size_t chunk, curOff = 0;
-      strOut[curLine] = out;
-      do {
-         bool skipCopy = symbolTable.zeroTerminated;
-         cur = strIn[curLine] + curOff;
-         chunk = lenIn[curLine] - curOff;
-         if (chunk > 511) {
-            chunk = 511; // we need to compress in chunks of 511 in order to be byte-compatible with simd-compressed FSST
-            skipCopy = false; // need to put terminator, so no in place mem usage possible
-         }
-         if ((2*chunk+7) > (size_t) (lim-out)) {
-            return curLine; // out of memory
-         }
-         if (!skipCopy) { // only in case of short zero-terminated strings, we can avoid copying
-            memcpy(buf, cur, chunk);
-            cur = buf;
-            buf[chunk] = (u8) symbolTable.terminator;
-         }
-         end = cur + chunk;
-         // based on symboltable stats, choose a variant that is nice to the branch predictor
-         if (noSuffixOpt) {
-            compressVariant(true,false);
-         } else if (avoidBranch) {
-            compressVariant(false,true);
-         } else {
-          compressVariant(false, false);
-         }
-      } while((curOff += chunk) < lenIn[curLine]);
-      lenOut[curLine] = (size_t) (out - strOut[curLine]);
-   }
-   return curLine;
+	for(curLine=0; curLine<nlines; curLine++) {
+		size_t chunk, curOff = 0;
+		strOut[curLine] = out;
+		do {
+			cur = strIn[curLine] + curOff;
+			chunk = lenIn[curLine] - curOff;
+			if (chunk > 511) {
+				chunk = 511; // we need to compress in chunks of 511 in order to be byte-compatible with simd-compressed FSST
+			}
+			if ((2*chunk+7) > (size_t) (lim-out)) {
+				return curLine; // out of memory
+			}
+			// copy the string to the 511-byte buffer
+			memcpy(buf, cur, chunk);
+			buf[chunk] = (u8) symbolTable.terminator;
+			cur = buf;
+			end = cur + chunk;
+			// based on symboltable stats, choose a variant that is nice to the branch predictor
+			if (noSuffixOpt) {
+				compressVariant(true,false);
+			} else if (avoidBranch) {
+				compressVariant(false,true);
+			} else {
+				compressVariant(false, false);
+			}
+		} while((curOff += chunk) < lenIn[curLine]);
+		lenOut[curLine] = (size_t) (out - strOut[curLine]);
+	}
+	return curLine;
 }
 #define FSST_SAMPLELINE ((size_t) 512)
 // quickly select a uniformly random set of lines such that we have between [FSST_SAMPLETARGET,FSST_SAMPLEMAXSZ) string bytes
 // If the summed input length is below FSST_SAMPLETARGET, the input pointers are returned unchanged and *lenRef is left alone.
 // Otherwise chunks of at most FSST_SAMPLELINE bytes, taken from pseudo-randomly chosen non-empty lines, are copied into sampleBuf
 // until FSST_SAMPLETARGET bytes have been gathered, and *lenRef is redirected to a newly allocated (new[]) array holding the chunk lengths.
 // That array is not freed here; duckdb_fsst_create delete[]s it when it differs from the original lenIn.
 vector<u8*> makeSample(u8* sampleBuf, u8* strIn[], size_t **lenRef, size_t nlines) {
-   size_t totSize = 0, *lenIn = *lenRef;
-   vector<u8*> sample;
+	size_t totSize = 0, *lenIn = *lenRef;
+	vector<u8*> sample;
-   for(size_t i=0; i<nlines; i++)
-      totSize += lenIn[i];
+	for(size_t i=0; i<nlines; i++)
+		totSize += lenIn[i];
-   if (totSize < FSST_SAMPLETARGET) {
-      for(size_t i=0; i<nlines; i++)
-         sample.push_back(strIn[i]);
-   } else {
-      size_t sampleRnd = FSST_HASH(4637947);
-      u8* sampleLim = sampleBuf + FSST_SAMPLETARGET;
-      size_t *sampleLen = *lenRef = new size_t[nlines + FSST_SAMPLEMAXSZ/FSST_SAMPLELINE];
-      while(sampleBuf < sampleLim) {
-         // choose a non-empty line
-         sampleRnd = FSST_HASH(sampleRnd);
-         size_t linenr = sampleRnd % nlines;
-         while (lenIn[linenr] == 0)
-            if (++linenr == nlines) linenr = 0;
-         // choose a chunk
-         size_t chunks = 1 + ((lenIn[linenr]-1) / FSST_SAMPLELINE);
-         sampleRnd = FSST_HASH(sampleRnd);
-         size_t chunk = FSST_SAMPLELINE*(sampleRnd % chunks);
-         // add the chunk to the sample
-         size_t len = min(lenIn[linenr]-chunk,FSST_SAMPLELINE);
-         memcpy(sampleBuf, strIn[linenr]+chunk, len);
-         sample.push_back(sampleBuf);
-         sampleBuf += *sampleLen++ = len;
-      }
-   }
-   return sample;
+	if (totSize < FSST_SAMPLETARGET) {
+		for(size_t i=0; i<nlines; i++)
+			sample.push_back(strIn[i]);
+	} else {
+		size_t sampleRnd = FSST_HASH(4637947); // fixed seed, so the same input always yields the same sample
+		u8* sampleLim = sampleBuf + FSST_SAMPLETARGET;
+		size_t *sampleLen = *lenRef = new size_t[nlines + FSST_SAMPLEMAXSZ/FSST_SAMPLELINE];
+		while(sampleBuf < sampleLim) {
+			// choose a non-empty line
+			sampleRnd = FSST_HASH(sampleRnd);
+			size_t linenr = sampleRnd % nlines;
+			while (lenIn[linenr] == 0) // a non-empty line exists on this path because totSize >= FSST_SAMPLETARGET
+				if (++linenr == nlines) linenr = 0;
+			// choose a chunk
+			size_t chunks = 1 + ((lenIn[linenr]-1) / FSST_SAMPLELINE);
+			sampleRnd = FSST_HASH(sampleRnd);
+			size_t chunk = FSST_SAMPLELINE*(sampleRnd % chunks);
+			// add the chunk to the sample
+			size_t len = min(lenIn[linenr]-chunk,FSST_SAMPLELINE);
+			memcpy(sampleBuf, strIn[linenr]+chunk, len);
+			sample.push_back(sampleBuf);
+			sampleBuf += *sampleLen++ = len; // store the chunk length, then advance the write position by it
+		}
+	}
+	return sample;
 }
 // Create an encoder for the n input strings (strIn / lenIn).
 // A sample is drawn with makeSample, a symbol table is built from it with buildSymbolTable and stored in a new Encoder;
 // the temporary sample buffer (and the sample length array, if makeSample allocated one) are released before returning.
 // Returns the new Encoder (allocated with new) cast to duckdb_fsst_encoder_t*; it is not freed in this function.
 extern "C" duckdb_fsst_encoder_t* duckdb_fsst_create(size_t n, size_t lenIn[], u8 *strIn[], int zeroTerminated) {
-   u8* sampleBuf = new u8[FSST_SAMPLEMAXSZ];
-   size_t *sampleLen = lenIn;
-   vector<u8*> sample = makeSample(sampleBuf, strIn, &sampleLen, n?n:1); // careful handling of input to get a right-size and representative sample
-   Encoder *encoder = new Encoder();
-   encoder->symbolTable = shared_ptr<SymbolTable>(buildSymbolTable(encoder->counters, sample, sampleLen, zeroTerminated));
-   if (sampleLen != lenIn) delete[] sampleLen;
-   delete[] sampleBuf;
-   return (duckdb_fsst_encoder_t*) encoder;
+	u8* sampleBuf = new u8[FSST_SAMPLEMAXSZ];
+	size_t *sampleLen = lenIn;
+	vector<u8*> sample = makeSample(sampleBuf, strIn, &sampleLen, n?n:1); // careful handling of input to get a right-size and representative sample
+	Encoder *encoder = new Encoder();
+	encoder->symbolTable = shared_ptr<SymbolTable>(buildSymbolTable(encoder->counters, sample, sampleLen, zeroTerminated));
+	if (sampleLen != lenIn) delete[] sampleLen; // makeSample redirected sampleLen to its own array; free it
+	delete[] sampleBuf;
+	return (duckdb_fsst_encoder_t*) encoder;
 }
 /* create another encoder instance, necessary to do multi-threaded encoding using the same symbol table */
 /* the returned Encoder is newly allocated; only the symbolTable shared_ptr is copied from the given encoder */
 extern "C" duckdb_fsst_encoder_t* duckdb_fsst_duplicate(duckdb_fsst_encoder_t *encoder) {
-   Encoder *e = new Encoder();
-   e->symbolTable = ((Encoder*)encoder)->symbolTable; // it is a shared_ptr
-   return (duckdb_fsst_encoder_t*) e;
+	Encoder *e = new Encoder();
+	e->symbolTable = ((Encoder*)encoder)->symbolTable; // it is a shared_ptr
+	return (duckdb_fsst_encoder_t*) e;
 }
-// export a symbol table in compact format.
+// export a symbol table in compact format. Layout written to buf: 8-byte version word, 1 byte zeroTerminated, 8 bytes lenHisto, then the used bytes of every stored symbol. Returns the number of bytes written.
 extern "C" u32 duckdb_fsst_export(duckdb_fsst_encoder_t *encoder, u8 *buf) {
-   Encoder *e = (Encoder*) encoder;
-   // In ->version there is a versionnr, but we hide also suffixLim/terminator/nSymbols there.
-   // This is sufficient in principle to *reconstruct* a duckdb_fsst_encoder_t from a duckdb_fsst_decoder_t
-   // (such functionality could be useful to append compressed data to an existing block).
-   //
-   // However, the hash function in the encoder hash table is endian-sensitive, and given its
-   // 'lossy perfect' hashing scheme is *unable* to contain other-endian-produced symbol tables.
-   // Doing a endian-conversion during hashing will be slow and self-defeating.
-   //
-   // Overall, we could support reconstructing an encoder for incremental compression, but
-   // should enforce equal-endianness. Bit of a bummer. Not going there now.
-   //
-   // The version field is now there just for future-proofness, but not used yet
-   // version allows keeping track of fsst versions, track endianness, and encoder reconstruction
-   u64 version = (FSST_VERSION << 32) |  // version is 24 bits, most significant byte is 0
-                 (((u64) e->symbolTable->suffixLim) << 24) |
-                 (((u64) e->symbolTable->terminator) << 16) |
-                 (((u64) e->symbolTable->nSymbols) << 8) |
-                 FSST_ENDIAN_MARKER; // least significant byte is nonzero
+	Encoder *e = (Encoder*) encoder;
+	// In ->version there is a versionnr, but we hide also suffixLim/terminator/nSymbols there.
+	// This is sufficient in principle to *reconstruct* a duckdb_fsst_encoder_t from a duckdb_fsst_decoder_t
+	// (such functionality could be useful to append compressed data to an existing block).
+	//
+	// However, the hash function in the encoder hash table is endian-sensitive, and given its
+	// 'lossy perfect' hashing scheme is *unable* to contain other-endian-produced symbol tables.
+	// Doing an endian-conversion during hashing will be slow and self-defeating.
+	//
+	// Overall, we could support reconstructing an encoder for incremental compression, but
+	// should enforce equal-endianness. Bit of a bummer. Not going there now.
+	//
+	// The version field is now there just for future-proofness, but not used yet
+	// version allows keeping track of fsst versions, track endianness, and encoder reconstruction
+	u64 version = (FSST_VERSION << 32) |  // version is 24 bits, most significant byte is 0
+	              (((u64) e->symbolTable->suffixLim) << 24) |
+	              (((u64) e->symbolTable->terminator) << 16) |
+	              (((u64) e->symbolTable->nSymbols) << 8) |
+	              FSST_ENDIAN_MARKER; // least significant byte is nonzero
-   /* do not assume unaligned reads here */
-   memcpy(buf, &version, 8);
-   buf[8] = e->symbolTable->zeroTerminated;
-   for(u32 i=0; i<8; i++)
-      buf[9+i] = (u8) e->symbolTable->lenHisto[i];
-   u32 pos = 17;
+	/* do not assume unaligned reads here */
+	memcpy(buf, &version, 8);
+	buf[8] = e->symbolTable->zeroTerminated;
+	for(u32 i=0; i<8; i++)
+		buf[9+i] = (u8) e->symbolTable->lenHisto[i];
+	u32 pos = 17; // 8 (version) + 1 (zeroTerminated) + 8 (lenHisto) header bytes written so far
-   // emit only the used bytes of the symbols
-   for(u32 i = e->symbolTable->zeroTerminated; i < e->symbolTable->nSymbols; i++)
-      for(u32 j = 0; j < e->symbolTable->symbols[i].length(); j++)
-         buf[pos++] = e->symbolTable->symbols[i].val.str[j]; // serialize used symbol bytes
+	// emit only the used bytes of the symbols
+	for(u32 i = e->symbolTable->zeroTerminated; i < e->symbolTable->nSymbols; i++)
+		for(u32 j = 0; j < e->symbolTable->symbols[i].length(); j++)
+			buf[pos++] = e->symbolTable->symbols[i].val.str[j]; // serialize used symbol bytes
-   return pos; // length of what was serialized
+	return pos; // length of what was serialized
 }
 #define FSST_CORRUPT 32774747032022883 /* 7-byte number in little endian containing "corrupt" */
 // Fill decoder from a serialized symbol table in buf, using the same offsets duckdb_fsst_export writes
 // (version word at 0, zeroTerminated at 8, lenHisto at 9..16, symbol bytes from 17).
 // Returns the number of bytes consumed from buf, or 0 when the version in the header does not match FSST_VERSION.
 // NOTE(review): the lenHisto counts read from buf are not checked against the number of codes or against the
 // length of buf (which is not passed in) -- confirm callers only hand in trusted / validated data.
 extern "C" u32 duckdb_fsst_import(duckdb_fsst_decoder_t *decoder, u8 *buf) {
-   u64 version = 0;
-   u32 code, pos = 17;
-   u8 lenHisto[8];
-   // version field (first 8 bytes) is now there just for future-proofness, unused still (skipped)
-   memcpy(&version, buf, 8);
-   if ((version>>32) != FSST_VERSION) return 0;
-   decoder->zeroTerminated = buf[8]&1;
-   memcpy(lenHisto, buf+9, 8);
-   // in case of zero-terminated, first symbol is "" (zero always, may be overwritten)
-   decoder->len[0] = 1;
-   decoder->symbol[0] = 0;
-   // we use lenHisto[0] as 1-byte symbol run length (at the end)
-   code = decoder->zeroTerminated;
-   if (decoder->zeroTerminated) lenHisto[0]--; // if zeroTerminated, then symbol "" aka 1-byte code=0, is not stored at the end
-   // now get all symbols from the buffer
-   for(u32 l=1; l<=8; l++) { /* l = 1,2,3,4,5,6,7,8 */
-      for(u32 i=0; i < lenHisto[(l&7) /* 1,2,3,4,5,6,7,0 */]; i++, code++)  {
-         decoder->len[code] = (l&7)+1; /* len = 2,3,4,5,6,7,8,1  */
-         decoder->symbol[code] = 0;
-         for(u32 j=0; j<decoder->len[code]; j++)
-            ((u8*) &decoder->symbol[code])[j] = buf[pos++]; // note this enforces 'little endian' symbols
-      }
-   }
-   if (decoder->zeroTerminated) lenHisto[0]++;
+	u64 version = 0;
+	u32 code, pos = 17; // symbol bytes start right after the 17-byte header
+	u8 lenHisto[8];
-   // fill unused symbols with text "corrupt". Gives a chance to detect corrupted code sequences (if there are unused symbols).
-   while(code<255) {
-       decoder->symbol[code] = FSST_CORRUPT;
-       decoder->len[code++] = 8;
-   }
-   return pos;
+	// version field (first 8 bytes) is now there just for future-proofness, unused still (skipped)
+	memcpy(&version, buf, 8);
+	if ((version>>32) != FSST_VERSION) return 0;
+	decoder->zeroTerminated = buf[8]&1;
+	memcpy(lenHisto, buf+9, 8);
+	// in case of zero-terminated, first symbol is "" (zero always, may be overwritten)
+	decoder->len[0] = 1;
+	decoder->symbol[0] = 0;
+	// we use lenHisto[0] as 1-byte symbol run length (at the end)
+	code = decoder->zeroTerminated;
+	if (decoder->zeroTerminated) lenHisto[0]--; // if zeroTerminated, then symbol "" aka 1-byte code=0, is not stored at the end
+	// now get all symbols from the buffer
+	for(u32 l=1; l<=8; l++) { /* l = 1,2,3,4,5,6,7,8 */
+		for(u32 i=0; i < lenHisto[(l&7) /* 1,2,3,4,5,6,7,0 */]; i++, code++)  {
+			decoder->len[code] = (l&7)+1; /* len = 2,3,4,5,6,7,8,1  */
+			decoder->symbol[code] = 0;
+			for(u32 j=0; j<decoder->len[code]; j++)
+				((u8*) &decoder->symbol[code])[j] = buf[pos++]; // note this enforces 'little endian' symbols
+		}
+	}
+	if (decoder->zeroTerminated) lenHisto[0]++;
+	// fill unused symbols with text "corrupt". Gives a chance to detect corrupted code sequences (if there are unused symbols).
+	while(code<255) {
+		decoder->symbol[code] = FSST_CORRUPT;
+		decoder->len[code++] = 8;
+	}
+	return pos;
 }
 // runtime check for simd
 inline size_t _compressImpl(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd) {
 #ifndef NONOPT_FSST
-   if (simd && duckdb_fsst_hasAVX512())
-      return compressSIMD(*e->symbolTable, e->simdbuf, nlines, lenIn, strIn, size, output, lenOut, strOut, simd);
+	if (simd && duckdb_fsst_hasAVX512())
+		return compressSIMD(*e->symbolTable, e->simdbuf, nlines, lenIn, strIn, size, output, lenOut, strOut, simd);
 #endif
-   (void) simd;
-   return compressBulk(*e->symbolTable, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch);
+	(void) simd;
+	return compressBulk(*e->symbolTable, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch);
 }
 size_t compressImpl(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd) {
-   return _compressImpl(e, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch, simd);
+	return _compressImpl(e, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch, simd);
 }
-// adaptive choosing of scalar compression method based on symbol length histogram
+// adaptive choosing of scalar compression method based on symbol length histogram
 inline size_t _compressAuto(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], int simd) {
-   bool avoidBranch = false, noSuffixOpt = false;
-   if (100*e->symbolTable->lenHisto[1] > 65*e->symbolTable->nSymbols && 100*e->symbolTable->suffixLim > 95*e->symbolTable->lenHisto[1]) {
-      noSuffixOpt = true;
-   } else if ((e->symbolTable->lenHisto[0] > 24 && e->symbolTable->lenHisto[0] < 92) &&
-              (e->symbolTable->lenHisto[0] < 43 || e->symbolTable->lenHisto[6] + e->symbolTable->lenHisto[7] < 29) &&
-              (e->symbolTable->lenHisto[0] < 72 || e->symbolTable->lenHisto[2] < 72)) {
-      avoidBranch = true;
-   }
-   return _compressImpl(e, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch, simd);
+	bool avoidBranch = false, noSuffixOpt = false;
+	if (100*e->symbolTable->lenHisto[1] > 65*e->symbolTable->nSymbols && 100*e->symbolTable->suffixLim > 95*e->symbolTable->lenHisto[1]) {
+		noSuffixOpt = true;
+	} else if ((e->symbolTable->lenHisto[0] > 24 && e->symbolTable->lenHisto[0] < 92) &&
+	           (e->symbolTable->lenHisto[0] < 43 || e->symbolTable->lenHisto[6] + e->symbolTable->lenHisto[7] < 29) &&
+	           (e->symbolTable->lenHisto[0] < 72 || e->symbolTable->lenHisto[2] < 72)) {
+		avoidBranch = true;
+	}
+	return _compressImpl(e, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch, simd);
 }
 size_t compressAuto(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], int simd) {
-   return _compressAuto(e, nlines, lenIn, strIn, size, output, lenOut, strOut, simd);
+	return _compressAuto(e, nlines, lenIn, strIn, size, output, lenOut, strOut, simd);
 }
 // the main compression function (everything automatic)
 extern "C" size_t duckdb_fsst_compress(duckdb_fsst_encoder_t *encoder, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[]) {
-   // to be faster than scalar, simd needs 64 lines or more of length >=12; or fewer lines, but big ones (totLen > 32KB)
-   size_t totLen = accumulate(lenIn, lenIn+nlines, 0);
-   int simd = totLen > nlines*12 && (nlines > 64 || totLen > (size_t) 1<<15);
-   return _compressAuto((Encoder*) encoder, nlines, lenIn, strIn, size, output, lenOut, strOut, 3*simd);
+	// to be faster than scalar, simd needs 64 lines or more of length >=12; or fewer lines, but big ones (totLen > 32KB)
+	size_t totLen = accumulate(lenIn, lenIn+nlines, 0);
+	int simd = totLen > nlines*12 && (nlines > 64 || totLen > (size_t) 1<<15);
+	return _compressAuto((Encoder*) encoder, nlines, lenIn, strIn, size, output, lenOut, strOut, 3*simd);
 }
 /* deallocate encoder */
 extern "C" void duckdb_fsst_destroy(duckdb_fsst_encoder_t* encoder) {
-   Encoder *e = (Encoder*) encoder;
-   delete e;
+	Encoder *e = (Encoder*) encoder;
+	delete e;
 }
 /* very lazy implementation relying on export and import */
 extern "C" duckdb_fsst_decoder_t duckdb_fsst_decoder(duckdb_fsst_encoder_t *encoder) {
-   u8 buf[sizeof(duckdb_fsst_decoder_t)];
-   u32 cnt1 = duckdb_fsst_export(encoder, buf);
-   duckdb_fsst_decoder_t decoder;
-   u32 cnt2 = duckdb_fsst_import(&decoder, buf);
-   assert(cnt1 == cnt2); (void) cnt1; (void) cnt2;
-   return decoder;
+	u8 buf[sizeof(duckdb_fsst_decoder_t)];
+	u32 cnt1 = duckdb_fsst_export(encoder, buf);
+	duckdb_fsst_decoder_t decoder;
+	u32 cnt2 = duckdb_fsst_import(&decoder, buf);
+	assert(cnt1 == cnt2); (void) cnt1; (void) cnt2;
+	return decoder;
 }
 // LICENSE_CHANGE_END