duckdb 0.5.1-dev97.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -3763,6 +3763,19 @@ idx_t TableCatalogEntry::StandardColumnCount() const {
      return count;
  }

+ unique_ptr<BaseStatistics> TableCatalogEntry::GetStatistics(ClientContext &context, column_t column_id) {
+     if (column_id == COLUMN_IDENTIFIER_ROW_ID) {
+         return nullptr;
+     }
+     if (column_id >= columns.size()) {
+         throw InternalException("TableCatalogEntry::GetStatistics column_id out of range");
+     }
+     if (columns[column_id].Generated()) {
+         return nullptr;
+     }
+     return storage->GetStatistics(context, columns[column_id].StorageOid());
+ }
+
  unique_ptr<CatalogEntry> TableCatalogEntry::AlterEntry(ClientContext &context, AlterInfo *info) {
      D_ASSERT(!internal);
      if (info->type != AlterType::ALTER_TABLE) {
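Note: this release starts distinguishing a column's logical position in the catalog from its position in storage — GetStatistics above maps column_id through StorageOid() and returns no statistics for generated columns, which have no backing storage. An illustrative sketch of why the two indices diverge (the table and syntax are examples, not taken from this diff):

    // CREATE TABLE t (a INT, g INT GENERATED ALWAYS AS (a + 1), b INT);
    //   logical (catalog) index:  a = 0, g = 1, b = 2
    //   storage index:            a = 0, g = (none), b = 1
    // Passing the logical index of b straight to the storage layer would read
    // the wrong column; StorageOid() performs the logical-to-storage mapping.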
@@ -3830,6 +3843,9 @@ static void RenameExpression(ParsedExpression &expr, RenameColumnInfo &info) {

  unique_ptr<CatalogEntry> TableCatalogEntry::RenameColumn(ClientContext &context, RenameColumnInfo &info) {
      auto rename_idx = GetColumnIndex(info.old_name);
+     if (rename_idx == COLUMN_IDENTIFIER_ROW_ID) {
+         throw CatalogException("Cannot rename rowid column");
+     }
      auto create_info = make_unique<CreateTableInfo>(schema->name, name);
      create_info->temporary = temporary;
      for (idx_t i = 0; i < columns.size(); i++) {
@@ -3932,6 +3948,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::AddColumn(ClientContext &context, Ad
  unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context, RemoveColumnInfo &info) {
      auto removed_index = GetColumnIndex(info.removed_column, info.if_column_exists);
      if (removed_index == DConstants::INVALID_INDEX) {
+         if (!info.if_column_exists) {
+             throw CatalogException("Cannot drop column: rowid column cannot be dropped");
+         }
          return nullptr;
      }

@@ -4038,7 +4057,7 @@ unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context,
          return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
                                                storage);
      }
-     auto new_storage = make_shared<DataTable>(context, *storage, removed_index);
+     auto new_storage = make_shared<DataTable>(context, *storage, columns[removed_index].StorageOid());
      return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
                                            new_storage);
  }
@@ -4046,13 +4065,18 @@ unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context,
  unique_ptr<CatalogEntry> TableCatalogEntry::SetDefault(ClientContext &context, SetDefaultInfo &info) {
      auto create_info = make_unique<CreateTableInfo>(schema->name, name);
      auto default_idx = GetColumnIndex(info.column_name);
+     if (default_idx == COLUMN_IDENTIFIER_ROW_ID) {
+         throw CatalogException("Cannot SET DEFAULT for rowid column");
+     }

      // Copy all the columns, changing the value of the one that was specified by 'column_name'
      for (idx_t i = 0; i < columns.size(); i++) {
          auto copy = columns[i].Copy();
          if (default_idx == i) {
              // set the default value of this column
-             D_ASSERT(!copy.Generated()); // Shouldnt reach here - DEFAULT value isn't supported for Generated Columns
+             if (copy.Generated()) {
+                 throw BinderException("Cannot SET DEFAULT for generated column \"%s\"", columns[i].Name());
+             }
              copy.SetDefaultValue(info.expression ? info.expression->Copy() : nullptr);
          }
          create_info->columns.push_back(move(copy));
@@ -4077,6 +4101,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetNotNull(ClientContext &context, S
      }

      idx_t not_null_idx = GetColumnIndex(info.column_name);
+     if (columns[not_null_idx].Generated()) {
+         throw BinderException("Unsupported constraint for generated column!");
+     }
      bool has_not_null = false;
      for (idx_t i = 0; i < constraints.size(); i++) {
          auto constraint = constraints[i]->Copy();
@@ -4100,8 +4127,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetNotNull(ClientContext &context, S
                                                storage);
      }

-     // Return with new storage info
-     auto new_storage = make_shared<DataTable>(context, *storage, make_unique<NotNullConstraint>(not_null_idx));
+     // Return with new storage info. Note that we need the bound column index here.
+     auto new_storage = make_shared<DataTable>(context, *storage,
+                                               make_unique<BoundNotNullConstraint>(columns[not_null_idx].StorageOid()));
      return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
                                            new_storage);
  }
@@ -4207,12 +4235,19 @@ unique_ptr<CatalogEntry> TableCatalogEntry::ChangeColumnType(ClientContext &cont
      auto expression = info.expression->Copy();
      auto bound_expression = expr_binder.Bind(expression);
      auto bound_create_info = binder->BindCreateTableInfo(move(create_info));
+     vector<column_t> storage_oids;
      if (bound_columns.empty()) {
-         bound_columns.push_back(COLUMN_IDENTIFIER_ROW_ID);
+         storage_oids.push_back(COLUMN_IDENTIFIER_ROW_ID);
+     }
+     // transform to storage_oid
+     else {
+         for (idx_t i = 0; i < bound_columns.size(); i++) {
+             storage_oids.push_back(columns[bound_columns[i]].StorageOid());
+         }
      }

-     auto new_storage =
-         make_shared<DataTable>(context, *storage, change_idx, info.target_type, move(bound_columns), *bound_expression);
+     auto new_storage = make_shared<DataTable>(context, *storage, columns[change_idx].StorageOid(), info.target_type,
+                                               move(storage_oids), *bound_expression);
      auto result =
          make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(), new_storage);
      return move(result);
@@ -4460,7 +4495,7 @@ void TableCatalogEntry::CommitAlter(AlterInfo &info) {
          }
      }
      D_ASSERT(removed_index != DConstants::INVALID_INDEX);
-     storage->CommitDropColumn(removed_index);
+     storage->CommitDropColumn(columns[removed_index].StorageOid());
  }

  void TableCatalogEntry::CommitDrop() {
@@ -5030,11 +5065,13 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
              throw CatalogException(rename_err_msg, original_name, value->name);
          }
      }
+     }
+
+     if (value->name != original_name) {
+         // Do PutMapping and DeleteMapping after dependency check
          PutMapping(context, value->name, entry_index);
          DeleteMapping(context, original_name);
      }
-     //! Check the dependency manager to verify that there are no conflicting dependencies with this alter
-     catalog.dependency_manager->AlterObject(context, entry, value.get());

      value->timestamp = transaction.transaction_id;
      value->child = move(entries[entry_index]);
@@ -5046,10 +5083,18 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
      alter_info->Serialize(serializer);
      BinaryData serialized_alter = serializer.GetData();

+     auto new_entry = value.get();
+
      // push the old entry in the undo buffer for this transaction
      transaction.PushCatalogEntry(value->child.get(), serialized_alter.data.get(), serialized_alter.size);
      entries[entry_index] = move(value);

+     // Check the dependency manager to verify that there are no conflicting dependencies with this alter
+     // Note that we do this AFTER the new entry has been entirely set up in the catalog set
+     // that is because in case the alter fails because of a dependency conflict, we need to be able to cleanly roll back
+     // to the old entry.
+     catalog.dependency_manager->AlterObject(context, entry, new_entry);
+
      return true;
  }

@@ -6602,7 +6647,7 @@ static void GetBitPosition(idx_t row_idx, idx_t &current_byte, uint8_t &current_
  }

  static void UnsetBit(uint8_t *data, idx_t current_byte, uint8_t current_bit) {
-     data[current_byte] &= ~(1 << current_bit);
+     data[current_byte] &= ~((uint64_t)1 << current_bit);
  }

  static void NextBit(idx_t &current_byte, uint8_t &current_bit) {
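Note: this is the first of several hunks that widen `1 << n` shifts to 64 bits (see also RadixPartitioning, PerfectAggregateHashTable, and the C API validity helpers below). A minimal standalone illustration of the bug class, not code from this diff:

    #include <cstdint>
    #include <cstdio>

    int main() {
        uint64_t validity = 0;
        unsigned row = 40;
        // Wrong: the literal 1 is a 32-bit int, so shifting it by 40 is
        // undefined behaviour and in practice produces a truncated mask:
        //   validity |= 1 << row;
        // Right: widen the operand first, as the diff does with (uint64_t)1:
        validity |= (uint64_t)1 << row;
        printf("%016llx\n", (unsigned long long)validity); // 0000010000000000
        return 0;
    }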
@@ -9546,6 +9591,8 @@ void Exception::ThrowAsTypeWithMessage(ExceptionType type, const string &message
          throw ParameterNotAllowedException(message);
      case ExceptionType::PARAMETER_NOT_RESOLVED:
          throw ParameterNotResolvedException();
+     case ExceptionType::FATAL:
+         throw FatalException(message);
      default:
          throw Exception(type, message);
      }
@@ -16811,9 +16858,15 @@ string FileSystem::ConvertSeparators(const string &path) {
  }

  string FileSystem::ExtractBaseName(const string &path) {
+     if (path.empty()) {
+         return string();
+     }
      auto normalized_path = ConvertSeparators(path);
      auto sep = PathSeparator();
-     auto vec = StringUtil::Split(StringUtil::Split(normalized_path, sep).back(), ".");
+     auto splits = StringUtil::Split(normalized_path, sep);
+     D_ASSERT(!splits.empty());
+     auto vec = StringUtil::Split(splits.back(), ".");
+     D_ASSERT(!vec.empty());
      return vec[0];
  }

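Note: the old one-liner called .back() on the result of Split, which is undefined for an empty path; the rewrite guards the empty case and asserts the intermediate results. Expected behaviour, for illustration:

    // FileSystem::ExtractBaseName examples (illustrative):
    //   "/tmp/data/file.csv"  -> "file"
    //   "archive.tar.gz"      -> "archive"
    //   ""                    -> ""   (new early return; previously undefined)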
@@ -18888,7 +18941,8 @@ namespace duckdb {

  static unordered_map<column_t, string> GetKnownColumnValues(string &filename,
                                                              unordered_map<string, column_t> &column_map,
-                                                             bool filename_col, bool hive_partition_cols) {
+                                                             duckdb_re2::RE2 &compiled_regex, bool filename_col,
+                                                             bool hive_partition_cols) {
      unordered_map<column_t, string> result;

      if (filename_col) {
@@ -18899,7 +18953,7 @@ static unordered_map<column_t, string> GetKnownColumnValues(string &filename,
      }

      if (hive_partition_cols) {
-         auto partitions = HivePartitioning::Parse(filename);
+         auto partitions = HivePartitioning::Parse(filename, compiled_regex);
          for (auto &partition : partitions) {
              auto lookup_column_id = column_map.find(partition.first);
              if (lookup_column_id != column_map.end()) {
@@ -18937,10 +18991,10 @@ static void ConvertKnownColRefToConstants(unique_ptr<Expression> &expr,
  // - s3://bucket/var1=value1/bla/bla/var2=value2
  // - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value
  // - folder/folder/folder/../var1=value1/etc/.//var2=value2
- std::map<string, string> HivePartitioning::Parse(string &filename) {
-     std::map<string, string> result;
+ const string HivePartitioning::REGEX_STRING = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";

-     string regex = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
+ std::map<string, string> HivePartitioning::Parse(string &filename, duckdb_re2::RE2 &regex) {
+     std::map<string, string> result;
      duckdb_re2::StringPiece input(filename); // Wrap a StringPiece around it

      string var;
@@ -18951,6 +19005,11 @@ std::map<string, string> HivePartitioning::Parse(string &filename) {
      return result;
  }

+ std::map<string, string> HivePartitioning::Parse(string &filename) {
+     duckdb_re2::RE2 regex(REGEX_STRING);
+     return Parse(filename, regex);
+ }
+
  // TODO: this can still be improved by removing the parts of filter expressions that are true for all remaining files.
  // currently, only expressions that cannot be evaluated during pushdown are removed.
  void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<unique_ptr<Expression>> &filters,
@@ -18958,6 +19017,7 @@ void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<uniq
                                                bool hive_enabled, bool filename_enabled) {
      vector<string> pruned_files;
      vector<unique_ptr<Expression>> pruned_filters;
+     duckdb_re2::RE2 regex(REGEX_STRING);

      if ((!filename_enabled && !hive_enabled) || filters.empty()) {
          return;
@@ -18966,7 +19026,7 @@ void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<uniq
      for (idx_t i = 0; i < files.size(); i++) {
          auto &file = files[i];
          bool should_prune_file = false;
-         auto known_values = GetKnownColumnValues(file, column_map, filename_enabled, hive_enabled);
+         auto known_values = GetKnownColumnValues(file, column_map, regex, filename_enabled, hive_enabled);

          FilterCombiner combiner;
          for (auto &filter : filters) {
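Note: the partition-parsing regex is now compiled once per ApplyFiltersToFileList call (from the shared REGEX_STRING constant) and reused for every file, instead of being recompiled inside HivePartitioning::Parse for each file. The reuse pattern, using only names from this diff:

    duckdb_re2::RE2 regex(HivePartitioning::REGEX_STRING); // compile once
    for (auto &file : files) {
        auto partitions = HivePartitioning::Parse(file, regex); // reuse per file
    }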
@@ -19198,6 +19258,8 @@ private:
      //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location
      void SetFilePointer(FileHandle &handle, idx_t location);
      idx_t GetFilePointer(FileHandle &handle);
+
+     vector<string> FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path);
  };

  } // namespace duckdb
@@ -20079,6 +20141,26 @@ static void GlobFiles(FileSystem &fs, const string &path, const string &glob, bo
      });
  }

+ vector<string> LocalFileSystem::FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path) {
+     vector<string> result;
+     if (FileExists(path) || IsPipe(path)) {
+         result.push_back(path);
+     } else if (!absolute_path) {
+         Value value;
+         if (opener->TryGetCurrentSetting("file_search_path", value)) {
+             auto search_paths_str = value.ToString();
+             std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
+             for (const auto &search_path : search_paths) {
+                 auto joined_path = JoinPath(search_path, path);
+                 if (FileExists(joined_path) || IsPipe(joined_path)) {
+                     result.push_back(joined_path);
+                 }
+             }
+         }
+     }
+     return result;
+ }
+
  vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
      if (path.empty()) {
          return vector<string>();
@@ -20125,23 +20207,7 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
      // Check if the path has a glob at all
      if (!HasGlob(path)) {
          // no glob: return only the file (if it exists or is a pipe)
-         vector<string> result;
-         if (FileExists(path) || IsPipe(path)) {
-             result.push_back(path);
-         } else if (!absolute_path) {
-             Value value;
-             if (opener->TryGetCurrentSetting("file_search_path", value)) {
-                 auto search_paths_str = value.ToString();
-                 std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
-                 for (const auto &search_path : search_paths) {
-                     auto joined_path = JoinPath(search_path, path);
-                     if (FileExists(joined_path) || IsPipe(joined_path)) {
-                         result.push_back(joined_path);
-                     }
-                 }
-             }
-         }
-         return result;
+         return FetchFileWithoutGlob(path, opener, absolute_path);
      }
      vector<string> previous_directories;
      if (absolute_path) {
@@ -20175,7 +20241,12 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
                  }
              }
          }
-         if (is_last_chunk || result.empty()) {
+         if (result.empty()) {
+             // no result found that matches the glob
+             // last ditch effort: search the path as a string literal
+             return FetchFileWithoutGlob(path, opener, absolute_path);
+         }
+         if (is_last_chunk) {
              return result;
          }
          previous_directories = move(result);
@@ -22524,14 +22595,16 @@ struct IntervalToStringCast {
      if (micros < 0) {
          // negative time: append negative sign
          buffer[length++] = '-';
+     } else {
          micros = -micros;
      }
-     int64_t hour = micros / Interval::MICROS_PER_HOUR;
-     micros -= hour * Interval::MICROS_PER_HOUR;
-     int64_t min = micros / Interval::MICROS_PER_MINUTE;
-     micros -= min * Interval::MICROS_PER_MINUTE;
-     int64_t sec = micros / Interval::MICROS_PER_SEC;
-     micros -= sec * Interval::MICROS_PER_SEC;
+     int64_t hour = -(micros / Interval::MICROS_PER_HOUR);
+     micros += hour * Interval::MICROS_PER_HOUR;
+     int64_t min = -(micros / Interval::MICROS_PER_MINUTE);
+     micros += min * Interval::MICROS_PER_MINUTE;
+     int64_t sec = -(micros / Interval::MICROS_PER_SEC);
+     micros += sec * Interval::MICROS_PER_SEC;
+     micros = -micros;

      if (hour < 10) {
          buffer[length++] = '0';
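Note: the rewritten normalization keeps micros negative while extracting components, because the old `micros = -micros` overflows when micros is INT64_MIN (its negation is not representable). A worked example of the new arithmetic (values are illustrative):

    // micros = -3661000000 (-1h 1m 1s), MICROS_PER_HOUR = 3600000000:
    //   hour = -(-3661000000 / 3600000000) = 1; micros += 3600000000 -> -61000000
    //   min  = -(-61000000 / 60000000)     = 1; micros += 60000000   -> -1000000
    //   sec  = -(-1000000 / 1000000)       = 1; micros += 1000000    -> 0
    //   micros = -micros                   -> 0 leftover microseconds
    // C++ integer division truncates toward zero, so dividing the negative
    // value and negating the quotient never has to compute -INT64_MIN.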
@@ -28654,7 +28727,7 @@ template <idx_t radix_bits>
  struct RadixPartitioningConstants {
  public:
      static constexpr const idx_t NUM_RADIX_BITS = radix_bits;
-     static constexpr const idx_t NUM_PARTITIONS = 1 << NUM_RADIX_BITS;
+     static constexpr const idx_t NUM_PARTITIONS = (idx_t)1 << NUM_RADIX_BITS;
      static constexpr const idx_t TMP_BUF_SIZE = 8;

  public:
@@ -28672,7 +28745,7 @@ private:
  struct RadixPartitioning {
  public:
      static idx_t NumberOfPartitions(idx_t radix_bits) {
-         return 1 << radix_bits;
+         return (idx_t)1 << radix_bits;
      }

      //! Partition the data in block_collection/string_heap to multiple partitions
@@ -39531,7 +39604,7 @@ public:
  namespace duckdb {

  enum class UnicodeType { INVALID, ASCII, UNICODE };
- enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE };
+ enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE, INVALID_UNICODE };

  class Utf8Proc {
  public:
@@ -50360,6 +50433,24 @@ void StringVector::AddHeapReference(Vector &vector, Vector &other) {
      StringVector::AddBuffer(vector, other.auxiliary);
  }

+ Vector &MapVector::GetKeys(Vector &vector) {
+     auto &entries = StructVector::GetEntries(vector);
+     D_ASSERT(entries.size() == 2);
+     return *entries[0];
+ }
+ Vector &MapVector::GetValues(Vector &vector) {
+     auto &entries = StructVector::GetEntries(vector);
+     D_ASSERT(entries.size() == 2);
+     return *entries[1];
+ }
+
+ const Vector &MapVector::GetKeys(const Vector &vector) {
+     return GetKeys((Vector &)vector);
+ }
+ const Vector &MapVector::GetValues(const Vector &vector) {
+     return GetValues((Vector &)vector);
+ }
+
  vector<unique_ptr<Vector>> &StructVector::GetEntries(Vector &vector) {
      D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT || vector.GetType().id() == LogicalTypeId::MAP);
      if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
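Note: a MAP vector is represented as a two-entry STRUCT (a list of keys and a list of values); the new MapVector accessors encode that layout in one place. A minimal usage sketch, assuming a Vector whose type is MAP as elsewhere in this diff:

    void InspectMap(duckdb::Vector &map_vec) {
        using namespace duckdb;
        D_ASSERT(map_vec.GetType().id() == LogicalTypeId::MAP);
        Vector &keys = MapVector::GetKeys(map_vec);     // struct entry 0
        Vector &values = MapVector::GetValues(map_vec); // struct entry 1
        // operate on the key/value child vectors directly ...
        (void)keys;
        (void)values;
    }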
@@ -63217,6 +63308,16 @@ bool DistinctAggregateData::IsDistinct(idx_t index) const {



+ //===----------------------------------------------------------------------===//
+ // DuckDB
+ //
+ // duckdb/parallel/base_pipeline_event.hpp
+ //
+ //
+ //===----------------------------------------------------------------------===//
+
+
+
  //===----------------------------------------------------------------------===//
  // DuckDB
  //
@@ -63290,6 +63391,22 @@ protected:



+ namespace duckdb {
+
+ //! A BasePipelineEvent is used as the basis of any event that belongs to a specific pipeline
+ class BasePipelineEvent : public Event {
+ public:
+     BasePipelineEvent(shared_ptr<Pipeline> pipeline);
+     BasePipelineEvent(Pipeline &pipeline);
+
+     //! The pipeline that this event belongs to
+     shared_ptr<Pipeline> pipeline;
+ };
+
+ } // namespace duckdb
+
+
+
  namespace duckdb {

  PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types,
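Note: BasePipelineEvent is the basis for a recurring refactor in this release: events that previously held a raw Pipeline pointer or reference now inherit a shared_ptr, which keeps the pipeline alive for as long as the event can run. Sketch of a derived event under the new pattern (the class name is hypothetical):

    class ExampleMergeEvent : public duckdb::BasePipelineEvent {
    public:
        explicit ExampleMergeEvent(duckdb::Pipeline &pipeline_p)
            : BasePipelineEvent(pipeline_p) { // base stores pipeline_p.shared_from_this()
        }

        void Schedule() override {
            // the inherited member is a shared_ptr, hence '->' rather than '.'
            auto &context = pipeline->GetClientContext();
            (void)context;
        }
    };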
@@ -63446,16 +63563,15 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
      }
  }

- class HashAggregateFinalizeEvent : public Event {
+ class HashAggregateFinalizeEvent : public BasePipelineEvent {
  public:
      HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
                                 Pipeline *pipeline_p)
-         : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p) {
+         : BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p) {
      }

      const PhysicalHashAggregate &op;
      HashAggregateGlobalState &gstate;
-     Pipeline *pipeline;

  public:
      void Schedule() override {
@@ -64717,15 +64833,14 @@ private:
  };

  // TODO: Create tasks and run these in parallel instead of doing this all in Schedule, single threaded
- class DistinctAggregateFinalizeEvent : public Event {
+ class DistinctAggregateFinalizeEvent : public BasePipelineEvent {
  public:
      DistinctAggregateFinalizeEvent(const PhysicalUngroupedAggregate &op_p, UngroupedAggregateGlobalState &gstate_p,
-                                    Pipeline *pipeline_p, ClientContext &context)
-         : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p), context(context) {
+                                    Pipeline &pipeline_p, ClientContext &context)
+         : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), context(context) {
      }
      const PhysicalUngroupedAggregate &op;
      UngroupedAggregateGlobalState &gstate;
-     Pipeline *pipeline;
      ClientContext &context;

  public:
@@ -64738,16 +64853,15 @@ public:
      }
  };

- class DistinctCombineFinalizeEvent : public Event {
+ class DistinctCombineFinalizeEvent : public BasePipelineEvent {
  public:
      DistinctCombineFinalizeEvent(const PhysicalUngroupedAggregate &op_p, UngroupedAggregateGlobalState &gstate_p,
-                                  Pipeline *pipeline_p, ClientContext &client)
-         : Event(pipeline_p->executor), op(op_p), gstate(gstate_p), pipeline(pipeline_p), client(client) {
+                                  Pipeline &pipeline_p, ClientContext &client)
+         : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), client(client) {
      }

      const PhysicalUngroupedAggregate &op;
      UngroupedAggregateGlobalState &gstate;
-     Pipeline *pipeline;
      ClientContext &client;

  public:
@@ -64763,7 +64877,7 @@ public:
          SetTasks(move(tasks));

          //! Now that all tables are combined, it's time to do the distinct aggregations
-         auto new_event = make_shared<DistinctAggregateFinalizeEvent>(op, gstate, pipeline, client);
+         auto new_event = make_shared<DistinctAggregateFinalizeEvent>(op, gstate, *pipeline, client);
          this->InsertEvent(move(new_event));
      }
  };
@@ -64792,12 +64906,12 @@ SinkFinalizeType PhysicalUngroupedAggregate::FinalizeDistinct(Pipeline &pipeline
          }
      }
      if (any_partitioned) {
-         auto new_event = make_shared<DistinctCombineFinalizeEvent>(*this, gstate, &pipeline, context);
+         auto new_event = make_shared<DistinctCombineFinalizeEvent>(*this, gstate, pipeline, context);
          event.InsertEvent(move(new_event));
      } else {
          //! Hashtables aren't partitioned, they dont need to be joined first
          //! So we can compute the aggregate already
-         auto new_event = make_shared<DistinctAggregateFinalizeEvent>(*this, gstate, &pipeline, context);
+         auto new_event = make_shared<DistinctAggregateFinalizeEvent>(*this, gstate, pipeline, context);
          event.InsertEvent(move(new_event));
      }
      return SinkFinalizeType::READY;
@@ -66543,19 +66657,18 @@ private:
      WindowGlobalHashGroup &hash_group;
  };

- class WindowMergeEvent : public Event {
+ class WindowMergeEvent : public BasePipelineEvent {
  public:
      WindowMergeEvent(WindowGlobalSinkState &gstate_p, Pipeline &pipeline_p, WindowGlobalHashGroup &hash_group_p)
-         : Event(pipeline_p.executor), gstate(gstate_p), pipeline(pipeline_p), hash_group(hash_group_p) {
+         : BasePipelineEvent(pipeline_p), gstate(gstate_p), hash_group(hash_group_p) {
      }

      WindowGlobalSinkState &gstate;
-     Pipeline &pipeline;
      WindowGlobalHashGroup &hash_group;

  public:
      void Schedule() override {
-         auto &context = pipeline.GetClientContext();
+         auto &context = pipeline->GetClientContext();

          // Schedule tasks equal to the number of threads, which will each merge multiple partitions
          auto &ts = TaskScheduler::GetScheduler(context);
@@ -66570,7 +66683,7 @@ public:

      void FinishEvent() override {
          hash_group.global_sort->CompleteMergeRound(true);
-         CreateMergeTasks(pipeline, *this, gstate, hash_group);
+         CreateMergeTasks(*pipeline, *this, gstate, hash_group);
      }

      static void CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
@@ -67979,6 +68092,11 @@ public:

  private:
      static const vector<string> PathComponents();
+     //! For tagged releases we use the tag, else we use the git commit hash
+     static const string GetVersionDirectoryName();
+     //! Version tags occur with and without 'v', tag in extension path is always with 'v'
+     static const string NormalizeVersionTag(const string &version_tag);
+     static bool IsRelease(const string &version_tag);

  private:
      static ExtensionLoadResult LoadExtensionInternal(DuckDB &db, const std::string &extension, bool initial_load);
@@ -70905,18 +71023,17 @@ private:
      bool parallel;
  };

- class HashJoinFinalizeEvent : public Event {
+ class HashJoinFinalizeEvent : public BasePipelineEvent {
  public:
      HashJoinFinalizeEvent(Pipeline &pipeline_p, HashJoinGlobalSinkState &sink)
-         : Event(pipeline_p.executor), pipeline(pipeline_p), sink(sink) {
+         : BasePipelineEvent(pipeline_p), sink(sink) {
      }

-     Pipeline &pipeline;
      HashJoinGlobalSinkState &sink;

  public:
      void Schedule() override {
-         auto &context = pipeline.GetClientContext();
+         auto &context = pipeline->GetClientContext();
          auto parallel_construct_count =
              context.config.verify_parallelism ? STANDARD_VECTOR_SIZE : PARALLEL_CONSTRUCT_COUNT;

@@ -70983,20 +71100,19 @@ private:
      JoinHashTable &local_ht;
  };

- class HashJoinPartitionEvent : public Event {
+ class HashJoinPartitionEvent : public BasePipelineEvent {
  public:
      HashJoinPartitionEvent(Pipeline &pipeline_p, HashJoinGlobalSinkState &sink,
                             vector<unique_ptr<JoinHashTable>> &local_hts)
-         : Event(pipeline_p.executor), pipeline(pipeline_p), sink(sink), local_hts(local_hts) {
+         : BasePipelineEvent(pipeline_p), sink(sink), local_hts(local_hts) {
      }

-     Pipeline &pipeline;
      HashJoinGlobalSinkState &sink;
      vector<unique_ptr<JoinHashTable>> &local_hts;

  public:
      void Schedule() override {
-         auto &context = pipeline.GetClientContext();
+         auto &context = pipeline->GetClientContext();
          vector<unique_ptr<Task>> partition_tasks;
          partition_tasks.reserve(local_hts.size());
          for (auto &local_ht : local_hts) {
@@ -71009,7 +71125,7 @@ public:
      void FinishEvent() override {
          local_hts.clear();
          sink.hash_table->PrepareExternalFinalize();
-         sink.ScheduleFinalize(pipeline, *this);
+         sink.ScheduleFinalize(*pipeline, *this);
      }
  };

@@ -74713,21 +74829,20 @@ private:
      GlobalSortedTable &table;
  };

- class RangeJoinMergeEvent : public Event {
+ class RangeJoinMergeEvent : public BasePipelineEvent {
  public:
      using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;

  public:
      RangeJoinMergeEvent(GlobalSortedTable &table_p, Pipeline &pipeline_p)
-         : Event(pipeline_p.executor), table(table_p), pipeline(pipeline_p) {
+         : BasePipelineEvent(pipeline_p), table(table_p) {
      }

      GlobalSortedTable &table;
-     Pipeline &pipeline;

  public:
      void Schedule() override {
-         auto &context = pipeline.GetClientContext();
+         auto &context = pipeline->GetClientContext();

          // Schedule tasks equal to the number of threads, which will each merge multiple partitions
          auto &ts = TaskScheduler::GetScheduler(context);
@@ -74746,7 +74861,7 @@ public:
          global_sort_state.CompleteMergeRound(true);
          if (global_sort_state.sorted_blocks.size() > 1) {
              // Multiple blocks remaining: Schedule the next round
-             table.ScheduleMergeTasks(pipeline, *this);
+             table.ScheduleMergeTasks(*pipeline, *this);
          }
      }
  };
@@ -75134,18 +75249,17 @@ private:
      OrderGlobalState &state;
  };

- class OrderMergeEvent : public Event {
+ class OrderMergeEvent : public BasePipelineEvent {
  public:
      OrderMergeEvent(OrderGlobalState &gstate_p, Pipeline &pipeline_p)
-         : Event(pipeline_p.executor), gstate(gstate_p), pipeline(pipeline_p) {
+         : BasePipelineEvent(pipeline_p), gstate(gstate_p) {
      }

      OrderGlobalState &gstate;
-     Pipeline &pipeline;

  public:
      void Schedule() override {
-         auto &context = pipeline.GetClientContext();
+         auto &context = pipeline->GetClientContext();

          // Schedule tasks equal to the number of threads, which will each merge multiple partitions
          auto &ts = TaskScheduler::GetScheduler(context);
@@ -75164,7 +75278,7 @@ public:
          global_sort_state.CompleteMergeRound();
          if (global_sort_state.sorted_blocks.size() > 1) {
              // Multiple blocks remaining: Schedule the next round
-             PhysicalOrder::ScheduleMergeTasks(pipeline, *this, gstate);
+             PhysicalOrder::ScheduleMergeTasks(*pipeline, *this, gstate);
          }
      }
  };
@@ -80064,10 +80178,17 @@ void PhysicalCreateIndex::GetData(ExecutionContext &context, DataChunk &chunk, G
          return;
      }

+     // convert virtual column ids to storage column ids
+     vector<column_t> storage_ids;
+     for (auto &column_id : column_ids) {
+         D_ASSERT(column_id < table.columns.size());
+         storage_ids.push_back(table.columns[column_id].StorageOid());
+     }
+
      unique_ptr<Index> index;
      switch (info->index_type) {
      case IndexType::ART: {
-         index = make_unique<ART>(column_ids, unbound_expressions, info->constraint_type, *context.client.db);
+         index = make_unique<ART>(storage_ids, unbound_expressions, info->constraint_type, *context.client.db);
          break;
      }
      default:
@@ -80372,11 +80493,10 @@ unique_ptr<GlobalSinkState> PhysicalCreateTableAs::GetGlobalSinkState(ClientCont
  SinkResultType PhysicalCreateTableAs::Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p,
                                             DataChunk &input) const {
      auto &sink = (CreateTableAsGlobalState &)state;
-     if (sink.table) {
-         lock_guard<mutex> client_guard(sink.append_lock);
-         sink.table->storage->Append(*sink.table, context.client, input);
-         sink.inserted_count += input.size();
-     }
+     D_ASSERT(sink.table);
+     lock_guard<mutex> client_guard(sink.append_lock);
+     sink.table->storage->Append(*sink.table, context.client, input);
+     sink.inserted_count += input.size();
      return SinkResultType::NEED_MORE_INPUT;
  }

@@ -80786,6 +80906,7 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context)
  void PhysicalRecursiveCTE::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
      op_state.reset();
      sink_state.reset();
+     pipelines.clear();

      // recursive CTE
      state.SetPipelineSource(current, this);
81085
81206
  total_required_bits += group_bits;
81086
81207
  }
81087
81208
  // the total amount of groups we allocate space for is 2^required_bits
81088
- total_groups = 1 << total_required_bits;
81209
+ total_groups = (uint64_t)1 << total_required_bits;
81089
81210
  // we don't need to store the groups in a perfect hash table, since the group keys can be deduced by their location
81090
81211
  grouping_columns = group_types_p.size();
81091
81212
  layout.Initialize(move(aggregate_objects_p));
@@ -81269,7 +81390,7 @@ static void ReconstructGroupVectorTemplated(uint32_t group_values[], Value &min,
  static void ReconstructGroupVector(uint32_t group_values[], Value &min, idx_t required_bits, idx_t shift,
                                     idx_t entry_count, Vector &result) {
      // construct the mask for this entry
-     idx_t mask = (1 << required_bits) - 1;
+     idx_t mask = ((uint64_t)1 << required_bits) - 1;
      switch (result.GetType().InternalType()) {
      case PhysicalType::INT8:
          ReconstructGroupVectorTemplated<int8_t>(group_values, min, mask, shift, entry_count, result);
@@ -85516,7 +85637,7 @@ void RadixPartitionedHashTable::SetGroupingValues() {
      for (idx_t i = 0; i < grouping.size(); i++) {
          if (grouping_set.find(grouping[i]) == grouping_set.end()) {
              // we don't group on this value!
-             grouping_value += 1 << (grouping.size() - (i + 1));
+             grouping_value += (int64_t)1 << (grouping.size() - (i + 1));
          }
      }
      grouping_values.push_back(Value::BIGINT(grouping_value));
@@ -91074,7 +91195,21 @@ struct ModeIncluded {
      const idx_t bias;
  };

- template <typename KEY_TYPE>
+ struct ModeAssignmentStandard {
+     template <class INPUT_TYPE, class RESULT_TYPE>
+     static RESULT_TYPE Assign(Vector &result, INPUT_TYPE input) {
+         return RESULT_TYPE(input);
+     }
+ };
+
+ struct ModeAssignmentString {
+     template <class INPUT_TYPE, class RESULT_TYPE>
+     static RESULT_TYPE Assign(Vector &result, INPUT_TYPE input) {
+         return StringVector::AddString(result, input);
+     }
+ };
+
+ template <typename KEY_TYPE, typename ASSIGN_OP>
  struct ModeFunction {
      template <class STATE>
      static void Initialize(STATE *state) {
@@ -91187,7 +91322,7 @@ struct ModeFunction {
          }

          if (state->valid) {
-             rdata[rid] = RESULT_TYPE(*state->mode);
+             rdata[rid] = ASSIGN_OP::template Assign<INPUT_TYPE, RESULT_TYPE>(result, *state->mode);
          } else {
              rmask.Set(rid, false);
          }
@@ -91203,10 +91338,10 @@ struct ModeFunction {
      }
  };

- template <typename INPUT_TYPE, typename KEY_TYPE>
+ template <typename INPUT_TYPE, typename KEY_TYPE, typename ASSIGN_OP = ModeAssignmentStandard>
  AggregateFunction GetTypedModeFunction(const LogicalType &type) {
      using STATE = ModeState<KEY_TYPE>;
-     using OP = ModeFunction<KEY_TYPE>;
+     using OP = ModeFunction<KEY_TYPE, ASSIGN_OP>;
      auto func = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, INPUT_TYPE, OP>(type, type);
      func.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, INPUT_TYPE, OP>;
      return func;
@@ -91242,7 +91377,7 @@ AggregateFunction GetModeAggregate(const LogicalType &type) {
          return GetTypedModeFunction<interval_t, interval_t>(type);

      case PhysicalType::VARCHAR:
-         return GetTypedModeFunction<string_t, string>(type);
+         return GetTypedModeFunction<string_t, string, ModeAssignmentString>(type);

      default:
          throw NotImplementedException("Unimplemented mode aggregate");
@@ -105407,16 +105542,21 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
      auto &map = args.data[0];
      auto &key = args.data[1];

-     UnifiedVectorFormat offset_data;
+     UnifiedVectorFormat map_keys_data;
+     UnifiedVectorFormat key_data;

-     auto &children = StructVector::GetEntries(map);
+     auto &map_keys = MapVector::GetKeys(map);
+     auto &map_values = MapVector::GetValues(map);
+
+     map_keys.ToUnifiedFormat(args.size(), map_keys_data);
+     key.ToUnifiedFormat(args.size(), key_data);

-     children[0]->ToUnifiedFormat(args.size(), offset_data);
      for (idx_t row = 0; row < args.size(); row++) {
-         idx_t row_index = offset_data.sel->get_index(row);
-         auto key_value = key.GetValue(row_index);
-         auto offsets = ListVector::Search(*children[0], key_value, offset_data.sel->get_index(row));
-         auto values = ListVector::GetValuesFromOffsets(*children[1], offsets);
+         idx_t row_index = map_keys_data.sel->get_index(row);
+         idx_t key_index = key_data.sel->get_index(row);
+         auto key_value = key.GetValue(key_index);
+         auto offsets = ListVector::Search(map_keys, key_value, row_index);
+         auto values = ListVector::GetValuesFromOffsets(map_values, offsets);
          FillResult(values, result, row);
      }

@@ -108311,6 +108451,24 @@ interval_t DivideOperator::Operation(interval_t left, int64_t right) {
      return left;
  }

+ struct BinaryNumericDivideWrapper {
+     template <class FUNC, class OP, class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE>
+     static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) {
+         if (left == NumericLimits<LEFT_TYPE>::Minimum() && right == -1) {
+             throw OutOfRangeException("Overflow in division of %d / %d", left, right);
+         } else if (right == 0) {
+             mask.SetInvalid(idx);
+             return left;
+         } else {
+             return OP::template Operation<LEFT_TYPE, RIGHT_TYPE, RESULT_TYPE>(left, right);
+         }
+     }
+
+     static bool AddsNulls() {
+         return true;
+     }
+ };
+
  struct BinaryZeroIsNullWrapper {
      template <class FUNC, class OP, class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE>
      static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) {
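Note: BinaryNumericDivideWrapper adds the one signed-division case the zero check does not cover: the minimum value divided by -1, whose result is not representable. A standalone illustration of the two guards (not DuckDB code; DuckDB yields NULL for division by zero, mirrored here by an exception):

    #include <cstdint>
    #include <limits>
    #include <stdexcept>

    int64_t checked_div(int64_t left, int64_t right) {
        if (left == std::numeric_limits<int64_t>::min() && right == -1) {
            // -INT64_MIN == 9223372036854775808 does not fit in int64_t
            throw std::out_of_range("overflow in division");
        }
        if (right == 0) {
            throw std::domain_error("division by zero");
        }
        return left / right; // safe: no overflow, no division by zero
    }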
@@ -108352,13 +108510,13 @@ template <class OP>
  static scalar_function_t GetBinaryFunctionIgnoreZero(const LogicalType &type) {
      switch (type.id()) {
      case LogicalTypeId::TINYINT:
-         return BinaryScalarFunctionIgnoreZero<int8_t, int8_t, int8_t, OP>;
+         return BinaryScalarFunctionIgnoreZero<int8_t, int8_t, int8_t, OP, BinaryNumericDivideWrapper>;
      case LogicalTypeId::SMALLINT:
-         return BinaryScalarFunctionIgnoreZero<int16_t, int16_t, int16_t, OP>;
+         return BinaryScalarFunctionIgnoreZero<int16_t, int16_t, int16_t, OP, BinaryNumericDivideWrapper>;
      case LogicalTypeId::INTEGER:
-         return BinaryScalarFunctionIgnoreZero<int32_t, int32_t, int32_t, OP>;
+         return BinaryScalarFunctionIgnoreZero<int32_t, int32_t, int32_t, OP, BinaryNumericDivideWrapper>;
      case LogicalTypeId::BIGINT:
-         return BinaryScalarFunctionIgnoreZero<int64_t, int64_t, int64_t, OP>;
+         return BinaryScalarFunctionIgnoreZero<int64_t, int64_t, int64_t, OP, BinaryNumericDivideWrapper>;
      case LogicalTypeId::UTINYINT:
          return BinaryScalarFunctionIgnoreZero<uint8_t, uint8_t, uint8_t, OP>;
      case LogicalTypeId::USMALLINT:
@@ -114806,11 +114964,22 @@ static void CurrentSchemaFunction(DataChunk &input, ExpressionState &state, Vect

  // current_schemas
  static void CurrentSchemasFunction(DataChunk &input, ExpressionState &state, Vector &result) {
+     if (!input.AllConstant()) {
+         throw NotImplementedException("current_schemas requires a constant input");
+     }
+     if (ConstantVector::IsNull(input.data[0])) {
+         result.SetVectorType(VectorType::CONSTANT_VECTOR);
+         ConstantVector::SetNull(result, true);
+         return;
+     }
+     auto implicit_schemas = *ConstantVector::GetData<bool>(input.data[0]);
      vector<Value> schema_list;
-     vector<string> search_path = ClientData::Get(SystemBindData::GetFrom(state).context).catalog_search_path->Get();
+     auto &catalog_search_path = ClientData::Get(SystemBindData::GetFrom(state).context).catalog_search_path;
+     vector<string> search_path = implicit_schemas ? catalog_search_path->Get() : catalog_search_path->GetSetPaths();
      std::transform(search_path.begin(), search_path.end(), std::back_inserter(schema_list),
                     [](const string &s) -> Value { return Value(s); });
-     auto val = Value::LIST(schema_list);
+
+     auto val = Value::LIST(LogicalType::VARCHAR, schema_list);
      result.Reference(val);
  }

@@ -115109,8 +115278,8 @@ struct ArrowScanLocalState : public LocalTableFunctionState {
  struct ArrowScanGlobalState : public GlobalTableFunctionState {
      unique_ptr<ArrowArrayStreamWrapper> stream;
      mutex main_mutex;
-     bool ready = false;
      idx_t max_threads = 1;
+     bool done = false;

      idx_t MaxThreads() const override {
          return max_threads;
@@ -115398,6 +115567,9 @@ idx_t ArrowTableFunction::ArrowScanMaxThreads(ClientContext &context, const Func
  bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, ArrowScanLocalState &state,
                                  ArrowScanGlobalState &parallel_state) {
      lock_guard<mutex> parallel_lock(parallel_state.main_mutex);
+     if (parallel_state.done) {
+         return false;
+     }
      state.chunk_offset = 0;

      auto current_chunk = parallel_state.stream->GetNextChunk();
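Note: together with the hunk below, the new done flag latches end-of-stream under main_mutex, so once any thread sees the released Arrow array no later caller touches the drained stream again. The latch pattern, using only names from this diff:

    lock_guard<mutex> parallel_lock(parallel_state.main_mutex);
    if (parallel_state.done) {
        return false; // another thread already drained the stream
    }
    auto current_chunk = parallel_state.stream->GetNextChunk();
    if (!current_chunk->arrow_array.release) { // released array == end of stream
        parallel_state.done = true;            // latch for all later callers
        return false;
    }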
@@ -115407,6 +115579,7 @@ bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind
      state.chunk = move(current_chunk);
      //! have we run out of chunks? we are done
      if (!state.chunk->arrow_array.release) {
+         parallel_state.done = true;
          return false;
      }
      return true;
@@ -117808,6 +117981,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
      table_function.named_parameters["skip"] = LogicalType::BIGINT;
      table_function.named_parameters["max_line_size"] = LogicalType::VARCHAR;
      table_function.named_parameters["maximum_line_size"] = LogicalType::VARCHAR;
+     table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN;
  }

  double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
@@ -121638,8 +121812,7 @@ static unique_ptr<BaseStatistics> TableScanStatistics(ClientContext &context, co
          // we don't emit any statistics for tables that have outstanding transaction-local data
          return nullptr;
      }
-     auto storage_idx = GetStorageIndex(*bind_data.table, column_id);
-     return bind_data.table->storage->GetStatistics(context, storage_idx);
+     return bind_data.table->GetStatistics(context, column_id);
  }

  static void TableScanFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
@@ -123211,7 +123384,7 @@ bool duckdb_validity_row_is_valid(uint64_t *validity, idx_t row) {
      }
      idx_t entry_idx = row / 64;
      idx_t idx_in_entry = row % 64;
-     return validity[entry_idx] & (1 << idx_in_entry);
+     return validity[entry_idx] & ((idx_t)1 << idx_in_entry);
  }

  void duckdb_validity_set_row_validity(uint64_t *validity, idx_t row, bool valid) {
@@ -123228,7 +123401,7 @@ void duckdb_validity_set_row_invalid(uint64_t *validity, idx_t row) {
      }
      idx_t entry_idx = row / 64;
      idx_t idx_in_entry = row % 64;
-     validity[entry_idx] &= ~(1 << idx_in_entry);
+     validity[entry_idx] &= ~((uint64_t)1 << idx_in_entry);
  }

  void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row) {
@@ -123237,7 +123410,7 @@ void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row) {
      }
      idx_t entry_idx = row / 64;
      idx_t idx_in_entry = row % 64;
-     validity[entry_idx] |= 1 << idx_in_entry;
+     validity[entry_idx] |= (uint64_t)1 << idx_in_entry;
  }


@@ -126420,6 +126593,11 @@ PendingExecutionResult ClientContext::ExecuteTaskInternal(ClientContextLock &loc
              query_progress = active_query->progress_bar->GetCurrentPercentage();
          }
          return result;
+     } catch (FatalException &ex) {
+         // fatal exceptions invalidate the entire database
+         result.SetError(PreservedError(ex));
+         auto &db = DatabaseInstance::GetDatabase(*this);
+         db.Invalidate();
      } catch (const Exception &ex) {
          result.SetError(PreservedError(ex));
      } catch (std::exception &ex) {
@@ -126639,9 +126817,19 @@ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatemen
      case StatementType::INSERT_STATEMENT:
      case StatementType::DELETE_STATEMENT:
      case StatementType::UPDATE_STATEMENT: {
-         auto sql = statement->ToString();
          Parser parser;
-         parser.ParseQuery(sql);
+         PreservedError error;
+         try {
+             parser.ParseQuery(statement->ToString());
+         } catch (const Exception &ex) {
+             error = PreservedError(ex);
+         } catch (std::exception &ex) {
+             error = PreservedError(ex);
+         }
+         if (error) {
+             // error in verifying query
+             return make_unique<PendingQueryResult>(error);
+         }
          statement = move(parser.statements[0]);
          break;
      }
@@ -137289,8 +137477,27 @@ namespace duckdb {
  //===--------------------------------------------------------------------===//
  // Install Extension
  //===--------------------------------------------------------------------===//
+ const string ExtensionHelper::NormalizeVersionTag(const string &version_tag) {
+     if (version_tag.length() > 0 && version_tag[0] != 'v') {
+         return "v" + version_tag;
+     }
+     return version_tag;
+ }
+
+ bool ExtensionHelper::IsRelease(const string &version_tag) {
+     return !StringUtil::Contains(version_tag, "-dev");
+ }
+
+ const string ExtensionHelper::GetVersionDirectoryName() {
+     if (IsRelease(DuckDB::LibraryVersion())) {
+         return NormalizeVersionTag(DuckDB::LibraryVersion());
+     } else {
+         return DuckDB::SourceID();
+     }
+ }
+
  const vector<string> ExtensionHelper::PathComponents() {
-     return vector<string> {".duckdb", "extensions", DuckDB::SourceID(), DuckDB::Platform()};
+     return vector<string> {".duckdb", "extensions", GetVersionDirectoryName(), DuckDB::Platform()};
  }

  string ExtensionHelper::ExtensionDirectory(ClientContext &context) {
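Note: with these helpers, the version segment of the extension path is the normalized tag for releases and the git commit hash for dev builds. Illustrative expansions (version strings are examples, not taken from this diff):

    // LibraryVersion() == "0.5.1"       -> IsRelease() true  -> directory "v0.5.1"
    // LibraryVersion() == "v0.5.1"      -> IsRelease() true  -> directory "v0.5.1"
    // LibraryVersion() == "0.5.1-dev97" -> IsRelease() false -> directory SourceID()
    // yielding e.g. ~/.duckdb/extensions/v0.5.1/<platform>/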
@@ -137363,7 +137570,7 @@ void ExtensionHelper::InstallExtension(ClientContext &context, const string &ext
          extension_name = "";
      }

-     auto url = StringUtil::Replace(url_template, "${REVISION}", DuckDB::SourceID());
+     auto url = StringUtil::Replace(url_template, "${REVISION}", GetVersionDirectoryName());
      url = StringUtil::Replace(url, "${PLATFORM}", DuckDB::Platform());
      url = StringUtil::Replace(url, "${NAME}", extension_name);

@@ -142541,9 +142748,7 @@ void CardinalityEstimator::UpdateTotalDomains(JoinNode *node, LogicalOperator *o
      // Get HLL stats here
      auto actual_binding = relation_column_to_original_column[key];

-     // sometimes base stats is null (test_709.test) returns null for base stats while
-     // there is still a catalog table. Anybody know anything about this?
-     auto base_stats = catalog_table->storage->GetStatistics(context, actual_binding.column_index);
+     auto base_stats = catalog_table->GetStatistics(context, actual_binding.column_index);
      if (base_stats) {
          count = base_stats->GetDistinctCount();
      }
@@ -143239,6 +143444,7 @@ private:



+
  namespace duckdb {

  class DeliminatorPlanUpdater : LogicalOperatorVisitor {
@@ -143266,7 +143472,15 @@ void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
              cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
              continue;
          }
-         auto &colref = (BoundColumnRefExpression &)*cond.right;
+         Expression *rhs = cond.right.get();
+         while (rhs->type == ExpressionType::OPERATOR_CAST) {
+             auto &cast = (BoundCastExpression &)*rhs;
+             rhs = cast.child.get();
+         }
+         if (rhs->type != ExpressionType::BOUND_COLUMN_REF) {
+             throw InternalException("Erorr in deliminator: expected a bound column reference");
+         }
+         auto &colref = (BoundColumnRefExpression &)*rhs;
          if (projection_map.find(colref.binding) != projection_map.end()) {
              // value on the right is a projection of removed DelimGet
              for (idx_t i = 0; i < decs->size(); i++) {
@@ -144414,7 +144628,10 @@ FilterResult FilterCombiner::AddBoundComparisonFilter(Expression *expr) {
      auto node = GetNode(left_is_scalar ? comparison.right.get() : comparison.left.get());
      idx_t equivalence_set = GetEquivalenceSet(node);
      auto scalar = left_is_scalar ? comparison.left.get() : comparison.right.get();
-     auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
+     Value constant_value;
+     if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
+         return FilterResult::UNSATISFIABLE;
+     }
      if (constant_value.IsNull()) {
          // comparisons with null are always null (i.e. will never result in rows)
          return FilterResult::UNSATISFIABLE;
@@ -144495,7 +144712,11 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
      }
      if (expr->IsFoldable()) {
          // scalar condition, evaluate it
-         auto result = ExpressionExecutor::EvaluateScalar(*expr).CastAs(LogicalType::BOOLEAN);
+         Value result;
+         if (!ExpressionExecutor::TryEvaluateScalar(*expr, result)) {
+             return FilterResult::UNSUPPORTED;
+         }
+         result = result.CastAs(LogicalType::BOOLEAN);
          // check if the filter passes
          if (result.IsNull() || !BooleanValue::Get(result)) {
              // the filter does not pass the scalar test, create an empty result
@@ -144519,7 +144740,10 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {

      if (lower_is_scalar) {
          auto scalar = comparison.lower.get();
-         auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
+         Value constant_value;
+         if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
+             return FilterResult::UNSUPPORTED;
+         }

          // create the ExpressionValueInformation
          ExpressionValueInformation info;
@@ -144552,7 +144776,10 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {

      if (upper_is_scalar) {
          auto scalar = comparison.upper.get();
-         auto constant_value = ExpressionExecutor::EvaluateScalar(*scalar);
+         Value constant_value;
+         if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
+             return FilterResult::UNSUPPORTED;
+         }

          // create the ExpressionValueInformation
          ExpressionValueInformation info;
@@ -145464,7 +145691,6 @@ unique_ptr<Expression> InClauseRewriter::VisitReplace(BoundOperatorExpression &e
      // IN clause with many children: try to generate a mark join that replaces this IN expression
      // we can only do this if the expressions in the expression list are scalar
      for (idx_t i = 1; i < expr.children.size(); i++) {
-         D_ASSERT(expr.children[i]->return_type == in_type);
          if (!expr.children[i]->IsFoldable()) {
              // non-scalar expression
              all_scalar = false;
@@ -147903,21 +148129,35 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownAggregate(unique_ptr<Logical
147903
148129
  FilterPushdown child_pushdown(optimizer);
147904
148130
  for (idx_t i = 0; i < filters.size(); i++) {
147905
148131
  auto &f = *filters[i];
147906
- // check if any aggregate or GROUPING functions are in the set
147907
- if (f.bindings.find(aggr.aggregate_index) == f.bindings.end() &&
147908
- f.bindings.find(aggr.groupings_index) == f.bindings.end()) {
147909
- // no aggregate! we can push this down
147910
- // rewrite any group bindings within the filter
147911
- f.filter = ReplaceGroupBindings(aggr, move(f.filter));
147912
- // add the filter to the child node
147913
- if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
147914
- // filter statically evaluates to false, strip tree
147915
- return make_unique<LogicalEmptyResult>(move(op));
148132
+ if (f.bindings.find(aggr.aggregate_index) != f.bindings.end()) {
148133
+ // filter on aggregate: cannot push down
148134
+ continue;
148135
+ }
148136
+ if (f.bindings.find(aggr.groupings_index) != f.bindings.end()) {
148137
+ // filter on a GROUPING function: cannot push down
148138
+ continue;
148139
+ }
148140
+ // if there are any empty grouping sets, we cannot push down filters
148141
+ bool has_empty_grouping_sets = false;
148142
+ for (auto &grp : aggr.grouping_sets) {
148143
+ if (grp.empty()) {
148144
+ has_empty_grouping_sets = true;
147916
148145
  }
147917
- // erase the filter from here
147918
- filters.erase(filters.begin() + i);
147919
- i--;
147920
148146
  }
148147
+ if (has_empty_grouping_sets) {
148148
+ continue;
148149
+ }
148150
+ // no aggregate! we can push this down
148151
+ // rewrite any group bindings within the filter
148152
+ f.filter = ReplaceGroupBindings(aggr, move(f.filter));
148153
+ // add the filter to the child node
148154
+ if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
148155
+ // filter statically evaluates to false, strip tree
148156
+ return make_unique<LogicalEmptyResult>(move(op));
148157
+ }
148158
+ // erase the filter from here
148159
+ filters.erase(filters.begin() + i);
148160
+ i--;
147921
148161
  }
147922
148162
  child_pushdown.GenerateFilters();
147923
148163
 
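
The PushdownAggregate rewrite flattens the old nested condition into early continues and adds one new rule: if any grouping set is empty, no filter may be pushed below the aggregate, because the empty set emits a single global row regardless of which input rows a pushed-down filter would remove. A standalone sketch of the eligibility test, with std::set standing in for DuckDB's binding and grouping-set types:

#include <iostream>
#include <set>
#include <vector>

using Bindings = std::set<int>;
using GroupingSet = std::set<int>;

// mirrors the per-filter checks above: any of the three conditions vetoes pushdown
static bool CanPushdownFilter(const Bindings &filter_bindings, int aggregate_index, int groupings_index,
                              const std::vector<GroupingSet> &grouping_sets) {
    if (filter_bindings.count(aggregate_index)) {
        return false; // filter references an aggregate
    }
    if (filter_bindings.count(groupings_index)) {
        return false; // filter references a GROUPING() result
    }
    for (auto &grp : grouping_sets) {
        if (grp.empty()) {
            // e.g. GROUP BY GROUPING SETS ((a), ()): the empty set aggregates
            // over all rows, so filtering below the aggregate changes its result
            return false;
        }
    }
    return true;
}

int main() {
    std::vector<GroupingSet> sets = {{0}, {}};                // one empty grouping set
    std::cout << CanPushdownFilter({5}, 1, 2, sets) << "\n";  // 0: empty set vetoes pushdown
    std::cout << CanPushdownFilter({5}, 1, 2, {{0}}) << "\n"; // 1: safe to push down
}
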
@@ -152623,6 +152863,19 @@ unique_ptr<LogicalOperator> TopN::Optimize(unique_ptr<LogicalOperator> op) {
152623
152863
  } // namespace duckdb
152624
152864
 
152625
152865
 
152866
+ namespace duckdb {
152867
+
152868
+ BasePipelineEvent::BasePipelineEvent(shared_ptr<Pipeline> pipeline_p)
152869
+ : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
152870
+ }
152871
+
152872
+ BasePipelineEvent::BasePipelineEvent(Pipeline &pipeline_p)
152873
+ : Event(pipeline_p.executor), pipeline(pipeline_p.shared_from_this()) {
152874
+ }
152875
+
152876
+ } // namespace duckdb
152877
+
152878
+
152626
152879
 
152627
152880
 
152628
152881
 
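
The new BasePipelineEvent factors out the pipeline member that PipelineEvent and PipelineFinishEvent previously duplicated (see the header hunks below). The reference-taking constructor relies on Pipeline deriving from std::enable_shared_from_this, so it only works for pipelines already owned by a shared_ptr. A minimal self-contained illustration of the two construction paths:

#include <iostream>
#include <memory>

struct Pipeline : std::enable_shared_from_this<Pipeline> {
    int id = 7;
};

struct BasePipelineEvent {
    std::shared_ptr<Pipeline> pipeline;
    explicit BasePipelineEvent(std::shared_ptr<Pipeline> pipeline_p) : pipeline(std::move(pipeline_p)) {
    }
    // only valid if pipeline_p is already managed by some shared_ptr
    explicit BasePipelineEvent(Pipeline &pipeline_p) : pipeline(pipeline_p.shared_from_this()) {
    }
};

int main() {
    auto pipe = std::make_shared<Pipeline>();
    BasePipelineEvent a(pipe);  // shares ownership explicitly
    BasePipelineEvent b(*pipe); // re-acquires shared ownership from the reference
    std::cout << a.pipeline->id + b.pipeline->id << "\n"; // 14
}
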
@@ -152742,16 +152995,13 @@ public:
152742
152995
 
152743
152996
 
152744
152997
 
152745
-
152746
152998
  namespace duckdb {
152747
152999
 
152748
- class PipelineEvent : public Event {
153000
+ //! A PipelineEvent is responsible for scheduling a pipeline
153001
+ class PipelineEvent : public BasePipelineEvent {
152749
153002
  public:
152750
153003
  PipelineEvent(shared_ptr<Pipeline> pipeline);
152751
153004
 
152752
- //! The pipeline that this event belongs to
152753
- shared_ptr<Pipeline> pipeline;
152754
-
152755
153005
  public:
152756
153006
  void Schedule() override;
152757
153007
  void FinishEvent() override;
@@ -152879,17 +153129,13 @@ private:
152879
153129
 
152880
153130
 
152881
153131
 
152882
-
152883
153132
  namespace duckdb {
152884
153133
  class Executor;
152885
153134
 
152886
- class PipelineFinishEvent : public Event {
153135
+ class PipelineFinishEvent : public BasePipelineEvent {
152887
153136
  public:
152888
153137
  PipelineFinishEvent(shared_ptr<Pipeline> pipeline);
152889
153138
 
152890
- //! The pipeline that this event belongs to
152891
- shared_ptr<Pipeline> pipeline;
152892
-
152893
153139
  public:
152894
153140
  void Schedule() override;
152895
153141
  void FinishEvent() override;
@@ -152916,6 +153162,9 @@ Executor &Executor::Get(ClientContext &context) {
152916
153162
 
152917
153163
  void Executor::AddEvent(shared_ptr<Event> event) {
152918
153164
  lock_guard<mutex> elock(executor_lock);
153165
+ if (cancelled) {
153166
+ return;
153167
+ }
152919
153168
  events.push_back(move(event));
152920
153169
  }
152921
153170
 
@@ -153219,6 +153468,7 @@ void Executor::CancelTasks() {
153219
153468
  vector<weak_ptr<Pipeline>> weak_references;
153220
153469
  {
153221
153470
  lock_guard<mutex> elock(executor_lock);
153471
+ cancelled = true;
153222
153472
  weak_references.reserve(pipelines.size());
153223
153473
  for (auto &pipeline : pipelines) {
153224
153474
  weak_references.push_back(weak_ptr<Pipeline>(pipeline));
@@ -153295,10 +153545,10 @@ PendingExecutionResult Executor::ExecuteTask() {
153295
153545
  lock_guard<mutex> elock(executor_lock);
153296
153546
  pipelines.clear();
153297
153547
  NextExecutor();
153298
- if (!exceptions.empty()) { // LCOV_EXCL_START
153548
+ if (HasError()) { // LCOV_EXCL_START
153299
153549
  // an exception has occurred executing one of the pipelines
153300
153550
  execution_result = PendingExecutionResult::EXECUTION_ERROR;
153301
- ThrowExceptionInternal();
153551
+ ThrowException();
153302
153552
  } // LCOV_EXCL_STOP
153303
153553
  execution_result = PendingExecutionResult::RESULT_READY;
153304
153554
  return execution_result;
@@ -153307,6 +153557,7 @@ PendingExecutionResult Executor::ExecuteTask() {
153307
153557
  void Executor::Reset() {
153308
153558
  lock_guard<mutex> elock(executor_lock);
153309
153559
  physical_plan = nullptr;
153560
+ cancelled = false;
153310
153561
  owned_plan.reset();
153311
153562
  root_executor.reset();
153312
153563
  root_pipelines.clear();
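
Taken together with the AddEvent and CancelTasks hunks above, the executor now latches a cancelled flag under executor_lock: once CancelTasks sets it, late-arriving events are dropped rather than queued, and Reset rearms the executor for the next query. A compact standalone sketch of the latch (simplified types, not DuckDB's classes):

#include <iostream>
#include <memory>
#include <mutex>
#include <vector>

struct Event {
    int id;
};

class Executor {
public:
    void AddEvent(std::shared_ptr<Event> event) {
        std::lock_guard<std::mutex> elock(executor_lock);
        if (cancelled) {
            return; // late-arriving events are silently dropped
        }
        events.push_back(std::move(event));
    }
    void CancelTasks() {
        std::lock_guard<std::mutex> elock(executor_lock);
        cancelled = true;
    }
    void Reset() {
        std::lock_guard<std::mutex> elock(executor_lock);
        cancelled = false;
        events.clear();
    }
    size_t EventCount() {
        std::lock_guard<std::mutex> elock(executor_lock);
        return events.size();
    }

private:
    std::mutex executor_lock;
    bool cancelled = false;
    std::vector<std::shared_ptr<Event>> events;
};

int main() {
    Executor ex;
    ex.AddEvent(std::make_shared<Event>(Event{1}));
    ex.CancelTasks();
    ex.AddEvent(std::make_shared<Event>(Event{2})); // dropped
    std::cout << ex.EventCount() << "\n";           // 1
}
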
@@ -153343,7 +153594,7 @@ vector<LogicalType> Executor::GetTypes() {
153343
153594
  }
153344
153595
 
153345
153596
  void Executor::PushError(PreservedError exception) {
153346
- lock_guard<mutex> elock(executor_lock);
153597
+ lock_guard<mutex> elock(error_lock);
153347
153598
  // interrupt execution of any other pipelines that belong to this executor
153348
153599
  context.interrupted = true;
153349
153600
  // push the exception onto the stack
@@ -153351,20 +153602,16 @@ void Executor::PushError(PreservedError exception) {
153351
153602
  }
153352
153603
 
153353
153604
  bool Executor::HasError() {
153354
- lock_guard<mutex> elock(executor_lock);
153605
+ lock_guard<mutex> elock(error_lock);
153355
153606
  return !exceptions.empty();
153356
153607
  }
153357
153608
 
153358
153609
  void Executor::ThrowException() {
153359
- lock_guard<mutex> elock(executor_lock);
153360
- ThrowExceptionInternal();
153361
- }
153362
-
153363
- void Executor::ThrowExceptionInternal() { // LCOV_EXCL_START
153610
+ lock_guard<mutex> elock(error_lock);
153364
153611
  D_ASSERT(!exceptions.empty());
153365
153612
  auto &entry = exceptions[0];
153366
153613
  entry.Throw();
153367
- } // LCOV_EXCL_STOP
153614
+ }
153368
153615
 
153369
153616
  void Executor::Flush(ThreadContext &tcontext) {
153370
153617
  profiler->Flush(tcontext.profiler);
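
Error state moves from executor_lock to a dedicated error_lock, so PushError, HasError, and ThrowException never contend with (or deadlock against) code paths already holding executor_lock; that is what lets the ExecuteTask hunk above call ThrowException directly, while still under executor_lock, and drop the ThrowExceptionInternal indirection. A sketch of the two-mutex split with simplified types:

#include <iostream>
#include <mutex>
#include <stdexcept>
#include <string>
#include <vector>

class Executor {
public:
    void PushError(std::string error) {
        std::lock_guard<std::mutex> elock(error_lock); // not executor_lock
        exceptions.push_back(std::move(error));
    }
    bool HasError() {
        std::lock_guard<std::mutex> elock(error_lock);
        return !exceptions.empty();
    }
    void ThrowException() {
        std::lock_guard<std::mutex> elock(error_lock);
        throw std::runtime_error(exceptions.at(0)); // first recorded error wins
    }

private:
    std::mutex executor_lock; // guards pipelines/events (unused in this sketch)
    std::mutex error_lock;    // guards only the exception list
    std::vector<std::string> exceptions;
};

int main() {
    Executor ex;
    ex.PushError("pipeline failed");
    if (ex.HasError()) {
        try {
            ex.ThrowException();
        } catch (const std::exception &e) {
            std::cout << e.what() << "\n"; // pipeline failed
        }
    }
}
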
@@ -153629,6 +153876,9 @@ void Pipeline::Ready() {
153629
153876
  }
153630
153877
 
153631
153878
  void Pipeline::Finalize(Event &event) {
153879
+ if (executor.HasError()) {
153880
+ return;
153881
+ }
153632
153882
  D_ASSERT(ready);
153633
153883
  try {
153634
153884
  auto sink_state = sink->Finalize(*this, event, executor.context, *sink->sink_state);
@@ -153739,16 +153989,25 @@ void PipelineCompleteEvent::FinalizeFinish() {
153739
153989
  } // namespace duckdb
153740
153990
 
153741
153991
 
153992
+
153742
153993
  namespace duckdb {
153743
153994
 
153744
- PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p)
153745
- : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
153995
+ PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
153746
153996
  }
153747
153997
 
153748
153998
  void PipelineEvent::Schedule() {
153749
153999
  auto event = shared_from_this();
153750
- pipeline->Schedule(event);
153751
- D_ASSERT(total_tasks > 0);
154000
+ auto &executor = pipeline->executor;
154001
+ try {
154002
+ pipeline->Schedule(event);
154003
+ D_ASSERT(total_tasks > 0);
154004
+ } catch (Exception &ex) {
154005
+ executor.PushError(PreservedError(ex));
154006
+ } catch (std::exception &ex) {
154007
+ executor.PushError(PreservedError(ex));
154008
+ } catch (...) { // LCOV_EXCL_START
154009
+ executor.PushError(PreservedError("Unknown exception in Schedule!"));
154010
+ } // LCOV_EXCL_STOP
153752
154011
  }
153753
154012
 
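
Schedule() now traps anything thrown while scheduling pipeline tasks and routes it into the executor's error list as a PreservedError, so a scheduling failure surfaces as a query error instead of escaping the event machinery. A self-contained sketch of the catch-all-to-error-queue pattern (simplified types):

#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// stand-in for Executor::PushError(PreservedError)
static std::vector<std::string> errors;
static void PushError(std::string e) {
    errors.push_back(std::move(e));
}

// mirrors PipelineEvent::Schedule: scheduling failures become stored errors
static void Schedule(void (*schedule_pipeline)()) {
    try {
        schedule_pipeline();
    } catch (std::exception &ex) {
        PushError(ex.what());
    } catch (...) {
        PushError("Unknown exception in Schedule!");
    }
}

int main() {
    Schedule([] { throw std::runtime_error("boom"); });
    Schedule([] { throw 42; }); // non-standard exceptions are captured too
    std::cout << errors.size() << "\n"; // 2
}
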
153754
154013
  void PipelineEvent::FinishEvent() {
@@ -154131,8 +154390,7 @@ void PipelineExecutor::EndOperator(PhysicalOperator *op, DataChunk *chunk) {
154131
154390
 
154132
154391
  namespace duckdb {
154133
154392
 
154134
- PipelineFinishEvent::PipelineFinishEvent(shared_ptr<Pipeline> pipeline_p)
154135
- : Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
154393
+ PipelineFinishEvent::PipelineFinishEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
154136
154394
  }
154137
154395
 
154138
154396
  void PipelineFinishEvent::Schedule() {
@@ -167667,7 +167925,7 @@ string QueryNode::ResultModifiersToString() const {
167667
167925
  } else if (modifier.type == ResultModifierType::LIMIT_PERCENT_MODIFIER) {
167668
167926
  auto &limit_p_modifier = (LimitPercentModifier &)modifier;
167669
167927
  if (limit_p_modifier.limit) {
167670
- result += " LIMIT " + limit_p_modifier.limit->ToString() + " %";
167928
+ result += " LIMIT (" + limit_p_modifier.limit->ToString() + ") %";
167671
167929
  }
167672
167930
  if (limit_p_modifier.offset) {
167673
167931
  result += " OFFSET " + limit_p_modifier.offset->ToString();
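
The serialized LIMIT expression is now parenthesized before the percent sign is appended, which guards against the "%" associating with only part of a compound expression when the string is re-parsed. A trivial sketch of the fixed emission (hypothetical helper, not DuckDB's API):

#include <iostream>
#include <string>

// parenthesize the expression before appending the unit
static std::string LimitPercentToString(const std::string &limit_expr) {
    return " LIMIT (" + limit_expr + ") %";
}

int main() {
    std::cout << LimitPercentToString("n + 1") << "\n"; // " LIMIT (n + 1) %"
}
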
@@ -175139,6 +175397,8 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti
175139
175397
  // we didn't bind columns, try again in children
175140
175398
  return BindResult(error);
175141
175399
  }
175400
+ } else if (depth > 0 && !aggregate_binder.HasBoundColumns()) {
175401
+ return BindResult("Aggregate with only constant parameters has to be bound in the root subquery");
175142
175402
  }
175143
175403
  if (!filter_error.empty()) {
175144
175404
  return BindResult(filter_error);
@@ -175146,8 +175406,9 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti
175146
175406
 
175147
175407
  if (aggr.filter) {
175148
175408
  auto &child = (BoundExpression &)*aggr.filter;
175149
- bound_filter = move(child.expr);
175409
+ bound_filter = BoundCastExpression::AddCastToType(move(child.expr), LogicalType::BOOLEAN);
175150
175410
  }
175411
+
175151
175412
  // all children bound successfully
175152
175413
  // extract the children and types
175153
175414
  vector<LogicalType> types;
@@ -176300,7 +176561,7 @@ BindResult ExpressionBinder::BindMacro(FunctionExpression &function, ScalarMacro
176300
176561
  string error =
176301
176562
  MacroFunction::ValidateArguments(*macro_func->function, macro_func->name, function, positionals, defaults);
176302
176563
  if (!error.empty()) {
176303
- return BindResult(binder.FormatError(*expr->get(), error));
176564
+ throw BinderException(binder.FormatError(*expr->get(), error));
176304
176565
  }
176305
176566
 
176306
176567
  // create a MacroBinding to bind this macro's parameters to its arguments
@@ -177323,10 +177584,13 @@ public:
177323
177584
  public:
177324
177585
  unique_ptr<Expression> Bind(unique_ptr<ParsedExpression> expr);
177325
177586
 
177326
- idx_t MaxCount() {
177587
+ idx_t MaxCount() const {
177327
177588
  return max_count;
177328
177589
  }
177329
177590
 
177591
+ bool HasExtraList() const {
177592
+ return extra_list;
177593
+ }
177330
177594
  unique_ptr<Expression> CreateExtraReference(unique_ptr<ParsedExpression> expr);
177331
177595
 
177332
177596
  private:
@@ -177368,6 +177632,9 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
177368
177632
  Value &delimiter_value) {
177369
177633
  auto new_binder = Binder::CreateBinder(context, this, true);
177370
177634
  if (delimiter->HasSubquery()) {
177635
+ if (!order_binder.HasExtraList()) {
177636
+ throw BinderException("Subquery in LIMIT/OFFSET not supported in set operation");
177637
+ }
177371
177638
  return order_binder.CreateExtraReference(move(delimiter));
177372
177639
  }
177373
177640
  ExpressionBinder expr_binder(*new_binder, context);
@@ -177378,6 +177645,8 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
177378
177645
  delimiter_value = ExpressionExecutor::EvaluateScalar(*expr).CastAs(type);
177379
177646
  return nullptr;
177380
177647
  }
177648
+ // move any correlated columns to this binder
177649
+ MoveCorrelatedExpressions(*new_binder);
177381
177650
  return expr;
177382
177651
  }
177383
177652
 
@@ -179981,11 +180250,13 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
179981
180250
  BindDefaultValues(base.columns, result->bound_defaults);
179982
180251
  }
179983
180252
 
180253
+ idx_t regular_column_count = 0;
179984
180254
  // bind collations to detect any unsupported collation errors
179985
180255
  for (auto &column : base.columns) {
179986
180256
  if (column.Generated()) {
179987
180257
  continue;
179988
180258
  }
180259
+ regular_column_count++;
179989
180260
  if (column.Type().id() == LogicalTypeId::VARCHAR) {
179990
180261
  ExpressionBinder::TestCollation(context, StringType::GetCollation(column.Type()));
179991
180262
  }
@@ -179997,6 +180268,9 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
179997
180268
  result->dependencies.insert(type_dependency);
179998
180269
  }
179999
180270
  }
180271
+ if (regular_column_count == 0) {
180272
+ throw BinderException("Creating a table without physical (non-generated) columns is not supported");
180273
+ }
180000
180274
  properties.allow_stream_result = false;
180001
180275
  return result;
180002
180276
  }
@@ -180424,6 +180698,13 @@ BoundStatement Binder::Bind(ExportStatement &stmt) {
180424
180698
  info->schema = table->schema->name;
180425
180699
  info->table = table->name;
180426
180700
 
180701
+ // We cannot export generated columns
180702
+ for (auto &col : table->columns) {
180703
+ if (!col.Generated()) {
180704
+ info->select_list.push_back(col.GetName());
180705
+ }
180706
+ }
180707
+
180427
180708
  exported_data.table_name = info->table;
180428
180709
  exported_data.schema_name = info->schema;
180429
180710
  exported_data.file_path = info->file_path;
@@ -180669,7 +180950,10 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
180669
180950
  }
180670
180951
 
180671
180952
  // parse select statement and add to logical plan
180672
- auto root_select = Bind(*stmt.select_statement);
180953
+ auto select_binder = Binder::CreateBinder(context, this);
180954
+ auto root_select = select_binder->Bind(*stmt.select_statement);
180955
+ MoveCorrelatedExpressions(*select_binder);
180956
+
180673
180957
  CheckInsertColumnCountMismatch(expected_columns, root_select.types.size(), !stmt.columns.empty(),
180674
180958
  table->name.c_str());
180675
180959
 
@@ -181951,6 +182235,18 @@ string Binder::RetrieveUsingBinding(Binder &current_binder, UsingColumnSet *curr
181951
182235
  return binding;
181952
182236
  }
181953
182237
 
182238
+ static vector<string> RemoveDuplicateUsingColumns(const vector<string> &using_columns) {
182239
+ vector<string> result;
182240
+ case_insensitive_set_t handled_columns;
182241
+ for (auto &using_column : using_columns) {
182242
+ if (handled_columns.find(using_column) == handled_columns.end()) {
182243
+ handled_columns.insert(using_column);
182244
+ result.push_back(using_column);
182245
+ }
182246
+ }
182247
+ return result;
182248
+ }
182249
+
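
RemoveDuplicateUsingColumns keeps the first occurrence of each USING column, compared case-insensitively, so a join USING (a, A, a) binds the column once. A standalone analog, folding to lowercase where DuckDB uses its case_insensitive_set_t:

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

static std::string Fold(std::string s) {
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return std::tolower(c); });
    return s;
}

static std::vector<std::string> RemoveDuplicateUsingColumns(const std::vector<std::string> &using_columns) {
    std::vector<std::string> result;
    std::unordered_set<std::string> handled; // lowercase-folded names
    for (auto &col : using_columns) {
        if (handled.insert(Fold(col)).second) {
            result.push_back(col); // first occurrence keeps its original spelling
        }
    }
    return result;
}

int main() {
    for (auto &c : RemoveDuplicateUsingColumns({"a", "A", "b", "a"})) {
        std::cout << c << "\n"; // a, b
    }
}
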
181954
182250
  unique_ptr<BoundTableRef> Binder::Bind(JoinRef &ref) {
181955
182251
  auto result = make_unique<BoundJoinRef>();
181956
182252
  result->left_binder = Binder::CreateBinder(context, this);
@@ -182020,6 +182316,8 @@ unique_ptr<BoundTableRef> Binder::Bind(JoinRef &ref) {
182020
182316
  D_ASSERT(!result->condition);
182021
182317
  extra_using_columns = ref.using_columns;
182022
182318
  }
182319
+ extra_using_columns = RemoveDuplicateUsingColumns(extra_using_columns);
182320
+
182023
182321
  if (!extra_using_columns.empty()) {
182024
182322
  vector<UsingColumnSet *> left_using_bindings;
182025
182323
  vector<UsingColumnSet *> right_using_bindings;
@@ -182465,7 +182763,7 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundEmptyTableRef &ref) {
182465
182763
  namespace duckdb {
182466
182764
 
182467
182765
  unique_ptr<LogicalOperator> Binder::CreatePlan(BoundExpressionListRef &ref) {
182468
- auto root = make_unique_base<LogicalOperator, LogicalDummyScan>(0);
182766
+ auto root = make_unique_base<LogicalOperator, LogicalDummyScan>(GenerateTableIndex());
182469
182767
  // values list, first plan any subqueries in the list
182470
182768
  for (auto &expr_list : ref.values) {
182471
182769
  for (auto &expr : expr_list) {
@@ -185018,7 +185316,7 @@ BindResult ConstantBinder::BindExpression(unique_ptr<ParsedExpression> *expr_ptr
185018
185316
  case ExpressionClass::COLUMN_REF:
185019
185317
  return BindResult(clause + " cannot contain column names");
185020
185318
  case ExpressionClass::SUBQUERY:
185021
- return BindResult(clause + " cannot contain subqueries");
185319
+ throw BinderException(clause + " cannot contain subqueries");
185022
185320
  case ExpressionClass::DEFAULT:
185023
185321
  return BindResult(clause + " cannot contain DEFAULT clause");
185024
185322
  case ExpressionClass::WINDOW:
@@ -185278,6 +185576,9 @@ unique_ptr<Expression> OrderBinder::CreateProjectionReference(ParsedExpression &
185278
185576
  }
185279
185577
 
185280
185578
  unique_ptr<Expression> OrderBinder::CreateExtraReference(unique_ptr<ParsedExpression> expr) {
185579
+ if (!extra_list) {
185580
+ throw InternalException("CreateExtraReference called without extra_list");
185581
+ }
185281
185582
  auto result = CreateProjectionReference(*expr, extra_list->size());
185282
185583
  extra_list->push_back(move(expr));
185283
185584
  return result;
@@ -189404,6 +189705,9 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
189404
189705
  case LogicalOperatorType::LOGICAL_ORDER_BY:
189405
189706
  plan->children[0] = PushDownDependentJoin(move(plan->children[0]));
189406
189707
  return plan;
189708
+ case LogicalOperatorType::LOGICAL_RECURSIVE_CTE: {
189709
+ throw ParserException("Recursive CTEs not supported in correlated subquery");
189710
+ }
189407
189711
  default:
189408
189712
  throw InternalException("Logical operator type \"%s\" for dependent join", LogicalOperatorToString(plan->type));
189409
189713
  }
@@ -191530,7 +191834,7 @@ void CheckpointManager::CreateCheckpoint() {
191530
191834
  wal->Flush();
191531
191835
 
191532
191836
  if (config.options.checkpoint_abort == CheckpointAbort::DEBUG_ABORT_BEFORE_HEADER) {
191533
- throw IOException("Checkpoint aborted before header write because of PRAGMA checkpoint_abort flag");
191837
+ throw FatalException("Checkpoint aborted before header write because of PRAGMA checkpoint_abort flag");
191534
191838
  }
191535
191839
 
191536
191840
  // finally write the updated header
@@ -191539,7 +191843,7 @@ void CheckpointManager::CreateCheckpoint() {
191539
191843
  block_manager.WriteHeader(header);
191540
191844
 
191541
191845
  if (config.options.checkpoint_abort == CheckpointAbort::DEBUG_ABORT_BEFORE_TRUNCATE) {
191542
- throw IOException("Checkpoint aborted before truncate because of PRAGMA checkpoint_abort flag");
191846
+ throw FatalException("Checkpoint aborted before truncate because of PRAGMA checkpoint_abort flag");
191543
191847
  }
191544
191848
 
191545
191849
  // truncate the WAL
@@ -197090,7 +197394,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
197090
197394
  }
197091
197395
 
197092
197396
  // Alter column to add new constraint
197093
- DataTable::DataTable(ClientContext &context, DataTable &parent, unique_ptr<Constraint> constraint)
197397
+ DataTable::DataTable(ClientContext &context, DataTable &parent, unique_ptr<BoundConstraint> constraint)
197094
197398
  : info(parent.info), db(parent.db), total_rows(parent.total_rows.load()), row_groups(parent.row_groups),
197095
197399
  is_root(true) {
197096
197400
 
@@ -197265,7 +197569,7 @@ void DataTable::InitializeParallelScan(ClientContext &context, ParallelTableScan
197265
197569
 
197266
197570
  bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state,
197267
197571
  const vector<column_t> &column_ids) {
197268
- while (state.current_row_group) {
197572
+ while (state.current_row_group && state.current_row_group->count > 0) {
197269
197573
  idx_t vector_index;
197270
197574
  idx_t max_row;
197271
197575
  if (ClientConfig::GetConfig(context).verify_parallelism) {
@@ -197279,13 +197583,8 @@ bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState
197279
197583
  max_row = state.current_row_group->start + state.current_row_group->count;
197280
197584
  }
197281
197585
  max_row = MinValue<idx_t>(max_row, state.max_row);
197282
- bool need_to_scan;
197283
- if (state.current_row_group->count == 0) {
197284
- need_to_scan = false;
197285
- } else {
197286
- need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
197287
- state.current_row_group, vector_index, max_row);
197288
- }
197586
+ bool need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
197587
+ state.current_row_group, vector_index, max_row);
197289
197588
  if (ClientConfig::GetConfig(context).verify_parallelism) {
197290
197589
  state.vector_index++;
197291
197590
  if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) {
@@ -197544,14 +197843,15 @@ static void VerifyDeleteForeignKeyConstraint(const BoundForeignKeyConstraint &bf
197544
197843
  VerifyForeignKeyConstraint(bfk, context, chunk, false);
197545
197844
  }
197546
197845
 
197547
- void DataTable::VerifyNewConstraint(ClientContext &context, DataTable &parent, const Constraint *constraint) {
197846
+ void DataTable::VerifyNewConstraint(ClientContext &context, DataTable &parent, const BoundConstraint *constraint) {
197548
197847
  if (constraint->type != ConstraintType::NOT_NULL) {
197549
197848
  throw NotImplementedException("FIXME: ALTER COLUMN with such constraint is not supported yet");
197550
197849
  }
197551
197850
  // scan the original table, check if there's any null value
197552
- auto &not_null_constraint = (NotNullConstraint &)*constraint;
197851
+ auto &not_null_constraint = (BoundNotNullConstraint &)*constraint;
197553
197852
  auto &transaction = Transaction::GetTransaction(context);
197554
197853
  vector<LogicalType> scan_types;
197854
+ D_ASSERT(not_null_constraint.index < parent.column_definitions.size());
197555
197855
  scan_types.push_back(parent.column_definitions[not_null_constraint.index].Type());
197556
197856
  DataChunk scan_chunk;
197557
197857
  auto &allocator = Allocator::Get(context);
@@ -198308,6 +198608,9 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
198308
198608
  return nullptr;
198309
198609
  }
198310
198610
  lock_guard<mutex> stats_guard(stats_lock);
198611
+ if (column_id >= column_stats.size()) {
198612
+ throw InternalException("Call to GetStatistics is out of range");
198613
+ }
198311
198614
  return column_stats[column_id]->stats->Copy();
198312
198615
  }
198313
198616
 
@@ -199596,7 +199899,7 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) {
199596
199899
 
199597
199900
  auto &config = DBConfig::GetConfig(db);
199598
199901
  if (config.options.checkpoint_abort == CheckpointAbort::DEBUG_ABORT_AFTER_FREE_LIST_WRITE) {
199599
- throw IOException("Checkpoint aborted after free list write because of PRAGMA checkpoint_abort flag");
199902
+ throw FatalException("Checkpoint aborted after free list write because of PRAGMA checkpoint_abort flag");
199600
199903
  }
199601
199904
 
199602
199905
  if (!use_direct_io) {
@@ -201122,6 +201425,7 @@ idx_t ChunkVectorInfo::Delete(Transaction &transaction, row_t rows[], idx_t coun
201122
201425
  }
201123
201426
  // after verifying that there are no conflicts we mark the tuple as deleted
201124
201427
  deleted[rows[i]] = transaction.transaction_id;
201428
+ rows[deleted_tuples] = rows[i];
201125
201429
  deleted_tuples++;
201126
201430
  }
201127
201431
  return deleted_tuples;
@@ -201449,6 +201753,8 @@ public:
201449
201753
  idx_t ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) override;
201450
201754
  idx_t ScanCount(ColumnScanState &state, Vector &result, idx_t count) override;
201451
201755
 
201756
+ void Skip(ColumnScanState &state, idx_t count = STANDARD_VECTOR_SIZE) override;
201757
+
201452
201758
  void InitializeAppend(ColumnAppendState &state) override;
201453
201759
  void Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) override;
201454
201760
  void RevertAppend(row_t start_row) override;
@@ -204028,9 +204334,15 @@ void VersionDeleteState::Flush() {
204028
204334
  return;
204029
204335
  }
204030
204336
  // delete in the current info
204031
- delete_count += current_info->Delete(transaction, rows, count);
204032
- // now push the delete into the undo buffer
204033
- transaction.PushDelete(table, current_info, rows, count, base_row + chunk_row);
204337
+ // it is possible for delete statements to delete the same tuple multiple times when combined with a USING clause
204338
+ // in the current_info->Delete, we check which tuples are actually deleted (excluding duplicate deletions)
204339
+ // this is returned in the actual_delete_count
204340
+ auto actual_delete_count = current_info->Delete(transaction, rows, count);
204341
+ delete_count += actual_delete_count;
204342
+ if (actual_delete_count > 0) {
204343
+ // now push the delete into the undo buffer, but only if any deletes were actually performed
204344
+ transaction.PushDelete(table, current_info, rows, actual_delete_count, base_row + chunk_row);
204345
+ }
204034
204346
  count = 0;
204035
204347
  }
204036
204348
 
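
This comment pairs with the rows[deleted_tuples] = rows[i] compaction added to ChunkVectorInfo::Delete in the -201122 hunk further up: Delete now compacts the row array in place so that only the row ids it actually marked are counted and pushed into the undo buffer, which is what makes a USING-driven double delete of the same tuple harmless. A standalone sketch of that compaction:

#include <iostream>
#include <set>

using row_t = long;

// stand-in for ChunkVectorInfo::Delete: marks rows deleted and compacts the
// array so rows[0..result) holds only the newly deleted row ids
static size_t Delete(std::set<row_t> &deleted, row_t rows[], size_t count) {
    size_t deleted_tuples = 0;
    for (size_t i = 0; i < count; i++) {
        if (!deleted.insert(rows[i]).second) {
            continue; // already deleted (e.g. duplicated by a USING join)
        }
        rows[deleted_tuples] = rows[i];
        deleted_tuples++;
    }
    return deleted_tuples;
}

int main() {
    std::set<row_t> deleted;
    row_t rows[] = {3, 3, 5, 3};
    size_t n = Delete(deleted, rows, 4);
    std::cout << n << "\n"; // 2: only rows 3 and 5 were actually deleted
}
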
@@ -204407,6 +204719,15 @@ idx_t StructColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t
204407
204719
  return scan_count;
204408
204720
  }
204409
204721
 
204722
+ void StructColumnData::Skip(ColumnScanState &state, idx_t count) {
204723
+ validity.Skip(state.child_states[0], count);
204724
+
204725
+ // skip inside the sub-columns
204726
+ for (idx_t child_idx = 0; child_idx < sub_columns.size(); child_idx++) {
204727
+ sub_columns[child_idx]->Skip(state.child_states[child_idx + 1], count);
204728
+ }
204729
+ }
204730
+
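
Struct columns implement the new Skip by forwarding to the validity column (child state 0) and then to every sub-column (states 1..n), so all nested readers stay row-aligned. A minimal sketch of the fan-out, with hypothetical reader types in place of DuckDB's column data classes:

#include <cstdio>
#include <memory>
#include <vector>

using idx_t = unsigned long long;

struct ColumnReader {
    idx_t position = 0;
    virtual ~ColumnReader() = default;
    virtual void Skip(idx_t count) {
        position += count;
    }
};

struct StructReader : ColumnReader {
    ColumnReader validity;
    std::vector<std::unique_ptr<ColumnReader>> sub_columns;
    void Skip(idx_t count) override {
        validity.Skip(count); // the struct's own null mask
        for (auto &child : sub_columns) {
            child->Skip(count); // keep every nested reader aligned
        }
        position += count;
    }
};

int main() {
    StructReader s;
    s.sub_columns.push_back(std::make_unique<ColumnReader>());
    s.Skip(2048);
    std::printf("%llu %llu\n", s.validity.position, s.sub_columns[0]->position); // 2048 2048
}
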
204410
204731
  void StructColumnData::InitializeAppend(ColumnAppendState &state) {
204411
204732
  ColumnAppendState validity_append;
204412
204733
  validity.InitializeAppend(validity_append);
@@ -206866,6 +207187,7 @@ void CleanupState::CleanupUpdate(UpdateInfo *info) {
206866
207187
 
206867
207188
  void CleanupState::CleanupDelete(DeleteInfo *info) {
206868
207189
  auto version_table = info->table;
207190
+ D_ASSERT(version_table->info->cardinality >= info->count);
206869
207191
  version_table->info->cardinality -= info->count;
206870
207192
  if (version_table->info->indexes.Empty()) {
206871
207193
  // this table has no indexes: no cleanup to be done
@@ -260291,49 +260613,84 @@ static void AssignInvalidUTF8Reason(UnicodeInvalidReason *invalid_reason, size_t
260291
260613
  }
260292
260614
  }
260293
260615
 
260294
- UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260295
- UnicodeType type = UnicodeType::ASCII;
260296
- char c;
260297
- for (size_t i = 0; i < len; i++) {
260298
- c = s[i];
260299
- if (c == '\0') {
260300
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
260301
- return UnicodeType::INVALID;
260302
- }
260303
- // 1 Byte / ASCII
260304
- if ((c & 0x80) == 0) {
260305
- continue;
260306
- }
260307
- type = UnicodeType::UNICODE;
260308
- if ((s[++i] & 0xC0) != 0x80) {
260309
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260310
- return UnicodeType::INVALID;
260311
- }
260312
- if ((c & 0xE0) == 0xC0) {
260313
- continue;
260314
- }
260315
- if ((s[++i] & 0xC0) != 0x80) {
260316
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260317
- return UnicodeType::INVALID;
260318
- }
260319
- if ((c & 0xF0) == 0xE0) {
260320
- continue;
260321
- }
260322
- if ((s[++i] & 0xC0) != 0x80) {
260616
+ template <const int nextra_bytes, const int mask>
260617
+ static inline UnicodeType
260618
+ UTF8ExtraByteLoop(const int first_pos_seq, int utf8char, size_t &i,
260619
+ const char *s, const size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260620
+ if ((len - i) < (nextra_bytes + 1)) {
260621
+ /* incomplete byte sequence */
260622
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::BYTE_MISMATCH);
260623
+ return UnicodeType::INVALID;
260624
+ }
260625
+ for (size_t j = 0; j < nextra_bytes; j++) {
260626
+ int c = (int) s[++i];
260627
+ /* now validate the extra bytes */
260628
+ if ((c & 0xC0) != 0x80) {
260629
+ /* extra byte is not in the format 10xxxxxx */
260323
260630
  AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260324
260631
  return UnicodeType::INVALID;
260325
260632
  }
260326
- if ((c & 0xF8) == 0xF0) {
260327
- continue;
260328
- }
260329
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260633
+ utf8char = (utf8char << 6) | (c & 0x3F);
260634
+ }
260635
+ if ((utf8char & mask) == 0) {
260636
+ /* invalid UTF-8 codepoint, not shortest possible */
260637
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260638
+ return UnicodeType::INVALID;
260639
+ }
260640
+ if (utf8char > 0x10FFFF) {
260641
+ /* value not representable by Unicode */
260642
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260330
260643
  return UnicodeType::INVALID;
260331
260644
  }
260645
+ if ((utf8char & 0x1FFF800) == 0xD800) {
260646
+ /* Unicode characters from U+D800 to U+DFFF are surrogate characters used by UTF-16 which are invalid in UTF-8 */
260647
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260648
+ return UnicodeType::INVALID;
260649
+ }
260650
+ return UnicodeType::UNICODE;
260651
+ }
260332
260652
 
260653
+ UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260654
+ UnicodeType type = UnicodeType::ASCII;
260655
+
260656
+ for (size_t i = 0; i < len; i++) {
260657
+ int c = (int) s[i];
260658
+
260659
+ if ((c & 0x80) == 0) {
260660
+ /* 1 byte sequence */
260661
+ if (c == '\0') {
260662
+ /* NULL byte not allowed */
260663
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
260664
+ return UnicodeType::INVALID;
260665
+ }
260666
+ } else {
260667
+ int first_pos_seq = i;
260668
+
260669
+ if ((c & 0xE0) == 0xC0) {
260670
+ /* 2 byte sequence */
260671
+ int utf8char = c & 0x1F;
260672
+ type = UTF8ExtraByteLoop<1, 0x000780>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260673
+ } else if ((c & 0xF0) == 0xE0) {
260674
+ /* 3 byte sequence */
260675
+ int utf8char = c & 0x0F;
260676
+ type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260677
+ } else if ((c & 0xF8) == 0xF0) {
260678
+ /* 4 byte sequence */
260679
+ int utf8char = c & 0x07;
260680
+ type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260681
+ } else {
260682
+ /* invalid UTF-8 start byte */
260683
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260684
+ return UnicodeType::INVALID;
260685
+ }
260686
+ if (type == UnicodeType::INVALID) {
260687
+ return type;
260688
+ }
260689
+ }
260690
+ }
260333
260691
  return type;
260334
260692
  }
260335
260693
 
260336
-
260337
260694
  char* Utf8Proc::Normalize(const char *s, size_t len) {
260338
260695
  assert(s);
260339
260696
  assert(Utf8Proc::Analyze(s, len) != UnicodeType::INVALID);
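
The rewritten Analyze centralizes continuation-byte handling in the templated UTF8ExtraByteLoop: nextra_bytes is the number of 10xxxxxx bytes and mask selects the codepoint bits that must be non-zero for the shortest (non-overlong) encoding (0x000780 requires at least U+0080, 0x00F800 at least U+0800, 0x1F0000 at least U+10000). It also now rejects codepoints above U+10FFFF and the UTF-16 surrogate range U+D800 through U+DFFF. A compact standalone validator applying the same checks, for illustration only:

#include <cstdio>

// returns true if s is valid UTF-8, using the same masks as the diff above
static bool ValidUtf8(const unsigned char *s, size_t len) {
    for (size_t i = 0; i < len; i++) {
        int c = s[i], cp, extra, min_mask;
        if ((c & 0x80) == 0) {
            continue; // 1-byte / ASCII
        } else if ((c & 0xE0) == 0xC0) {
            cp = c & 0x1F; extra = 1; min_mask = 0x000780; // 2-byte sequence
        } else if ((c & 0xF0) == 0xE0) {
            cp = c & 0x0F; extra = 2; min_mask = 0x00F800; // 3-byte sequence
        } else if ((c & 0xF8) == 0xF0) {
            cp = c & 0x07; extra = 3; min_mask = 0x1F0000; // 4-byte sequence
        } else {
            return false; // invalid start byte
        }
        if (len - i < (size_t)extra + 1) {
            return false; // truncated sequence
        }
        for (int j = 0; j < extra; j++) {
            int cc = s[++i];
            if ((cc & 0xC0) != 0x80) {
                return false; // continuation byte is not 10xxxxxx
            }
            cp = (cp << 6) | (cc & 0x3F);
        }
        if ((cp & min_mask) == 0) {
            return false; // overlong encoding
        }
        if (cp > 0x10FFFF) {
            return false; // beyond the Unicode range
        }
        if ((cp & 0x1FFF800) == 0xD800) {
            return false; // UTF-16 surrogate, invalid in UTF-8
        }
    }
    return true;
}

int main() {
    std::printf("%d\n", ValidUtf8((const unsigned char *)"h\xC3\xA9", 3));    // 1: "hé"
    std::printf("%d\n", ValidUtf8((const unsigned char *)"\xC0\xAF", 2));     // 0: overlong '/'
    std::printf("%d\n", ValidUtf8((const unsigned char *)"\xED\xA0\x80", 3)); // 0: surrogate U+D800
}
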