npm - duckdb - Versions diffs - 0.5.2-dev1295.0 → 0.5.2-dev1312.0 - Mend

duckdb 0.5.2-dev1295.0 → 0.5.2-dev1312.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/package.json +1 -1
package/src/duckdb.cpp +366 -186
package/src/duckdb.hpp +5 -2
package/src/parquet-amalgamation.cpp +37760 -37760

package/src/duckdb.cpp CHANGED Viewed

@@ -17668,6 +17668,25 @@ public:
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// duckdb/common/likely.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+#if __GNUC__
+#define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (__builtin_expect(cond, expected_value))
+#else
+#define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (cond)
+#endif
+#define DUCKDB_LIKELY(...)   DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 1)
+#define DUCKDB_UNLIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 0)
 namespace duckdb {
 struct StringDictionaryContainer {
@@ -17723,31 +17742,33 @@ public:
 		return StringAppendBase(append_state.handle, segment, stats, data, offset, count);
 	}
-	template <bool DUPLICATE_ELIMINATE = false>
 	static idx_t StringAppendBase(ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data,
-	                              idx_t offset, idx_t count,
-	                              std::unordered_map<string, int32_t> *seen_strings = nullptr) {
+	                              idx_t offset, idx_t count) {
 		auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
 		auto handle = buffer_manager.Pin(segment.block);
 		return StringAppendBase(handle, segment, stats, data, offset, count);
 	}
-	template <bool DUPLICATE_ELIMINATE = false>
 	static idx_t StringAppendBase(BufferHandle &handle, ColumnSegment &segment, SegmentStatistics &stats,
-	                              UnifiedVectorFormat &data, idx_t offset, idx_t count,
-	                              std::unordered_map<string, int32_t> *seen_strings = nullptr) {
+	                              UnifiedVectorFormat &data, idx_t offset, idx_t count) {
 		D_ASSERT(segment.GetBlockOffset() == 0);
+		auto handle_ptr = handle.Ptr();
 		auto source_data = (string_t *)data.data;
-		auto result_data = (int32_t *)(handle.Ptr() + DICTIONARY_HEADER_SIZE);
+		auto result_data = (int32_t *)(handle_ptr + DICTIONARY_HEADER_SIZE);
+		uint32_t *dictionary_size = (uint32_t *)handle_ptr;
+		uint32_t *dictionary_end = (uint32_t *)(handle_ptr + sizeof(uint32_t));
+		idx_t remaining_space = RemainingSpace(segment, handle);
+		auto base_count = segment.count.load();
 		for (idx_t i = 0; i < count; i++) {
 			auto source_idx = data.sel->get_index(offset + i);
-			auto target_idx = segment.count.load();
-			idx_t remaining_space = RemainingSpace(segment, handle);
+			auto target_idx = base_count + i;
 			if (remaining_space < sizeof(int32_t)) {
 				// string index does not fit in the block at all
+				segment.count += i;
 				return i;
 			}
 			remaining_space -= sizeof(int32_t);
-			auto dictionary = GetDictionary(segment, handle);
 			if (!data.validity.RowIsValid(source_idx)) {
 				// null value is stored as a copy of the last value, this is done to be able to efficiently do the
 				// string_length calculation
@@ -17756,82 +17777,68 @@ public:
 				} else {
 					result_data[target_idx] = 0;
 				}
-			} else {
-				auto end = handle.Ptr() + dictionary.end;
-				dictionary.Verify();
-				int32_t match;
-				bool found;
-				if (DUPLICATE_ELIMINATE) {
-					auto search = seen_strings->find(source_data[source_idx].GetString());
-					if (search != seen_strings->end()) {
-						match = search->second;
-						found = true;
-					} else {
-						found = false;
-					}
-				}
-				if (DUPLICATE_ELIMINATE && found) {
-					// We have seen this string
-					result_data[target_idx] = match;
-				} else {
-					// Unknown string, continue
-					// non-null value, check if we can fit it within the block
-					idx_t string_length = source_data[source_idx].GetSize();
-					idx_t dictionary_length = string_length;
-					// determine whether or not we have space in the block for this string
-					bool use_overflow_block = false;
-					idx_t required_space = dictionary_length;
-					if (required_space >= StringUncompressed::STRING_BLOCK_LIMIT) {
-						// string exceeds block limit, store in overflow block and only write a marker here
-						required_space = BIG_STRING_MARKER_SIZE;
-						use_overflow_block = true;
-					}
-					if (required_space > remaining_space) {
-						// no space remaining: return how many tuples we ended up writing
-						return i;
-					}
+				continue;
+			}
+			auto end = handle.Ptr() + *dictionary_end;
-					// we have space: write the string
-					UpdateStringStats(stats, source_data[source_idx]);
+#ifdef DEBUG
+			GetDictionary(segment, handle).Verify();
+#endif
+			// no duplicate elimination is performed: each non-null string is appended as a new entry
+			// non-null value, check if we can fit it within the block
+			idx_t string_length = source_data[source_idx].GetSize();
+			// determine whether or not we have space in the block for this string
+			bool use_overflow_block = false;
+			idx_t required_space = string_length;
+			if (DUCKDB_UNLIKELY(required_space >= StringUncompressed::STRING_BLOCK_LIMIT)) {
+				// string exceeds block limit, store in overflow block and only write a marker here
+				required_space = BIG_STRING_MARKER_SIZE;
+				use_overflow_block = true;
+			}
+			if (DUCKDB_UNLIKELY(required_space > remaining_space)) {
+				// no space remaining: return how many tuples we ended up writing
+				segment.count += i;
+				return i;
+			}
-					if (use_overflow_block) {
-						// write to overflow blocks
-						block_id_t block;
-						int32_t offset;
-						// write the string into the current string block
-						WriteString(segment, source_data[source_idx], block, offset);
-						dictionary.size += BIG_STRING_MARKER_SIZE;
-						auto dict_pos = end - dictionary.size;
+			// we have space: write the string
+			UpdateStringStats(stats, source_data[source_idx]);
-						// write a big string marker into the dictionary
-						WriteStringMarker(dict_pos, block, offset);
-					} else {
-						// string fits in block, append to dictionary and increment dictionary position
-						D_ASSERT(string_length < NumericLimits<uint16_t>::Maximum());
-						dictionary.size += required_space;
-						auto dict_pos = end - dictionary.size;
-						// now write the actual string data into the dictionary
-						memcpy(dict_pos, source_data[source_idx].GetDataUnsafe(), string_length);
-					}
-					D_ASSERT(RemainingSpace(segment, handle) <= Storage::BLOCK_SIZE);
-					// place the dictionary offset into the set of vectors
-					dictionary.Verify();
+			if (DUCKDB_UNLIKELY(use_overflow_block)) {
+				// write to overflow blocks
+				block_id_t block;
+				int32_t offset;
+				// write the string into the current string block
+				WriteString(segment, source_data[source_idx], block, offset);
+				*dictionary_size += BIG_STRING_MARKER_SIZE;
+				remaining_space -= BIG_STRING_MARKER_SIZE;
+				auto dict_pos = end - *dictionary_size;
-					// note: for overflow strings we write negative value
-					result_data[target_idx] = use_overflow_block ? -1 * dictionary.size : dictionary.size;
+				// write a big string marker into the dictionary
+				WriteStringMarker(dict_pos, block, offset);
-					if (DUPLICATE_ELIMINATE) {
-						seen_strings->insert({source_data[source_idx].GetString(), dictionary.size});
-					}
-					SetDictionary(segment, handle, dictionary);
-				}
-			}
-			segment.count++;
+				// place the dictionary offset into the set of vectors
+				// note: for overflow strings we write negative value
+				result_data[target_idx] = -(*dictionary_size);
+			} else {
+				// string fits in block, append to dictionary and increment dictionary position
+				D_ASSERT(string_length < NumericLimits<uint16_t>::Maximum());
+				*dictionary_size += required_space;
+				remaining_space -= required_space;
+				auto dict_pos = end - *dictionary_size;
+				// now write the actual string data into the dictionary
+				memcpy(dict_pos, source_data[source_idx].GetDataUnsafe(), string_length);
+				// place the dictionary offset into the set of vectors
+				result_data[target_idx] = *dictionary_size;
+			}
+			D_ASSERT(RemainingSpace(segment, handle) <= Storage::BLOCK_SIZE);
+#ifdef DEBUG
+			GetDictionary(segment, handle).Verify();
+#endif
 		}
+		segment.count += count;
 		return count;
 	}
@@ -53611,24 +53618,6 @@ void VectorOperations::Not(Vector &input, Vector &result, idx_t count) {
-//===----------------------------------------------------------------------===//
-//                         DuckDB
-//
-// duckdb/common/likely.hpp
-//
-//
-//===----------------------------------------------------------------------===//
-#if __GNUC__
-#define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (__builtin_expect(cond, expected_value))
-#else
-#define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (cond)
-#endif
-#define DUCKDB_LIKELY(...)   DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 1)
-#define DUCKDB_UNLIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 0)
 namespace duckdb {
@@ -64202,6 +64191,10 @@ public:
 	OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
 	                           GlobalOperatorState &gstate, OperatorState &state) const override;
+	bool IsOrderDependent() const override {
+		return true;
+	}
 	string ParamsToString() const override;
 };
@@ -65145,6 +65138,9 @@ public:
 	//! The projection list of the WINDOW statement (may contain aggregates)
 	vector<unique_ptr<Expression>> select_list;
+	//! Whether or not the window is order dependent (true if at least one window function contains neither an order
+	//! nor a partition clause)
+	bool is_order_dependent;
 public:
 	// Source interface
@@ -65178,7 +65174,11 @@ public:
 	}
 	bool ParallelSink() const override {
-		return true;
+		return !is_order_dependent;
+	}
+	bool IsOrderDependent() const override {
+		return is_order_dependent;
 	}
 public:
@@ -65873,9 +65873,17 @@ void WindowGlobalSinkState::Finalize() {
 }
 // this implements a sorted window functions variant
-PhysicalWindow::PhysicalWindow(vector<LogicalType> types, vector<unique_ptr<Expression>> select_list,
+PhysicalWindow::PhysicalWindow(vector<LogicalType> types, vector<unique_ptr<Expression>> select_list_p,
                                idx_t estimated_cardinality, PhysicalOperatorType type)
-    : PhysicalOperator(type, move(types), estimated_cardinality), select_list(move(select_list)) {
+    : PhysicalOperator(type, move(types), estimated_cardinality), select_list(move(select_list_p)) {
+	is_order_dependent = false;
+	for (auto &expr : select_list) {
+		D_ASSERT(expr->expression_class == ExpressionClass::BOUND_WINDOW);
+		auto &bound_window = (BoundWindowExpression &)*expr;
+		if (bound_window.partitions.empty() && bound_window.orders.empty()) {
+			is_order_dependent = true;
+		}
+	}
 }
 static idx_t FindNextStart(const ValidityMask &mask, idx_t l, const idx_t r, idx_t &n) {
@@ -78838,6 +78846,84 @@ PhysicalBatchInsert::PhysicalBatchInsert(LogicalOperator &op, SchemaCatalogEntry
 //===--------------------------------------------------------------------===//
 // Sink
 //===--------------------------------------------------------------------===//
+class CollectionMerger {
+public:
+	explicit CollectionMerger(ClientContext &context) : context(context) {
+	}
+	ClientContext &context;
+	vector<unique_ptr<RowGroupCollection>> current_collections;
+public:
+	void AddCollection(unique_ptr<RowGroupCollection> collection) {
+		current_collections.push_back(move(collection));
+	}
+	bool Empty() {
+		return current_collections.empty();
+	}
+	unique_ptr<RowGroupCollection> Flush(OptimisticDataWriter &writer) {
+		if (Empty()) {
+			return nullptr;
+		}
+		unique_ptr<RowGroupCollection> new_collection;
+		if (current_collections.size() == 1) {
+			// we have gathered only one row group collection: merge it directly
+			new_collection = move(current_collections[0]);
+		} else {
+			// we have gathered multiple collections: create one big collection and merge that
+			// find the biggest collection
+			idx_t biggest_index = 0;
+			for (idx_t i = 1; i < current_collections.size(); i++) {
+				D_ASSERT(current_collections[i]);
+				if (current_collections[i]->GetTotalRows() > current_collections[biggest_index]->GetTotalRows()) {
+					biggest_index = i;
+				}
+			}
+			// now append all the other collections to this collection
+			new_collection = move(current_collections[biggest_index]);
+			auto &types = new_collection->GetTypes();
+			TableAppendState append_state;
+			new_collection->InitializeAppend(append_state);
+			DataChunk scan_chunk;
+			scan_chunk.Initialize(context, types);
+			vector<column_t> column_ids;
+			for (idx_t i = 0; i < types.size(); i++) {
+				column_ids.push_back(i);
+			}
+			for (auto &collection : current_collections) {
+				if (!collection) {
+					continue;
+				}
+				TableScanState scan_state;
+				scan_state.Initialize(column_ids);
+				collection->InitializeScan(scan_state.local_state, column_ids, nullptr);
+				while (true) {
+					scan_chunk.Reset();
+					scan_state.local_state.ScanCommitted(scan_chunk, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
+					if (scan_chunk.size() == 0) {
+						break;
+					}
+					auto new_row_group = new_collection->Append(scan_chunk, append_state);
+					if (new_row_group) {
+						writer.CheckFlushToDisk(*new_collection);
+					}
+				}
+			}
+			new_collection->FinalizeAppend(TransactionData(0, 0), append_state);
+			writer.FlushToDisk(*new_collection);
+		}
+		current_collections.clear();
+		return new_collection;
+	}
+};
 class BatchInsertGlobalState : public GlobalSinkState {
 public:
 	explicit BatchInsertGlobalState() : insert_count(0) {
@@ -78848,16 +78934,124 @@ public:
 	idx_t insert_count;
 	map<idx_t, unique_ptr<RowGroupCollection>> collections;
-	void AddCollection(idx_t batch_index, unique_ptr<RowGroupCollection> current_collection) {
-		lock_guard<mutex> l(lock);
-		insert_count += current_collection->GetTotalRows();
+	bool CheckMergeInternal(idx_t batch_index, vector<unique_ptr<RowGroupCollection>> *result, idx_t *merge_count) {
+		auto entry = collections.find(batch_index);
+		if (entry == collections.end()) {
+			// no collection at this index
+			return false;
+		}
+		auto row_count = entry->second->GetTotalRows();
+		if (row_count >= LocalStorage::MERGE_THRESHOLD) {
+			// the collection at this batch index is large and has already been written
+			return false;
+		}
+		// we can merge this collection!
+		if (merge_count) {
+			// add the count
+			D_ASSERT(!result);
+			*merge_count += row_count;
+		} else {
+			// add the collection to the result and remove it from the map
+			D_ASSERT(result);
+			result->push_back(move(entry->second));
+			collections.erase(batch_index);
+		}
+		return true;
+	}
+	bool CheckMerge(idx_t batch_index, idx_t &merge_count) {
+		return CheckMergeInternal(batch_index, nullptr, &merge_count);
+	}
+	bool CheckMerge(idx_t batch_index, vector<unique_ptr<RowGroupCollection>> &result) {
+		return CheckMergeInternal(batch_index, &result, nullptr);
+	}
+	unique_ptr<RowGroupCollection> MergeCollections(ClientContext &context,
+	                                                vector<unique_ptr<RowGroupCollection>> merge_collections,
+	                                                OptimisticDataWriter &writer) {
+		CollectionMerger merger(context);
+		for (auto &collection : merge_collections) {
+			merger.AddCollection(move(collection));
+		}
+		return merger.Flush(writer);
+	}
+	void VerifyUniqueBatch(idx_t batch_index) {
 		if (collections.find(batch_index) != collections.end()) {
 			throw InternalException("PhysicalBatchInsert::AddCollection error: batch index %d is present in multiple "
 			                        "collections. This occurs when "
 			                        "batch indexes are not uniquely distributed over threads",
 			                        batch_index);
 		}
-		collections[batch_index] = move(current_collection);
+	}
+	void AddCollection(ClientContext &context, idx_t batch_index, unique_ptr<RowGroupCollection> current_collection,
+	                   OptimisticDataWriter *writer = nullptr, bool *written_to_disk = nullptr) {
+		vector<unique_ptr<RowGroupCollection>> merge_collections;
+		idx_t merge_count;
+		{
+			lock_guard<mutex> l(lock);
+			auto new_count = current_collection->GetTotalRows();
+			insert_count += new_count;
+			VerifyUniqueBatch(batch_index);
+			if (writer && new_count < LocalStorage::MERGE_THRESHOLD) {
+				// we are inserting a small collection that has not yet been written to disk
+				// check if there are any collections with adjacent batch indexes that we can merge together
+				// first check how many rows we will end up with by performing such a merge
+				// check backwards
+				merge_count = new_count;
+				idx_t start_batch_index;
+				idx_t end_batch_index;
+				for (start_batch_index = batch_index; start_batch_index > 0; start_batch_index--) {
+					if (!CheckMerge(start_batch_index - 1, merge_count)) {
+						break;
+					}
+				}
+				// check forwards
+				for (end_batch_index = batch_index;; end_batch_index++) {
+					if (!CheckMerge(end_batch_index + 1, merge_count)) {
+						break;
+					}
+				}
+				// merging together creates a big enough row group
+				// merge!
+				if (merge_count >= RowGroup::ROW_GROUP_SIZE) {
+					// gather the row groups to merge
+					// note that we need to gather them in order of batch index
+					for (idx_t i = start_batch_index; i <= end_batch_index; i++) {
+						if (i == batch_index) {
+							merge_collections.push_back(move(current_collection));
+							continue;
+						}
+						auto can_merge = CheckMerge(i, merge_collections);
+						if (!can_merge) {
+							throw InternalException("Could not merge row group in batch insert?!");
+						}
+					}
+				}
+			}
+			if (merge_collections.empty()) {
+				// no collections to merge together - add the collection to the batch index
+				collections[batch_index] = move(current_collection);
+			}
+		}
+		if (!merge_collections.empty()) {
+			// merge together the collections
+			D_ASSERT(writer);
+			auto final_collection = MergeCollections(context, move(merge_collections), *writer);
+			D_ASSERT(final_collection->GetTotalRows() == merge_count);
+			D_ASSERT(final_collection->GetTotalRows() >= RowGroup::ROW_GROUP_SIZE);
+			if (written_to_disk) {
+				*written_to_disk = true;
+			}
+			// add the merged-together collection to the set of collections under this batch index
+			{
+				lock_guard<mutex> l(lock);
+				VerifyUniqueBatch(batch_index);
+				collections[batch_index] = move(final_collection);
+			}
+		}
 	}
 };
@@ -78877,6 +79071,16 @@ public:
 	unique_ptr<OptimisticDataWriter> writer;
 	bool written_to_disk;
+	void FlushToDisk() {
+		if (!current_collection) {
+			return;
+		}
+		if (!written_to_disk || current_collection->GetTotalRows() < LocalStorage::MERGE_THRESHOLD) {
+			return;
+		}
+		writer->FlushToDisk(*current_collection);
+	}
 	void CreateNewCollection(TableCatalogEntry *table, const vector<LogicalType> &insert_types) {
 		auto &table_info = table->storage->info;
 		auto &block_manager = TableIOManager::Get(*table->storage).GetBlockManagerForRowData();
@@ -78921,10 +79125,10 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, GlobalSinkSt
 		// batch index has changed: move the old collection to the global state and create a new collection
 		TransactionData tdata(0, 0);
 		lstate.current_collection->FinalizeAppend(tdata, lstate.current_append_state);
-		if (lstate.written_to_disk) {
-			lstate.writer->FlushToDisk(*lstate.current_collection);
-		}
-		gstate.AddCollection(lstate.current_index, move(lstate.current_collection));
+		lstate.FlushToDisk();
+		gstate.AddCollection(context.client, lstate.current_index, move(lstate.current_collection), lstate.writer.get(),
+		                     &lstate.written_to_disk);
 		lstate.CreateNewCollection(table, insert_types);
 	}
 	lstate.current_index = lstate.batch_index;
@@ -78948,94 +79152,61 @@ void PhysicalBatchInsert::Combine(ExecutionContext &context, GlobalSinkState &gs
 	if (!lstate.current_collection) {
 		return;
 	}
-	if (lstate.written_to_disk) {
-		lstate.writer->FlushToDisk(*lstate.current_collection);
-	}
+	lstate.FlushToDisk();
 	lstate.writer->FinalFlush();
 	TransactionData tdata(0, 0);
 	lstate.current_collection->FinalizeAppend(tdata, lstate.current_append_state);
-	gstate.AddCollection(lstate.current_index, move(lstate.current_collection));
+	gstate.AddCollection(context.client, lstate.current_index, move(lstate.current_collection));
 }
-struct CollectionMerger {
-	vector<unique_ptr<RowGroupCollection>> current_collections;
-	void AddCollection(unique_ptr<RowGroupCollection> collection) {
-		current_collections.push_back(move(collection));
-	}
-	bool Empty() {
-		return current_collections.empty();
-	}
-	void Flush(ClientContext &context, DataTable &storage) {
-		if (Empty()) {
-			return;
-		}
-		unique_ptr<RowGroupCollection> new_collection;
-		if (current_collections.size() == 1) {
-			// we have gathered only one row group collection: merge it directly
-			new_collection = move(current_collections[0]);
-		} else {
-			// we have gathered multiple collections: create one big collection and merge that
-			auto &table_info = storage.info;
-			auto &block_manager = TableIOManager::Get(storage).GetBlockManagerForRowData();
-			auto types = storage.GetTypes();
-			new_collection = make_unique<RowGroupCollection>(table_info, block_manager, types, MAX_ROW_ID);
-			TableAppendState append_state;
-			new_collection->InitializeEmpty();
-			new_collection->InitializeAppend(append_state);
-			DataChunk scan_chunk;
-			scan_chunk.Initialize(context, types);
-			vector<column_t> column_ids;
-			for (idx_t i = 0; i < types.size(); i++) {
-				column_ids.push_back(i);
-			}
-			for (auto &collection : current_collections) {
-				TableScanState scan_state;
-				scan_state.Initialize(column_ids);
-				collection->InitializeScan(scan_state.local_state, column_ids, nullptr);
-				while (true) {
-					scan_chunk.Reset();
-					scan_state.local_state.ScanCommitted(scan_chunk, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
-					if (scan_chunk.size() == 0) {
-						break;
-					}
-					new_collection->Append(scan_chunk, append_state);
-				}
-			}
-			new_collection->FinalizeAppend(TransactionData(0, 0), append_state);
-		}
-		storage.LocalMerge(context, *new_collection);
-		current_collections.clear();
-	}
-};
 SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
                                                GlobalSinkState &gstate_p) const {
 	auto &gstate = (BatchInsertGlobalState &)gstate_p;
-	CollectionMerger merger;
+	// in the finalize, do a final pass over all of the collections we created and try to merge smaller collections
+	// together
+	vector<unique_ptr<CollectionMerger>> mergers;
+	unique_ptr<CollectionMerger> current_merger;
 	auto &storage = *gstate.table->storage;
 	for (auto &collection : gstate.collections) {
 		if (collection.second->GetTotalRows() < LocalStorage::MERGE_THRESHOLD) {
 			// this collection has very few rows: add it to the merge set
-			merger.AddCollection(move(collection.second));
+			if (!current_merger) {
+				current_merger = make_unique<CollectionMerger>(context);
+			}
+			current_merger->AddCollection(move(collection.second));
 		} else {
-			if (!merger.Empty()) {
+			// this collection has a lot of rows: it does not need to be merged
+			// create a separate collection merger only for this entry
+			if (current_merger) {
 				// we have small collections remaining: flush them
-				merger.Flush(context, storage);
+				mergers.push_back(move(current_merger));
+				current_merger.reset();
 			}
-			storage.LocalMerge(context, *collection.second);
+			auto larger_merger = make_unique<CollectionMerger>(context);
+			larger_merger->AddCollection(move(collection.second));
+			mergers.push_back(move(larger_merger));
 		}
 	}
-	merger.Flush(context, storage);
+	if (current_merger) {
+		mergers.push_back(move(current_merger));
+	}
+	// now that we have created all of the mergers, perform the actual merging
+	vector<unique_ptr<RowGroupCollection>> final_collections;
+	final_collections.reserve(mergers.size());
+	auto writer = make_unique<OptimisticDataWriter>(&storage);
+	for (auto &merger : mergers) {
+		final_collections.push_back(merger->Flush(*writer));
+	}
+	writer->FinalFlush();
+	// finally, merge the row groups into the local storage
+	for (auto &collection : final_collections) {
+		storage.LocalMerge(context, *collection);
+	}
 	return SinkFinalizeType::READY;
 }
@@ -79114,6 +79285,10 @@ public:
 	bool IsSink() const override {
 		return true;
 	}
+	bool IsOrderDependent() const override {
+		return true;
+	}
 };
 } // namespace duckdb
@@ -82913,19 +83088,24 @@ OperatorResultType CachingPhysicalOperator::Execute(ExecutionContext &context, D
 	auto child_result = ExecuteInternal(context, input, chunk, gstate, state);
 #if STANDARD_VECTOR_SIZE >= 128
-	if (!context.pipeline || !caching_supported) {
-		return child_result;
-	}
+	if (!state.initialized) {
+		state.initialized = true;
+		state.can_cache_chunk = true;
+		if (!context.pipeline || !caching_supported) {
+			state.can_cache_chunk = false;
+		}
-	if (context.pipeline->GetSink() && context.pipeline->GetSink()->RequiresBatchIndex() &&
-	    context.pipeline->GetSource()->SupportsBatchIndex()) {
-		return child_result;
-	}
+		if (context.pipeline->GetSink() && context.pipeline->GetSink()->RequiresBatchIndex()) {
+			state.can_cache_chunk = false;
+		}
-	if (context.pipeline->IsOrderDependent()) {
+		if (context.pipeline->IsOrderDependent()) {
+			state.can_cache_chunk = false;
+		}
+	}
+	if (!state.can_cache_chunk) {
 		return child_result;
 	}
 	if (chunk.size() < CACHE_THRESHOLD) {
 		// we have filtered out a significant amount of tuples
 		// add this chunk to the cache and continue