tigerbeetle-node 0.11.9 → 0.11.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.client.node.sha256 +1 -1
- package/package.json +1 -1
- package/scripts/build_lib.sh +22 -10
- package/src/tigerbeetle/src/config.zig +2 -0
- package/src/tigerbeetle/src/constants.zig +18 -0
- package/src/tigerbeetle/src/lsm/forest_fuzz.zig +0 -2
- package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +6 -5
- package/src/tigerbeetle/src/lsm/test.zig +5 -4
- package/src/tigerbeetle/src/lsm/tree.zig +3 -30
- package/src/tigerbeetle/src/lsm/tree_fuzz.zig +0 -2
- package/src/tigerbeetle/src/simulator.zig +34 -53
- package/src/tigerbeetle/src/testing/cluster/state_checker.zig +1 -1
- package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +6 -6
- package/src/tigerbeetle/src/testing/cluster.zig +5 -99
- package/src/tigerbeetle/src/testing/storage.zig +26 -23
- package/src/tigerbeetle/src/vsr/journal.zig +86 -90
- package/src/tigerbeetle/src/vsr/replica.zig +631 -687
- package/src/tigerbeetle/src/vsr/replica_format.zig +12 -12
- package/src/tigerbeetle/src/vsr/superblock.zig +192 -110
- package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +35 -8
- package/src/tigerbeetle/src/vsr/superblock_quorums.zig +48 -48
- package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +50 -50
- package/src/tigerbeetle/src/vsr.zig +93 -29
package/dist/.client.node.sha256
CHANGED

@@ -1 +1 @@
-
+21c6105d76e0efc68fe5cbe799d363a083a9b44359e16d7dbfbb172e381ea0c3 dist/client.node
package/package.json
CHANGED
package/scripts/build_lib.sh
CHANGED

@@ -5,25 +5,37 @@ set -e
 # macOS 13 Ventura is not supported on Zig 0.9.x.
 # Overriding -target is one workaround Andrew suggests.
 # https://github.com/ziglang/zig/issues/10478#issuecomment-1294313967
-
-# because the rest of it doesn't always seem to be valid when passed
-# back in to `-target`.
-target="$(./zig/zig targets | grep triple |cut -d '"' -f 4 | cut -d '.' -f 1)"
+target=""
 if [ "$(./zig/zig targets | grep triple |cut -d '"' -f 4 | cut -d '.' -f 1,2)" = "aarch64-macos.13" ]; then
-  target="native-macos.11"
+  target="-target native-macos.11"
 fi
 
-
+# Zig picks musl libc on RHEL instead of glibc, incorrectly
+# https://github.com/ziglang/zig/issues/12156
+if [ -f "/etc/redhat-release" ]; then
+  if ! grep Fedora /etc/redhat-release; then
+    target="-target native-native-gnu"
+  fi
+fi
+
+if [ "$target" = "" ]; then
+  echo "Building default target"
+else
+  echo "Building for '$target'"
+fi
 
 mkdir -p dist
 
-
+# Need to do string eval-ing because of shellcheck's strict string
+# interpolation rules.
+cmd="./zig/zig build-lib \
 -mcpu=baseline \
 -OReleaseSafe \
 -dynamic \
 -lc \
--isystem build/node
+-isystem build/node-$(node --version)/include/node \
 -fallow-shlib-undefined \
 -femit-bin=dist/client.node \
-
-
+$target src/node.zig"
+
+eval "$cmd"
package/src/tigerbeetle/src/config.zig
CHANGED

@@ -80,6 +80,7 @@ const ConfigCluster = struct {
     cache_line_size: comptime_int = 64,
     clients_max: usize,
     pipeline_prepare_queue_max: usize = 8,
+    view_change_headers_max: usize = 8,
     quorum_replication_max: u8 = 3,
     journal_slot_count: usize = 1024,
     message_size_max: usize = 1 * 1024 * 1024,

@@ -180,6 +181,7 @@ pub const configs = struct {
     .cluster = .{
         .clients_max = 4 + 3,
         .pipeline_prepare_queue_max = 4,
+        .view_change_headers_max = 4,
         .journal_slot_count = Config.Cluster.journal_slot_count_min,
         .message_size_max = Config.Cluster.message_size_max_min(4),
         .storage_size_max = 4 * 1024 * 1024 * 1024,
package/src/tigerbeetle/src/constants.zig
CHANGED

@@ -157,6 +157,9 @@ comptime {
     assert(message_size_max >= @sizeOf(vsr.Header));
     assert(message_size_max >= sector_size);
     assert(message_size_max >= Config.Cluster.message_size_max_min(clients_max));
+
+    // Ensure that DVC/SV messages can fit all necessary headers.
+    assert(message_body_size_max >= view_change_headers_max * @sizeOf(vsr.Header));
 }
 
 /// The maximum number of Viewstamped Replication prepare messages that can be inflight at a time.

@@ -184,6 +187,21 @@ comptime {
     assert(pipeline_request_queue_max >= 0);
 }
 
+/// The number of prepare headers to include in the body of a DVC/SV.
+///
+/// CRITICAL:
+/// We must provide enough headers to cover all uncommitted headers so that the new
+/// primary (if we are in a view change) can decide whether to discard uncommitted headers
+/// that cannot be repaired because they are gaps. See DVCQuorum for more detail.
+pub const view_change_headers_max = config.cluster.view_change_headers_max;
+
+comptime {
+    assert(view_change_headers_max > 0);
+    assert(view_change_headers_max >= pipeline_prepare_queue_max);
+    assert(view_change_headers_max <= journal_slot_count);
+    assert(view_change_headers_max <= @divFloor(message_body_size_max, @sizeOf(vsr.Header)));
+}
+
 /// The minimum and maximum amount of time in milliseconds to wait before initiating a connection.
 /// Exponential backoff and jitter are applied within this range.
 pub const connection_delay_min_ms = config.process.connection_delay_min_ms;
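Note: the new comptime block pins `view_change_headers_max` between the pipeline depth and the WAL/message capacity. A standalone sketch of those bounds with made-up values (the 128-byte header size and all the numbers here are assumptions for illustration, not the shipped config plumbing):

    const std = @import("std");
    const assert = std.debug.assert;

    const header_size = 128; // assumed @sizeOf(vsr.Header)
    const pipeline_prepare_queue_max = 4;
    const journal_slot_count = 32;
    const message_body_size_max = 1024 * 1024 - header_size;
    const view_change_headers_max = 4;

    comptime {
        assert(view_change_headers_max > 0);
        // Must cover every op that may still sit uncommitted in the pipeline.
        assert(view_change_headers_max >= pipeline_prepare_queue_max);
        // Cannot reference more slots than the WAL holds.
        assert(view_change_headers_max <= journal_slot_count);
        // All headers must fit in a single message body.
        assert(view_change_headers_max <= @divFloor(message_body_size_max, header_size));
    }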
package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig
CHANGED

@@ -419,16 +419,17 @@ const Environment = struct {
     env.manifest_log.checkpoint(checkpoint_callback);
     env.wait(&env.manifest_log);
 
-
-    vsr_state.commit_min += 1;
-    vsr_state.commit_min_checksum += 1;
-    vsr_state.commit_max += 1;
+    const vsr_state = &env.manifest_log.superblock.working.vsr_state;
 
     env.pending += 1;
     env.manifest_log.superblock.checkpoint(
         checkpoint_superblock_callback,
         &env.superblock_context,
-
+        .{
+            .commit_min_checksum = vsr_state.commit_min_checksum + 1,
+            .commit_min = vsr_state.commit_min + 1,
+            .commit_max = vsr_state.commit_max + 1,
+        },
     );
     env.wait(&env.manifest_log);
 
package/src/tigerbeetle/src/lsm/test.zig
CHANGED

@@ -202,15 +202,16 @@ const Environment = struct {
 
     log.debug("forest checkpointing completed!", .{});
 
-
-    vsr_state.commit_min += 1;
-    vsr_state.commit_min_checkpoint += 1;
+    const vsr_state = &env.superblock.staging.vsr_state;
 
     env.state = .superblock_checkpointing;
     env.superblock.checkpoint(
         superblock_checkpoint_callback,
         &env.superblock_context,
-
+        .{
+            .commit_min_checkpoint = vsr_state.commit_min_checkpoint + 1,
+            .commit_min = vsr_state.commit_min + 1,
+        },
     );
 }
 
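Note: both fuzzers above reflect the same API change: `SuperBlock.checkpoint` now receives the next VSR state as an options struct, rather than the caller mutating `vsr_state` in place beforehand. A minimal sketch of the new call shape (simplified stand-in types, not the real SuperBlock):

    const std = @import("std");

    const CheckpointOptions = struct {
        commit_min_checksum: u128,
        commit_min: u64,
        commit_max: u64,
    };

    const SuperBlock = struct {
        commit_min_checksum: u128 = 0,
        commit_min: u64 = 0,
        commit_max: u64 = 0,

        // The caller passes the *next* state explicitly; the superblock applies
        // it as one atomic update instead of reading caller-mutated fields.
        fn checkpoint(superblock: *SuperBlock, options: CheckpointOptions) void {
            superblock.commit_min_checksum = options.commit_min_checksum;
            superblock.commit_min = options.commit_min;
            superblock.commit_max = options.commit_max;
        }
    };

    test "checkpoint applies the next vsr state atomically" {
        var superblock = SuperBlock{};
        superblock.checkpoint(.{
            .commit_min_checksum = superblock.commit_min_checksum + 1,
            .commit_min = superblock.commit_min + 1,
            .commit_max = superblock.commit_max + 1,
        });
        try std.testing.expectEqual(@as(u64, 1), superblock.commit_min);
    }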
package/src/tigerbeetle/src/lsm/tree.zig
CHANGED

@@ -145,7 +145,6 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
 
     compaction_io_pending: usize,
     compaction_callback: ?fn (*Tree) void,
-    compaction_next_tick: Grid.NextTick = undefined,
 
     checkpoint_callback: ?fn (*Tree) void,
     open_callback: ?fn (*Tree) void,

@@ -337,8 +336,7 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
     }
 
     if (index_block_count == 0) {
-        context
-        tree.grid.on_next_tick(lookup_invalid_tick_callback, &context.next_tick);
+        callback(context, null);
         return;
     }
 

@@ -348,7 +346,6 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
     context.* = .{
         .tree = tree,
         .completion = undefined,
-        .next_tick = undefined,
 
         .key = key,
         .fingerprint = fingerprint,

@@ -363,18 +360,12 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
         context.read_index_block();
     }
 
-    fn lookup_invalid_tick_callback(next_tick: *Grid.NextTick) void {
-        const context = @fieldParentPtr(LookupContext, "next_tick", next_tick);
-        context.callback(context, null);
-    }
-
     pub const LookupContext = struct {
         const Read = Grid.Read;
         const BlockPtrConst = Grid.BlockPtrConst;
 
         tree: *Tree,
         completion: Read,
-        next_tick: Grid.NextTick,
 
         key: Key,
         fingerprint: bloom_filter.Fingerprint,

@@ -569,8 +560,7 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
         tree.compact_mutable_table_into_immutable();
     }
 
-    tree
-    tree.grid.on_next_tick(compact_skip_tick_callback, &tree.compaction_next_tick);
+    callback(tree);
     return;
 }
 

@@ -598,13 +588,6 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
     tree.compact_drive();
 }
 
-fn compact_skip_tick_callback(next_tick: *Grid.NextTick) void {
-    const tree = @fieldParentPtr(Tree, "compaction_next_tick", next_tick);
-    const callback = tree.compaction_callback.?;
-    tree.compaction_callback = null;
-    callback(tree);
-}
-
 fn compact_start(tree: *Tree, callback: fn (*Tree) void) void {
     assert(tree.compaction_io_pending == 0);
     assert(tree.compaction_callback == null);

@@ -874,7 +857,7 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
     // We are at the end of a half-bar, but the compactions have not finished.
     // We keep ticking them until they finish.
     log.debug(tree_name ++ ": compact_done: driving outstanding compactions", .{});
-    tree.
+    tree.compact_drive();
     return;
 }
 

@@ -952,16 +935,6 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
     tree.manifest.compact(compact_manifest_callback);
 }
 
-/// Asynchronously continue to drive the compactions when they haven't finished at the time
-/// they were supposed to at the end of a half-bar.
-fn compact_drive_tick_callback(next_tick: *Grid.NextTick) void {
-    const tree = @fieldParentPtr(Tree, "compaction_next_tick", next_tick);
-    assert(tree.compaction_io_pending == 0);
-    assert(tree.compaction_callback != null);
-    assert(tree.compaction_op == tree.lookup_snapshot_max);
-    tree.compact_drive();
-}
-
 /// Called after the last beat of a full compaction bar.
 fn compact_mutable_table_into_immutable(tree: *Tree) void {
     assert(tree.table_immutable.free);
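Note: the tree.zig hunks all remove one mechanism: completions with no I/O to do were previously deferred through `Grid.NextTick` plus a `@fieldParentPtr` trampoline, and are now invoked synchronously. A sketch of the before/after control flow (simplified stand-ins, not the real Tree/Grid):

    const Tree = struct {
        compaction_callback: ?fn (*Tree) void = null,

        fn compact(tree: *Tree, callback: fn (*Tree) void, nothing_to_do: bool) void {
            tree.compaction_callback = callback;
            if (nothing_to_do) {
                // Before: tree.grid.on_next_tick(trampoline, &tree.compaction_next_tick)
                // deferred completion to the next event-loop tick.
                // Now: complete synchronously, so callers must tolerate the
                // callback running before compact() returns.
                tree.compaction_callback = null;
                callback(tree);
                return;
            }
            // ... otherwise, start the asynchronous compaction work ...
        }
    };

The trade-off: less per-tree state (no `compaction_next_tick`) and fewer trampolines, at the cost of re-entrant callback invocation.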
package/src/tigerbeetle/src/simulator.zig
CHANGED

@@ -143,7 +143,7 @@ pub fn main() !void {
     const simulator_options = Simulator.Options{
         .cluster = cluster_options,
         .workload = workload_options,
-        .replica_crash_probability = 0.
+        .replica_crash_probability = 0.00002,
         .replica_crash_stability = random.uintLessThan(u32, 1_000),
         .replica_restart_probability = 0.0001,
         .replica_restart_stability = random.uintLessThan(u32, 1_000),

@@ -444,43 +444,11 @@ pub const Simulator = struct {
     }
 
     fn tick_crash(simulator: *Simulator) void {
-
-
-
-
-
-        var replica_normal_min = if (simulator.options.cluster.replica_count == 1)
-            0
-        else
-            vsr.quorums(simulator.options.cluster.replica_count).view_change;
-            break :blk simulator.cluster.replica_normal_count() -| replica_normal_min;
-        };
-
-        for (simulator.cluster.storages) |*storage, replica| {
-            if (simulator.cluster.replicas[replica].journal.status == .recovered) {
-                // TODO Remove this workaround when VSR recovery protocol is disabled.
-                // When only the minimum number of replicas are healthy (no more crashes allowed),
-                // disable storage faults on all healthy replicas.
-                //
-                // This is a workaround to avoid the deadlock that occurs when (for example) in a
-                // cluster of 3 replicas, one is down, another has a corrupt prepare, and the last does
-                // not have the prepare. The two healthy replicas can never complete a view change,
-                // because two replicas are not enough to nack, and the unhealthy replica cannot
-                // complete the VSR recovery protocol either.
-                if (simulator.cluster.replica_health[replica] == .up and crashes == 0) {
-                    if (storage.faulty) {
-                        log_simulator.debug("{}: disable storage faults", .{replica});
-                        storage.faulty = false;
-                    }
-                } else {
-                    // When a journal recovers for the first time, enable its storage faults.
-                    // Future crashes will recover in the presence of faults.
-                    if (!storage.faulty) {
-                        log_simulator.debug("{}: enable storage faults", .{replica});
-                        storage.faulty = true;
-                    }
-                }
-            }
+        const recoverable_count_min =
+            vsr.quorums(simulator.options.cluster.replica_count).view_change;
+        var recoverable_count: usize = 0;
+        for (simulator.cluster.replicas) |*replica| {
+            recoverable_count += @boolToInt(replica.status != .recovering_head);
         }
 
         for (simulator.cluster.replicas) |*replica| {

@@ -490,28 +458,41 @@ pub const Simulator = struct {
 
             switch (simulator.cluster.replica_health[replica.replica]) {
                 .up => {
-
-                    const replica_writes =
+                    const storage = &simulator.cluster.storages[replica.replica];
+                    const replica_writes = storage.writes.count();
                     const crash_probability = simulator.options.replica_crash_probability *
                         @as(f64, if (replica_writes == 0) 1.0 else 10.0);
                     if (!chance_f64(simulator.random, crash_probability)) continue;
 
-                    const
-
-
-
-
-
-
-
-
-
-
+                    const fault = recoverable_count > recoverable_count_min;
+                    replica.superblock.storage.faulty = fault;
+
+                    if (!fault) {
+                        // The journal writes redundant headers of faulty ops as zeroes to ensure
+                        // that they remain faulty after a crash/recover. Since that fault cannot
+                        // be disabled by `storage.faulty`, we must manually repair it here to
+                        // ensure a cluster cannot become stuck in status=recovering_head.
+                        // See recover_slots() for more detail.
+                        const offset = vsr.Zone.wal_headers.offset(0);
+                        const size = vsr.Zone.wal_headers.size().?;
+                        const headers_bytes = storage.memory[offset..][0..size];
+                        const headers = mem.bytesAsSlice(vsr.Header, headers_bytes);
+                        for (headers) |*h, slot| {
+                            if (h.checksum == 0) h.* = storage.wal_prepares()[slot].header;
+                        }
                     }
+
+                    log_simulator.debug("{}: crash replica (faults={})", .{ replica.replica, fault });
+                    simulator.cluster.crash_replica(replica.replica) catch unreachable;
+                    replica.superblock.storage.faulty = true;
+
+                    recoverable_count -= @boolToInt(replica.status == .recovering_head);
+                    assert(replica.status != .recovering_head or fault);
+
+                    simulator.replica_stability[replica.replica] =
+                        simulator.options.replica_crash_stability;
                 },
                 .down => {
-                    assert(replica.status == .recovering);
                     if (chance_f64(simulator.random, simulator.options.replica_restart_probability)) {
                         simulator.cluster.restart_replica(replica.replica);
                         log_simulator.debug("{}: restart replica", .{replica.replica});
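Note: the rewritten `tick_crash` replaces the journal-status workaround with a simpler invariant: count the replicas that can still participate in recovery (`status != .recovering_head`), and only crash a replica with storage faults enabled while more than a view-change quorum of recoverable replicas would remain. A standalone sketch of that gate (the quorum formula here is an assumed majority quorum; the real one is `vsr.quorums(...).view_change`):

    const std = @import("std");

    const Status = enum { normal, recovering_head };

    fn quorum_view_change(replica_count: u8) u8 {
        // Assumed majority quorum: f = (n - 1) / 2, quorum = n - f.
        return replica_count - (replica_count - 1) / 2;
    }

    fn may_crash_with_faults(statuses: []const Status, replica_count: u8) bool {
        var recoverable_count: usize = 0;
        for (statuses) |status| {
            // A replica stuck in recovering_head cannot help others recover.
            recoverable_count += @boolToInt(status != .recovering_head);
        }
        return recoverable_count > quorum_view_change(replica_count);
    }

    test "faults are withheld at the quorum boundary" {
        const statuses = [_]Status{ .normal, .normal, .recovering_head };
        try std.testing.expect(!may_crash_with_faults(&statuses, 3));
    }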
package/src/tigerbeetle/src/testing/cluster/state_checker.zig
CHANGED

@@ -72,7 +72,7 @@ pub fn StateCheckerType(comptime Client: type, comptime Replica: type) type {
     const commit_b = replica.commit_min;
 
     const header_b = replica.journal.header_with_op(replica.commit_min);
-    assert(header_b != null or replica.commit_min == replica.op_checkpoint);
+    assert(header_b != null or replica.commit_min == replica.op_checkpoint());
     assert(header_b == null or header_b.?.op == commit_b);
 
     const checksum_a = state_checker.commits.items[commit_a].header.checksum;
package/src/tigerbeetle/src/testing/cluster/storage_checker.zig
CHANGED

@@ -14,7 +14,7 @@
 //! - Acquired Grid blocks
 //!
 //! Areas not verified:
-//! - SuperBlock
+//! - SuperBlock headers, which hold replica-specific state.
 //! - WAL headers, which may differ because the WAL writes deliberately corrupt redundant headers
 //!   to faulty slots to ensure recovery is consistent.
 //! - Non-allocated Grid blocks, which may differ due to state transfer.

@@ -25,7 +25,7 @@ const log = std.log.scoped(.storage_checker);
 const constants = @import("../../constants.zig");
 const vsr = @import("../../vsr.zig");
 const superblock = @import("../../vsr/superblock.zig");
-const
+const SuperBlockHeader = superblock.SuperBlockHeader;
 const Storage = @import("../storage.zig").Storage;
 
 /// After each compaction half measure, save the cumulative hash of all acquired grid blocks.

@@ -44,7 +44,7 @@ const Checkpoints = std.AutoHashMap(u64, Checkpoint);
 
 const Checkpoint = struct {
     // The superblock trailers are an XOR of all copies of all respective trailers, not the
-    // `
+    // `SuperBlockHeader.{trailer}_checksum`.
     checksum_superblock_manifest: u128,
     checksum_superblock_free_set: u128,
     checksum_superblock_client_table: u128,

@@ -145,15 +145,15 @@ pub fn StorageCheckerType(comptime Replica: type) type {
     inline for (std.meta.fields(Checkpoint)) |field| {
         log.debug("{}: replica_checkpoint: checkpoint={} area={s} value={}", .{
             replica.replica,
-            replica.op_checkpoint,
+            replica.op_checkpoint(),
             field.name,
             @field(checkpoint, field.name),
         });
     }
 
-    const checkpoint_expect = checker.checkpoints.get(replica.op_checkpoint) orelse {
+    const checkpoint_expect = checker.checkpoints.get(replica.op_checkpoint()) orelse {
         // This replica is the first to reach op_checkpoint.
-        try checker.checkpoints.putNoClobber(replica.op_checkpoint, checkpoint);
+        try checker.checkpoints.putNoClobber(replica.op_checkpoint(), checkpoint);
         return;
     };
 
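Note: per the comment fixed above, each trailer checksum in `Checkpoint` folds all superblock copies together with XOR, so the comparison between replicas does not depend on copy order. A minimal sketch of that fold:

    const std = @import("std");

    fn fold_copies(copy_checksums: []const u128) u128 {
        var folded: u128 = 0;
        // XOR is commutative and associative: the result is independent of
        // the order in which the copies are visited.
        for (copy_checksums) |checksum| folded ^= checksum;
        return folded;
    }

    test "fold is order-independent" {
        const forward = [_]u128{ 1, 2, 3, 4 };
        const reverse = [_]u128{ 4, 3, 2, 1 };
        try std.testing.expectEqual(fold_copies(&forward), fold_copies(&reverse));
    }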
package/src/tigerbeetle/src/testing/cluster.zig
CHANGED

@@ -142,7 +142,7 @@ pub fn ClusterType(comptime StateMachineType: fn (comptime Storage: type, compti
     storage_options.replica_index = @intCast(u8, replica_index);
     storage_options.fault_atlas = storage_fault_atlas;
     storage.* = try Storage.init(allocator, options.storage_size_limit, storage_options);
-    // Disable most faults at startup, so that the replicas don't get stuck
+    // Disable most faults at startup, so that the replicas don't get stuck recovering_head.
     storage.faulty = replica_index >= vsr.quorums(options.replica_count).view_change;
 }
 errdefer for (storages) |*storage| storage.deinit(allocator);

@@ -301,99 +301,17 @@ pub fn ClusterType(comptime StateMachineType: fn (comptime Storage: type, compti
 ///
 /// Returns whether the replica was crashed.
 /// Returns an error when the replica was unable to recover (open).
-pub fn crash_replica(cluster: *Self, replica_index: u8) !
+pub fn crash_replica(cluster: *Self, replica_index: u8) !void {
     assert(cluster.replica_health[replica_index] == .up);
 
-    const replica = &cluster.replicas[replica_index];
-    if (replica.op == 0) {
-        // Only crash when `replica.op > 0` — an empty WAL would skip recovery after a crash.
-        return false;
-    }
-
-    // TODO Remove this workaround when VSR recovery protocol is disabled.
-    for (replica.journal.prepare_inhabited) |inhabited, i| {
-        if (i == 0) {
-            // Ignore the root header.
-        } else {
-            if (inhabited) break;
-        }
-    } else {
-        // Only crash when at least one header has been written to the WAL.
-        // An empty WAL would skip recovery after a crash.
-        return false;
-    }
-
-    // Ensure that the cluster can eventually recover without this replica.
-    // Verify that each op is recoverable by the current healthy cluster (minus the replica we
-    // are trying to crash).
-    // TODO Remove this workaround when VSR recovery protocol is disabled.
-    if (cluster.options.replica_count != 1) {
-        var parent: u128 = undefined;
-        const cluster_op_max = op_max: {
-            var v: ?u32 = null;
-            var op_max: ?u64 = null;
-            for (cluster.replicas) |other_replica, i| {
-                if (cluster.replica_health[i] == .down) continue;
-                if (other_replica.status == .recovering) continue;
-
-                if (v == null or other_replica.log_view > v.? or
-                    (other_replica.log_view == v.? and other_replica.op > op_max.?))
-                {
-                    v = other_replica.log_view;
-                    op_max = other_replica.op;
-                    parent = other_replica.journal.header_with_op(op_max.?).?.checksum;
-                }
-            }
-            break :op_max op_max.?;
-        };
-
-        // This whole workaround doesn't handle log wrapping correctly.
-        // If the log has wrapped, don't crash the replica.
-        if (cluster_op_max >= constants.journal_slot_count) {
-            return false;
-        }
-
-        var op: u64 = cluster_op_max + 1;
-        while (op > 0) {
-            op -= 1;
-
-            var cluster_op_known: bool = false;
-            for (cluster.replicas) |other_replica, i| {
-                // Ignore replicas that are ineligible to assist recovery.
-                if (replica_index == i) continue;
-                if (cluster.replica_health[i] == .down) continue;
-                if (other_replica.status == .recovering) continue;
-
-                if (other_replica.journal.header_with_op_and_checksum(op, parent)) |header| {
-                    parent = header.parent;
-                    if (!other_replica.journal.dirty.bit(.{ .index = op })) {
-                        // The op is recoverable if this replica crashes.
-                        break;
-                    }
-                    cluster_op_known = true;
-                }
-            } else {
-                if (op == cluster_op_max and !cluster_op_known) {
-                    // The replica can crash; it will be able to truncate the last op.
-                } else {
-                    // The op isn't recoverable if this replica is crashed.
-                    return false;
-                }
-            }
-        }
-
-        // We can't crash this replica because without it we won't be able to repair a broken
-        // hash chain.
-        if (parent != 0) return false;
-    }
-
-    cluster.replica_health[replica_index] = .down;
-
     // Reset the storage before the replica so that pending writes can (partially) finish.
     cluster.storages[replica_index].reset();
+
+    const replica = &cluster.replicas[replica_index];
     const replica_time = replica.time;
     replica.deinit(cluster.allocator);
     cluster.network.process_disable(.{ .replica = replica_index });
+    cluster.replica_health[replica_index] = .down;
 
     // Ensure that none of the replica's messages leaked when it was deinitialized.
     var messages_in_pool: usize = 0;

@@ -411,18 +329,6 @@ pub fn ClusterType(comptime StateMachineType: fn (comptime Storage: type, compti
     // Pass the old replica's Time through to the new replica. It will continue to tick while
     // the replica is crashed, to ensure the clocks don't desyncronize too far to recover.
     try cluster.open_replica(replica_index, replica_time);
-
-    return true;
-}
-
-/// Returns the number of replicas capable of helping a crashed node recover (i.e. with
-/// replica.status=normal).
-pub fn replica_normal_count(cluster: *const Self) u8 {
-    var count: u8 = 0;
-    for (cluster.replicas) |*replica| {
-        if (replica.status == .normal) count += 1;
-    }
-    return count;
 }
 
 fn open_replica(cluster: *Self, replica_index: u8, time: Time) !void {