npm - tigerbeetle-node - Versions diffs - 0.8.1 → 0.9.143 - Mend

tigerbeetle-node 0.8.1 → 0.9.143

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83) hide show

package/README.md +584 -184
package/dist/benchmark.js +59 -51
package/dist/benchmark.js.map +1 -1
package/dist/bin/aarch64-linux-gnu/client.node +0 -0
package/dist/bin/aarch64-linux-musl/client.node +0 -0
package/dist/bin/aarch64-macos/client.node +0 -0
package/dist/bin/x86_64-linux-gnu/client.node +0 -0
package/dist/bin/x86_64-linux-musl/client.node +0 -0
package/dist/bin/x86_64-macos/client.node +0 -0
package/dist/bin/x86_64-windows/client.node +0 -0
package/dist/bindings.d.ts +141 -0
package/dist/bindings.js +112 -0
package/dist/bindings.js.map +1 -0
package/dist/index.d.ts +2 -125
package/dist/index.js +51 -101
package/dist/index.js.map +1 -1
package/dist/test.js +69 -55
package/dist/test.js.map +1 -1
package/package-lock.json +26 -0
package/package.json +17 -28
package/src/benchmark.ts +58 -49
package/src/bindings.ts +631 -0
package/src/index.ts +71 -163
package/src/node.zig +169 -148
package/src/test.ts +71 -57
package/src/translate.zig +19 -36
package/.yarn/releases/yarn-berry.cjs +0 -55
package/.yarnrc.yml +0 -1
package/scripts/download_node_headers.sh +0 -25
package/scripts/postinstall.sh +0 -6
package/src/tigerbeetle/scripts/benchmark.bat +0 -46
package/src/tigerbeetle/scripts/benchmark.sh +0 -55
package/src/tigerbeetle/scripts/install.sh +0 -6
package/src/tigerbeetle/scripts/install_zig.bat +0 -109
package/src/tigerbeetle/scripts/install_zig.sh +0 -84
package/src/tigerbeetle/scripts/lint.zig +0 -199
package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +0 -39
package/src/tigerbeetle/scripts/vopr.bat +0 -48
package/src/tigerbeetle/scripts/vopr.sh +0 -33
package/src/tigerbeetle/scripts/vr_state_enumerate +0 -46
package/src/tigerbeetle/src/benchmark.zig +0 -290
package/src/tigerbeetle/src/cli.zig +0 -244
package/src/tigerbeetle/src/config.zig +0 -239
package/src/tigerbeetle/src/demo.zig +0 -125
package/src/tigerbeetle/src/demo_01_create_accounts.zig +0 -35
package/src/tigerbeetle/src/demo_02_lookup_accounts.zig +0 -7
package/src/tigerbeetle/src/demo_03_create_transfers.zig +0 -24
package/src/tigerbeetle/src/demo_04_create_pending_transfers.zig +0 -61
package/src/tigerbeetle/src/demo_05_post_pending_transfers.zig +0 -37
package/src/tigerbeetle/src/demo_06_void_pending_transfers.zig +0 -24
package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +0 -7
package/src/tigerbeetle/src/fifo.zig +0 -104
package/src/tigerbeetle/src/io/benchmark.zig +0 -213
package/src/tigerbeetle/src/io/darwin.zig +0 -793
package/src/tigerbeetle/src/io/linux.zig +0 -1038
package/src/tigerbeetle/src/io/test.zig +0 -643
package/src/tigerbeetle/src/io/windows.zig +0 -1161
package/src/tigerbeetle/src/io.zig +0 -34
package/src/tigerbeetle/src/main.zig +0 -144
package/src/tigerbeetle/src/message_bus.zig +0 -1000
package/src/tigerbeetle/src/message_pool.zig +0 -142
package/src/tigerbeetle/src/ring_buffer.zig +0 -289
package/src/tigerbeetle/src/simulator.zig +0 -417
package/src/tigerbeetle/src/state_machine.zig +0 -2470
package/src/tigerbeetle/src/storage.zig +0 -308
package/src/tigerbeetle/src/test/cluster.zig +0 -351
package/src/tigerbeetle/src/test/message_bus.zig +0 -93
package/src/tigerbeetle/src/test/network.zig +0 -179
package/src/tigerbeetle/src/test/packet_simulator.zig +0 -387
package/src/tigerbeetle/src/test/state_checker.zig +0 -145
package/src/tigerbeetle/src/test/state_machine.zig +0 -76
package/src/tigerbeetle/src/test/storage.zig +0 -438
package/src/tigerbeetle/src/test/time.zig +0 -84
package/src/tigerbeetle/src/tigerbeetle.zig +0 -222
package/src/tigerbeetle/src/time.zig +0 -113
package/src/tigerbeetle/src/unit_tests.zig +0 -14
package/src/tigerbeetle/src/vsr/client.zig +0 -505
package/src/tigerbeetle/src/vsr/clock.zig +0 -812
package/src/tigerbeetle/src/vsr/journal.zig +0 -2293
package/src/tigerbeetle/src/vsr/marzullo.zig +0 -309
package/src/tigerbeetle/src/vsr/replica.zig +0 -5015
package/src/tigerbeetle/src/vsr.zig +0 -1017
package/yarn.lock +0 -42

package/src/tigerbeetle/src/storage.zig DELETED Viewed

@@ -1,308 +0,0 @@
-const std = @import("std");
-const builtin = @import("builtin");
-const os = std.os;
-const Allocator = std.mem.Allocator;
-const assert = std.debug.assert;
-const log = std.log.scoped(.storage);
-const IO = @import("io.zig").IO;
-const config = @import("config.zig");
-const vsr = @import("vsr.zig");
-/// Sector-oriented storage over a file descriptor, driven through `IO` completions.
-/// Reads work around latent sector errors by subdividing the read and zeroing sectors that
-/// cannot be read; writes panic on any error.
-pub const Storage = struct {
-    /// See usage in Journal.write_sectors() for details.
-    pub const synchronicity: enum {
-        always_synchronous,
-        always_asynchronous,
-    } = .always_asynchronous;
-    /// State for one `read_sectors()` call, supplied by the caller and (re)initialized there.
-    pub const Read = struct {
-        completion: IO.Completion,
-        /// Invoked by `start_read()` once no bytes remain to be read.
-        callback: fn (read: *Storage.Read) void,
-        /// The buffer to read into, re-sliced and re-assigned as we go, e.g. after partial reads.
-        buffer: []u8,
-        /// The position into the file descriptor from where we should read, also adjusted as we go.
-        offset: u64,
-        /// The maximum amount of bytes to read per syscall. We use this to subdivide troublesome
-        /// reads into smaller reads to work around latent sector errors (LSEs).
-        target_max: u64,
-        /// Returns a target slice into `buffer` to read into, capped by `target_max`.
-        /// If the previous read was a partial read of physical sectors (e.g. 512 bytes) less than
-        /// our logical sector size (e.g. 4 KiB), so that the remainder of the buffer is no longer
-        /// aligned to a logical sector, then we further cap the slice to get back onto a logical
-        /// sector boundary.
-        fn target(read: *Read) []u8 {
-            // A worked example of a partial read that leaves the rest of the buffer unaligned:
-            // This could happen for non-Advanced Format disks with a physical sector of 512 bytes.
-            // We want to read 8 KiB:
-            //     buffer.ptr = 0
-            //     buffer.len = 8192
-            // ... and then experience a partial read of only 512 bytes:
-            //     buffer.ptr = 512
-            //     buffer.len = 7680
-            // We can now see that `buffer.len` is no longer a sector multiple of 4 KiB and further
-            // that we have 3584 bytes left of the partial sector read. If we subtract this amount
-            // from our logical sector size of 4 KiB we get 512 bytes, which is the alignment error
-            // that we need to subtract from `target_max` to get back onto the boundary.
-            var max = read.target_max;
-            const partial_sector_read_remainder = read.buffer.len % config.sector_size;
-            if (partial_sector_read_remainder != 0) {
-                // TODO log.debug() because this is interesting, and to ensure fuzz test coverage.
-                const partial_sector_read = config.sector_size - partial_sector_read_remainder;
-                max -= partial_sector_read;
-            }
-            return read.buffer[0..std.math.min(read.buffer.len, max)];
-        }
-    };
-    /// State for one `write_sectors()` call, supplied by the caller and (re)initialized there.
-    pub const Write = struct {
-        completion: IO.Completion,
-        /// Invoked by `on_write()` once the whole buffer has been written.
-        callback: fn (write: *Storage.Write) void,
-        /// The bytes still to be written, re-sliced after every (possibly partial) write.
-        buffer: []const u8,
-        /// The position to write at, advanced by the bytes written so far.
-        offset: u64,
-    };
-    /// Upper bound (exclusive) for `offset + buffer.len`, enforced by `assert_bounds()`.
-    size: u64,
-    /// The file descriptor passed to every `io.read()` and `io.write()`.
-    fd: os.fd_t,
-    io: *IO,
-    /// Bundles the given size, file descriptor and IO instance; allocates nothing.
-    pub fn init(size: u64, fd: os.fd_t, io: *IO) !Storage {
-        return Storage{
-            .size = size,
-            .fd = fd,
-            .io = io,
-        };
-    }
-    /// No-op: `init()` acquires no resources.
-    pub fn deinit() void {}
-    /// Reads `buffer.len` bytes starting at `offset` into `buffer`, then invokes `callback`.
-    /// `buffer` and `offset` must be sector-aligned (asserted). Sectors that fail with
-    /// `error.InputOutput` even at single-sector granularity are zeroed rather than reported.
-    pub fn read_sectors(
-        self: *Storage,
-        callback: fn (read: *Storage.Read) void,
-        read: *Storage.Read,
-        buffer: []u8,
-        offset: u64,
-    ) void {
-        assert_alignment(buffer, offset);
-        read.* = .{
-            .completion = undefined,
-            .callback = callback,
-            .buffer = buffer,
-            .offset = offset,
-            .target_max = buffer.len,
-        };
-        self.start_read(read, 0);
-    }
-    /// Advances `read` past `bytes_read` bytes and either completes it (nothing left to read)
-    /// or submits the next read for the current target slice.
-    fn start_read(self: *Storage, read: *Storage.Read, bytes_read: usize) void {
-        assert(bytes_read <= read.target().len);
-        read.offset += bytes_read;
-        read.buffer = read.buffer[bytes_read..];
-        const target = read.target();
-        if (target.len == 0) {
-            read.callback(read);
-            return;
-        }
-        self.assert_bounds(target, read.offset);
-        self.io.read(
-            *Storage,
-            self,
-            on_read,
-            &read.completion,
-            self.fd,
-            target,
-            read.offset,
-        );
-    }
-    /// IO completion for `start_read()`: handles latent sector errors, panics on any other
-    /// error or on a zero-byte read, and otherwise continues with the remaining bytes.
-    fn on_read(self: *Storage, completion: *IO.Completion, result: IO.ReadError!usize) void {
-        const read = @fieldParentPtr(Storage.Read, "completion", completion);
-        const bytes_read = result catch |err| switch (err) {
-            error.InputOutput => {
-                // The disk was unable to read some sectors (an internal CRC or hardware failure):
-                // We may also have already experienced a partial unaligned read, reading less
-                // physical sectors than the logical sector size, so we cannot expect `target.len`
-                // to be an exact logical sector multiple.
-                const target = read.target();
-                if (target.len > config.sector_size) {
-                    // We tried to read more than a logical sector and failed.
-                    log.err("latent sector error: offset={}, subdividing read...", .{read.offset});
-                    // Divide the buffer in half and try to read each half separately:
-                    // This creates a recursive binary search for the sector(s) causing the error.
-                    // This is considerably slower than doing a single bulk read and by now we might
-                    // also have experienced the disk's read retry timeout (in seconds).
-                    // TODO Our docs must instruct on why and how to reduce disk firmware timeouts.
-                    // These lines both implement ceiling division e.g. `((3 - 1) / 2) + 1 == 2` and
-                    // require that the numerator is always greater than zero:
-                    assert(target.len > 0);
-                    const target_sectors = @divFloor(target.len - 1, config.sector_size) + 1;
-                    assert(target_sectors > 0);
-                    read.target_max = (@divFloor(target_sectors - 1, 2) + 1) * config.sector_size;
-                    assert(read.target_max >= config.sector_size);
-                    // Pass 0 for `bytes_read`, we want to retry the read with smaller `target_max`:
-                    self.start_read(read, 0);
-                    return;
-                } else {
-                    // We tried to read at (or less than) logical sector granularity and failed.
-                    log.err("latent sector error: offset={}, zeroing sector...", .{read.offset});
-                    // Zero this logical sector which can't be read:
-                    // We will treat these EIO errors the same as a checksum failure.
-                    // TODO This could be an interesting avenue to explore further, whether
-                    // temporary or permanent EIO errors should be conflated with checksum failures.
-                    assert(target.len > 0);
-                    std.mem.set(u8, target, 0);
-                    // We could set `read.target_max` to `vsr.sector_ceil(read.buffer.len)` here
-                    // in order to restart our pseudo-binary search on the rest of the sectors to be
-                    // read, optimistically assuming that this is the last failing sector.
-                    // However, data corruption that causes EIO errors often has spatial locality.
-                    // Therefore, restarting our pseudo-binary search here might give us abysmal
-                    // performance in the (not uncommon) case of many successive failing sectors.
-                    self.start_read(read, target.len);
-                    return;
-                }
-            },
-            error.WouldBlock,
-            error.NotOpenForReading,
-            error.ConnectionResetByPeer,
-            error.Alignment,
-            error.IsDir,
-            error.SystemResources,
-            error.Unseekable,
-            error.Unexpected,
-            => {
-                log.err(
-                    "impossible read: offset={} buffer.len={} error={s}",
-                    .{ read.offset, read.buffer.len, @errorName(err) },
-                );
-                @panic("impossible read");
-            },
-        };
-        if (bytes_read == 0) {
-            // We tried to read more than there really is available to read.
-            // In other words, we thought we could read beyond the end of the file descriptor.
-            // This can happen if the data file inode `size` was truncated or corrupted.
-            // The arguments are listed in the same order as the placeholders in the message.
-            log.err(
-                "short read: buffer.len={} offset={} bytes_read={}",
-                .{ read.buffer.len, read.offset, bytes_read },
-            );
-            @panic("data file inode size was truncated or corrupted");
-        }
-        // If our target was limited to a single sector, perhaps because of a latent sector error,
-        // then increase `target_max` according to AIMD now that we have read successfully and
-        // hopefully cleared the faulty zone.
-        // We assume that `target_max` may exceed `read.buffer.len` at any time.
-        if (read.target_max == config.sector_size) {
-            // TODO Add log.debug because this is interesting.
-            read.target_max += config.sector_size;
-        }
-        self.start_read(read, bytes_read);
-    }
-    /// Writes all of `buffer` at `offset`, then invokes `callback`.
-    /// `buffer` and `offset` must be sector-aligned (asserted). Any write error panics.
-    pub fn write_sectors(
-        self: *Storage,
-        callback: fn (write: *Storage.Write) void,
-        write: *Storage.Write,
-        buffer: []const u8,
-        offset: u64,
-    ) void {
-        assert_alignment(buffer, offset);
-        write.* = .{
-            .completion = undefined,
-            .callback = callback,
-            .buffer = buffer,
-            .offset = offset,
-        };
-        self.start_write(write);
-    }
-    /// Submits a write for whatever remains of `write.buffer` at `write.offset`.
-    fn start_write(self: *Storage, write: *Storage.Write) void {
-        self.assert_bounds(write.buffer, write.offset);
-        self.io.write(
-            *Storage,
-            self,
-            on_write,
-            &write.completion,
-            self.fd,
-            write.buffer,
-            write.offset,
-        );
-    }
-    /// IO completion for `start_write()`: panics on any error or on a zero-byte write, and
-    /// otherwise either completes the write or continues with the remaining bytes.
-    fn on_write(self: *Storage, completion: *IO.Completion, result: IO.WriteError!usize) void {
-        const write = @fieldParentPtr(Storage.Write, "completion", completion);
-        const bytes_written = result catch |err| switch (err) {
-            // We assume that the disk will attempt to reallocate a spare sector for any LSE.
-            // TODO What if we receive a temporary EIO error because of a faulty cable?
-            error.InputOutput => @panic("latent sector error: no spare sectors to reallocate"),
-            // TODO: It seems like it might be possible for some filesystems to return ETIMEDOUT
-            // here. Consider handling this without panicking.
-            else => {
-                log.err(
-                    "impossible write: offset={} buffer.len={} error={s}",
-                    .{ write.offset, write.buffer.len, @errorName(err) },
-                );
-                @panic("impossible write");
-            },
-        };
-        if (bytes_written == 0) {
-            // This should never happen if the kernel and filesystem are well behaved.
-            // However, block devices are known to exhibit this behavior in the wild.
-            // TODO: Consider retrying with a timeout if this panic proves problematic, and be
-            // careful to avoid logging in a busy loop. Perhaps a better approach might be to
-            // return wrote = null here and let the protocol retry at a higher layer where there is
-            // more context available to decide on how important this is or whether to cancel.
-            @panic("write operation returned 0 bytes written");
-        }
-        write.offset += bytes_written;
-        write.buffer = write.buffer[bytes_written..];
-        if (write.buffer.len == 0) {
-            write.callback(write);
-            return;
-        }
-        self.start_write(write);
-    }
-    /// Ensures that the read or write is aligned correctly for Direct I/O.
-    /// If this is not the case, then the underlying syscall will return EINVAL.
-    /// We check this only at the start of a read or write because the physical sector size may be
-    /// less than our logical sector size so that partial IOs then leave us no longer aligned.
-    fn assert_alignment(buffer: []const u8, offset: u64) void {
-        assert(@ptrToInt(buffer.ptr) % config.sector_size == 0);
-        assert(buffer.len % config.sector_size == 0);
-        assert(offset % config.sector_size == 0);
-    }
-    /// Ensures that the read or write is within bounds and intends to read or write some bytes.
-    fn assert_bounds(self: *Storage, buffer: []const u8, offset: u64) void {
-        assert(buffer.len > 0);
-        assert(offset + buffer.len <= self.size);
-    }
-};

package/src/tigerbeetle/src/test/cluster.zig DELETED Viewed

@@ -1,351 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const mem = std.mem;
-const config = @import("../config.zig");
-const StateChecker = @import("state_checker.zig").StateChecker;
-const message_pool = @import("../message_pool.zig");
-const MessagePool = message_pool.MessagePool;
-const Message = MessagePool.Message;
-const Network = @import("network.zig").Network;
-const NetworkOptions = @import("network.zig").NetworkOptions;
-pub const StateMachine = @import("state_machine.zig").StateMachine;
-const MessageBus = @import("message_bus.zig").MessageBus;
-const Storage = @import("storage.zig").Storage;
-const Time = @import("time.zig").Time;
-const vsr = @import("../vsr.zig");
-pub const Replica = vsr.Replica(StateMachine, MessageBus, Storage, Time);
-pub const Client = vsr.Client(StateMachine, MessageBus);
-/// Parameters for `Cluster.create()`.
-pub const ClusterOptions = struct {
-    /// Cluster identifier passed to every replica, client and message bus.
-    cluster: u32,
-    /// Number of replicas to allocate; `Cluster.create()` asserts that this is non-zero.
-    replica_count: u8,
-    /// Number of clients to allocate.
-    client_count: u8,
-    /// Seed passed to `StateMachine.init()` for each replica.
-    seed: u64,
-    /// Forwarded to `Network.init()`.
-    network_options: NetworkOptions,
-    /// Forwarded to `Storage.init()` for each replica.
-    storage_options: Storage.Options,
-    /// Crash and restart tuning; see `HealthOptions`.
-    health_options: HealthOptions,
-};
-/// Tuning for simulated replica crashes and restarts.
-pub const HealthOptions = struct {
-    /// Probability per tick that a crash will occur.
-    crash_probability: f64,
-    /// Minimum duration of a crash.
-    crash_stability: u32,
-    /// Probability per tick that a crashed replica will recover.
-    restart_probability: f64,
-    /// Minimum time a replica is up until it is crashed again.
-    restart_stability: u32,
-};
-/// Health of one replica. `Cluster.create()` starts every replica as `.up = 0`, and
-/// `Cluster.crash_replica()` sets `.down` to `HealthOptions.crash_stability`.
-pub const ReplicaHealth = union(enum) {
-    /// When >0, the replica cannot crash.
-    /// When =0, the replica may crash.
-    up: u32,
-    /// When >0, this is the ticks remaining until recovery is possible.
-    /// When =0, the replica may recover.
-    down: u32,
-};
-/// A simulated cluster: replicas and clients wired together through a simulated `Network`,
-/// with per-replica state machine, storage, time and health bookkeeping.
-pub const Cluster = struct {
-    /// Used for every allocation made by `create()` and released again by `destroy()`.
-    allocator: mem.Allocator,
-    options: ClusterOptions,
-    /// One entry per replica (`options.replica_count`).
-    state_machines: []StateMachine,
-    /// One entry per replica (`options.replica_count`).
-    storages: []Storage,
-    /// One entry per replica (`options.replica_count`).
-    times: []Time,
-    /// One entry per replica (`options.replica_count`).
-    replicas: []Replica,
-    /// One entry per replica (`options.replica_count`), all initially `.up = 0`.
-    health: []ReplicaHealth,
-    /// One entry per client (`options.client_count`).
-    clients: []Client,
-    network: Network,
-    // TODO: Initializing these fields in main() is a bit ugly
-    state_checker: StateChecker = undefined,
-    on_change_state: fn (replica: *Replica) void = undefined,
-    /// Allocates and initializes a cluster with `options.replica_count` replicas and
-    /// `options.client_count` clients, and formats each replica's WAL.
-    /// The returned cluster must be released with `destroy()`.
-    /// `state_checker` and `on_change_state` are left undefined for the caller to set.
-    pub fn create(allocator: mem.Allocator, prng: std.rand.Random, options: ClusterOptions) !*Cluster {
-        assert(options.replica_count > 0);
-        assert(options.health_options.crash_probability < 1.0);
-        assert(options.health_options.crash_probability >= 0.0);
-        assert(options.health_options.restart_probability < 1.0);
-        assert(options.health_options.restart_probability >= 0.0);
-        const cluster = try allocator.create(Cluster);
-        errdefer allocator.destroy(cluster);
-        const state_machines = try allocator.alloc(StateMachine, options.replica_count);
-        errdefer allocator.free(state_machines);
-        const storages = try allocator.alloc(Storage, options.replica_count);
-        errdefer allocator.free(storages);
-        const times = try allocator.alloc(Time, options.replica_count);
-        errdefer allocator.free(times);
-        const replicas = try allocator.alloc(Replica, options.replica_count);
-        errdefer allocator.free(replicas);
-        const health = try allocator.alloc(ReplicaHealth, options.replica_count);
-        errdefer allocator.free(health);
-        mem.set(ReplicaHealth, health, .{ .up = 0 });
-        const clients = try allocator.alloc(Client, options.client_count);
-        errdefer allocator.free(clients);
-        var network = try Network.init(
-            allocator,
-            options.replica_count,
-            options.client_count,
-            options.network_options,
-        );
-        errdefer network.deinit();
-        cluster.* = .{
-            .allocator = allocator,
-            .options = options,
-            .state_machines = state_machines,
-            .storages = storages,
-            .times = times,
-            .replicas = replicas,
-            .health = health,
-            .clients = clients,
-            .network = network,
-        };
-        var buffer: [config.replicas_max]Storage.FaultyAreas = undefined;
-        const faulty_areas = Storage.generate_faulty_areas(prng, config.journal_size_max, options.replica_count, &buffer);
-        assert(faulty_areas.len == options.replica_count);
-        for (cluster.replicas) |*replica, replica_index| {
-            cluster.times[replica_index] = .{
-                .resolution = config.tick_ms * std.time.ns_per_ms,
-                .offset_type = .linear,
-                .offset_coefficient_A = 0,
-                .offset_coefficient_B = 0,
-            };
-            cluster.state_machines[replica_index] = StateMachine.init(options.seed);
-            cluster.storages[replica_index] = try Storage.init(
-                allocator,
-                config.journal_size_max,
-                options.storage_options,
-                @intCast(u8, replica_index),
-                faulty_areas[replica_index],
-            );
-            const message_bus = try cluster.network.init_message_bus(
-                options.cluster,
-                .{ .replica = @intCast(u8, replica_index) },
-            );
-            replica.* = try Replica.init(
-                allocator,
-                options.cluster,
-                options.replica_count,
-                @intCast(u8, replica_index),
-                &cluster.times[replica_index],
-                &cluster.storages[replica_index],
-                message_bus,
-                &cluster.state_machines[replica_index],
-            );
-            message_bus.set_on_message(*Replica, replica, Replica.on_message);
-        }
-        {
-            // Format the WAL (equivalent to "tigerbeetle init ...").
-            for (cluster.storages) |storage| {
-                const write_size = vsr.format_journal(options.cluster, 0, storage.memory);
-                assert(write_size == storage.memory.len);
-                assert(write_size == config.journal_size_max);
-            }
-        }
-        for (cluster.clients) |*client| {
-            const client_id = prng.int(u128);
-            const client_message_bus = try cluster.network.init_message_bus(
-                options.cluster,
-                .{ .client = client_id },
-            );
-            client.* = try Client.init(
-                allocator,
-                client_id,
-                options.cluster,
-                options.replica_count,
-                client_message_bus,
-            );
-            client_message_bus.set_on_message(*Client, client, Client.on_message);
-        }
-        return cluster;
-    }
-    /// Deinitializes all clients, replicas and storages, frees every slice allocated by
-    /// `create()`, and finally frees the cluster itself.
-    pub fn destroy(cluster: *Cluster) void {
-        for (cluster.clients) |*client| client.deinit();
-        cluster.allocator.free(cluster.clients);
-        for (cluster.replicas) |*replica| replica.deinit(cluster.allocator);
-        cluster.allocator.free(cluster.replicas);
-        cluster.allocator.free(cluster.health);
-        for (cluster.storages) |*storage| storage.deinit(cluster.allocator);
-        cluster.allocator.free(cluster.storages);
-        // These slices are allocated by `create()` as well, and the replicas that pointed into
-        // them have been deinitialized above.
-        cluster.allocator.free(cluster.times);
-        cluster.allocator.free(cluster.state_machines);
-        cluster.network.deinit();
-        cluster.allocator.destroy(cluster);
-    }
-    /// Reset a replica to its initial state, simulating a random crash/panic.
-    /// Leave the persistent storage untouched, and leave any currently
-    /// inflight messages to/from the replica in the network.
-    ///
-    /// Returns whether the replica was crashed.
-    pub fn crash_replica(cluster: *Cluster, replica_index: u8) !bool {
-        const replica = &cluster.replicas[replica_index];
-        if (replica.op == 0) {
-            // Only crash when `replica.op > 0` — an empty WAL would skip recovery after a crash.
-            return false;
-        }
-        // Ensure that the cluster can eventually recover without this replica.
-        // Verify that each op is recoverable by the current healthy cluster (minus the replica we
-        // are trying to crash).
-        // TODO Remove this workaround when VSR recovery protocol is disabled.
-        if (cluster.options.replica_count != 1) {
-            var parent: u128 = undefined;
-            const cluster_op_max = op_max: {
-                var v: ?u32 = null;
-                var op_max: ?u64 = null;
-                for (cluster.replicas) |other_replica, i| {
-                    if (cluster.health[i] == .down) continue;
-                    if (other_replica.status == .recovering) continue;
-                    if (v == null or other_replica.view_normal > v.? or
-                        (other_replica.view_normal == v.? and other_replica.op > op_max.?))
-                    {
-                        v = other_replica.view_normal;
-                        op_max = other_replica.op;
-                        parent = other_replica.journal.header_with_op(op_max.?).?.checksum;
-                    }
-                }
-                break :op_max op_max.?;
-            };
-            // TODO This workaround doesn't handle log wrapping correctly.
-            assert(cluster_op_max < config.journal_slot_count);
-            // Walk the hash chain backwards from `cluster_op_max` down to op 0.
-            var op: u64 = cluster_op_max + 1;
-            while (op > 0) {
-                op -= 1;
-                var cluster_op_known: bool = false;
-                for (cluster.replicas) |other_replica, i| {
-                    // Ignore replicas that are ineligible to assist recovery.
-                    if (replica_index == i) continue;
-                    if (cluster.health[i] == .down) continue;
-                    if (other_replica.status == .recovering) continue;
-                    if (other_replica.journal.header_with_op_and_checksum(op, parent)) |header| {
-                        parent = header.parent;
-                        if (!other_replica.journal.dirty.bit(.{ .index = op })) {
-                            // The op is recoverable if this replica crashes.
-                            break;
-                        }
-                        cluster_op_known = true;
-                    }
-                } else {
-                    // Reached only when no other replica holds a clean copy of this op.
-                    if (op == cluster_op_max and !cluster_op_known) {
-                        // The replica can crash; it will be able to truncate the last op.
-                    } else {
-                        // The op isn't recoverable if this replica is crashed.
-                        return false;
-                    }
-                }
-            }
-            // We can't crash this replica because without it we won't be able to repair a broken
-            // hash chain.
-            if (parent != 0) return false;
-        }
-        cluster.health[replica_index] = .{ .down = cluster.options.health_options.crash_stability };
-        // Reset the storage before the replica so that pending writes can (partially) finish.
-        cluster.storages[replica_index].reset();
-        replica.deinit(cluster.allocator);
-        cluster.state_machines[replica_index] = StateMachine.init(cluster.options.seed);
-        // The message bus and network should be left alone, as messages
-        // may still be inflight to/from this replica. However, we should
-        // do a check to ensure that we aren't leaking any messages when
-        // deinitializing the replica above.
-        const packet_simulator = &cluster.network.packet_simulator;
-        // The same message may be used for multiple network packets, so simply counting how
-        // many packets are inflight from the replica is insufficient, we need to dedup them.
-        var messages_in_network_set = std.AutoHashMap(*Message, void).init(cluster.allocator);
-        defer messages_in_network_set.deinit();
-        var target: u8 = 0;
-        while (target < packet_simulator.options.node_count) : (target += 1) {
-            const path = .{ .source = replica_index, .target = target };
-            const queue = packet_simulator.path_queue(path);
-            var it = queue.iterator();
-            while (it.next()) |data| {
-                try messages_in_network_set.put(data.packet.message, {});
-            }
-        }
-        const messages_in_network = messages_in_network_set.count();
-        var messages_in_pool: usize = 0;
-        const message_bus = cluster.network.get_message_bus(.{ .replica = replica_index });
-        {
-            var it = message_bus.pool.free_list;
-            while (it) |message| : (it = message.next) messages_in_pool += 1;
-        }
-        // Every message must be either in flight from this replica or back in its pool.
-        const total_messages = message_pool.messages_max_replica;
-        assert(messages_in_network + messages_in_pool == total_messages);
-        replica.* = try Replica.init(
-            cluster.allocator,
-            cluster.options.cluster,
-            cluster.options.replica_count,
-            @intCast(u8, replica_index),
-            &cluster.times[replica_index],
-            &cluster.storages[replica_index],
-            message_bus,
-            &cluster.state_machines[replica_index],
-        );
-        message_bus.set_on_message(*Replica, replica, Replica.on_message);
-        replica.on_change_state = cluster.on_change_state;
-        return true;
-    }
-    /// Returns the number of replicas capable of helping a crashed node recover (i.e. with
-    /// replica.status=normal).
-    pub fn replica_normal_count(cluster: *Cluster) u8 {
-        var count: u8 = 0;
-        for (cluster.replicas) |*replica| {
-            if (replica.status == .normal) count += 1;
-        }
-        return count;
-    }
-    /// Returns the number of replicas whose health is currently `.up`.
-    pub fn replica_up_count(cluster: *const Cluster) u8 {
-        var count: u8 = 0;
-        for (cluster.health) |health| {
-            if (health == .up) {
-                count += 1;
-            }
-        }
-        return count;
-    }
-};