tigerbeetle-node 0.6.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +102 -83
  2. package/dist/benchmark.js +102 -100
  3. package/dist/benchmark.js.map +1 -1
  4. package/dist/index.d.ts +82 -82
  5. package/dist/index.js +74 -93
  6. package/dist/index.js.map +1 -1
  7. package/dist/test.js +135 -112
  8. package/dist/test.js.map +1 -1
  9. package/package.json +13 -14
  10. package/scripts/download_node_headers.sh +3 -1
  11. package/src/benchmark.ts +114 -118
  12. package/src/index.ts +102 -111
  13. package/src/node.zig +53 -51
  14. package/src/test.ts +146 -125
  15. package/src/tigerbeetle/scripts/benchmark.bat +46 -46
  16. package/src/tigerbeetle/scripts/benchmark.sh +5 -0
  17. package/src/tigerbeetle/scripts/install_zig.bat +109 -109
  18. package/src/tigerbeetle/scripts/install_zig.sh +7 -3
  19. package/src/tigerbeetle/scripts/vopr.bat +47 -47
  20. package/src/tigerbeetle/src/benchmark.zig +63 -96
  21. package/src/tigerbeetle/src/config.zig +23 -19
  22. package/src/tigerbeetle/src/demo.zig +2 -15
  23. package/src/tigerbeetle/src/demo_01_create_accounts.zig +10 -10
  24. package/src/tigerbeetle/src/demo_03_create_transfers.zig +5 -3
  25. package/src/tigerbeetle/src/{demo_04_create_transfers_two_phase_commit.zig → demo_04_create_pending_transfers.zig} +18 -12
  26. package/src/tigerbeetle/src/demo_05_post_pending_transfers.zig +37 -0
  27. package/src/tigerbeetle/src/demo_06_void_pending_transfers.zig +24 -0
  28. package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +1 -1
  29. package/src/tigerbeetle/src/io/linux.zig +4 -4
  30. package/src/tigerbeetle/src/main.zig +19 -3
  31. package/src/tigerbeetle/src/message_pool.zig +5 -2
  32. package/src/tigerbeetle/src/ring_buffer.zig +48 -3
  33. package/src/tigerbeetle/src/simulator.zig +104 -8
  34. package/src/tigerbeetle/src/state_machine.zig +1813 -816
  35. package/src/tigerbeetle/src/test/cluster.zig +165 -32
  36. package/src/tigerbeetle/src/test/packet_simulator.zig +14 -1
  37. package/src/tigerbeetle/src/test/state_checker.zig +3 -1
  38. package/src/tigerbeetle/src/test/state_machine.zig +8 -7
  39. package/src/tigerbeetle/src/test/storage.zig +99 -40
  40. package/src/tigerbeetle/src/tigerbeetle.zig +103 -98
  41. package/src/tigerbeetle/src/vsr/journal.zig +1387 -459
  42. package/src/tigerbeetle/src/vsr/replica.zig +1204 -417
  43. package/src/tigerbeetle/src/vsr.zig +203 -49
  44. package/src/translate.zig +10 -0
  45. package/.yarn/releases/yarn-berry.cjs +0 -55
  46. package/.yarnrc.yml +0 -1
  47. package/scripts/postinstall.sh +0 -6
  48. package/src/tigerbeetle/src/demo_05_accept_transfers.zig +0 -23
  49. package/src/tigerbeetle/src/demo_06_reject_transfers.zig +0 -17
  50. package/src/tigerbeetle/src/format_test.zig +0 -69
  51. package/yarn.lock +0 -42
@@ -53,29 +53,27 @@ pub const transfers_max = switch (deployment_environment) {
53
53
  else => 1_000_000,
54
54
  };
55
55
 
56
- /// The maximum number of two-phase commits to store in memory:
56
+ /// The maximum number of two-phase transfers to store in memory:
57
57
  /// This impacts the amount of memory allocated at initialization by the server.
58
- pub const commits_max = transfers_max;
59
-
60
- /// The maximum size of the journal file:
61
- /// This is pre-allocated and zeroed for performance when initialized.
62
- /// Writes within this file never extend the filesystem inode size reducing the cost of fdatasync().
63
- /// This enables static allocation of disk space so that appends cannot fail with ENOSPC.
64
- /// This also enables us to detect filesystem inode corruption that would change the journal size.
65
- pub const journal_size_max = switch (deployment_environment) {
66
- .production => 128 * 1024 * 1024 * 1024,
67
- else => 128 * 1024 * 1024,
68
- };
58
+ pub const transfers_pending_max = transfers_max;
69
59
 
70
60
  /// The maximum number of batch entries in the journal file:
71
61
  /// A batch entry may contain many transfers, so this is not a limit on the number of transfers.
72
62
  /// We need this limit to allocate space for copies of batch headers at the start of the journal.
73
63
  /// These header copies enable us to disentangle corruption from crashes and recover accordingly.
74
- pub const journal_headers_max = switch (deployment_environment) {
75
- .production => 1024 * 1024,
76
- else => 16384,
64
+ pub const journal_slot_count = switch (deployment_environment) {
65
+ .production => 1024,
66
+ else => 128,
77
67
  };
78
68
 
69
+ /// The maximum size of the journal file:
70
+ /// This is pre-allocated and zeroed for performance when initialized.
71
+ /// Writes within this file never extend the filesystem inode size reducing the cost of fdatasync().
72
+ /// This enables static allocation of disk space so that appends cannot fail with ENOSPC.
73
+ /// This also enables us to detect filesystem inode corruption that would change the journal size.
74
+ // TODO remove this; just allocate a part of the total storage for the journal
75
+ pub const journal_size_max = journal_slot_count * (128 + message_size_max);
76
+
79
77
  /// The maximum number of connections that can be held open by the server at any time:
80
78
  pub const connections_max = replicas_max + clients_max;
81
79
 
@@ -92,7 +90,7 @@ pub const message_size_max = 1 * 1024 * 1024;
92
90
  /// The maximum number of Viewstamped Replication prepare messages that can be inflight at a time.
93
91
  /// This is immutable once assigned per cluster, as replicas need to know how many operations might
94
92
  /// possibly be uncommitted during a view change, and this must be constant for all replicas.
95
- pub const pipelining_max = clients_max;
93
+ pub const pipeline_max = clients_max;
96
94
 
97
95
  /// The minimum and maximum amount of time in milliseconds to wait before initiating a connection.
98
96
  /// Exponential backoff and jitter are applied within this range.
@@ -224,12 +222,18 @@ pub const clock_synchronization_window_min_ms = 2000;
224
222
  /// If a window expires because of this then it is likely that the clock epoch will also be expired.
225
223
  pub const clock_synchronization_window_max_ms = 20000;
226
224
 
225
+ // TODO Move these to a separate "internal computed constants" file.
226
+ pub const journal_size_headers = journal_slot_count * 128; // 128 == @sizeOf(Header)
227
+ pub const journal_size_prepares = journal_slot_count * message_size_max;
228
+
227
229
  comptime {
228
230
  // vsr.parse_address assumes that config.address/config.port are valid.
229
231
  _ = std.net.Address.parseIp4(address, 0) catch unreachable;
230
232
  _ = @as(u16, port);
231
233
 
232
- // Avoid latency issues from a too-large sndbuf.
233
- assert(tcp_sndbuf_replica <= 4 * 1024 * 1024);
234
- assert(tcp_sndbuf_client <= 4 * 1024 * 1024);
234
+ // Avoid latency issues from setting sndbuf too high:
235
+ assert(tcp_sndbuf_replica <= 16 * 1024 * 1024);
236
+ assert(tcp_sndbuf_client <= 16 * 1024 * 1024);
237
+
238
+ assert(journal_size_max == journal_size_headers + journal_size_prepares);
235
239
  }
@@ -6,11 +6,9 @@ const config = @import("config.zig");
6
6
  const tb = @import("tigerbeetle.zig");
7
7
  const Account = tb.Account;
8
8
  const Transfer = tb.Transfer;
9
- const Commit = tb.Commit;
10
9
 
11
10
  const CreateAccountsResult = tb.CreateAccountsResult;
12
11
  const CreateTransfersResult = tb.CreateTransfersResult;
13
- const CommitTransfersResult = tb.CommitTransfersResult;
14
12
 
15
13
  const IO = @import("io.zig").IO;
16
14
  const MessageBus = @import("message_bus.zig").MessageBusClient;
@@ -33,7 +31,7 @@ pub fn request(
33
31
  ) !void {
34
32
  const allocator = std.heap.page_allocator;
35
33
  const client_id = std.crypto.random.int(u128);
36
- const cluster_id: u32 = 0;
34
+ const cluster_id: u32 = 1;
37
35
  var addresses = [_]std.net.Address{try std.net.Address.parseIp4("127.0.0.1", config.port)};
38
36
 
39
37
  var io = try IO.init(32, 0);
@@ -53,7 +51,7 @@ pub fn request(
53
51
 
54
52
  message_bus.set_on_message(*Client, &client, Client.on_message);
55
53
 
56
- var message = client.get_message();
54
+ const message = client.get_message();
57
55
  defer client.unref(message);
58
56
 
59
57
  const body = std.mem.asBytes(&batch);
@@ -117,17 +115,6 @@ pub fn on_create_transfers(
117
115
  print_results(CreateTransfersResult, results);
118
116
  }
119
117
 
120
- pub fn on_commit_transfers(
121
- user_data: u128,
122
- operation: StateMachine.Operation,
123
- results: Client.Error![]const u8,
124
- ) void {
125
- _ = user_data;
126
- _ = operation;
127
-
128
- print_results(CommitTransfersResult, results);
129
- }
130
-
131
118
  fn print_results(comptime Results: type, results: Client.Error![]const u8) void {
132
119
  const body = results catch unreachable;
133
120
  const slice = std.mem.bytesAsSlice(Results, body);
@@ -9,25 +9,25 @@ pub fn main() !void {
9
9
  .id = 1,
10
10
  .user_data = 0,
11
11
  .reserved = [_]u8{0} ** 48,
12
- .unit = 710, // Let's use the ISO-4217 Code Number for ZAR
12
+ .ledger = 710, // Let's use the ISO-4217 Code Number for ZAR
13
13
  .code = 1000, // A chart of accounts code to describe this as a clearing account.
14
14
  .flags = .{ .debits_must_not_exceed_credits = true },
15
- .debits_reserved = 0,
16
- .debits_accepted = 0,
17
- .credits_reserved = 0,
18
- .credits_accepted = 10000, // Let's start with some liquidity.
15
+ .debits_pending = 0,
16
+ .debits_posted = 0,
17
+ .credits_pending = 0,
18
+ .credits_posted = 10000, // Let's start with some liquidity.
19
19
  },
20
20
  Account{
21
21
  .id = 2,
22
22
  .user_data = 0,
23
23
  .reserved = [_]u8{0} ** 48,
24
- .unit = 710, // Let's use the ISO-4217 Code Number for ZAR
24
+ .ledger = 710, // Let's use the ISO-4217 Code Number for ZAR
25
25
  .code = 2000, // A chart of accounts code to describe this as a payable account.
26
26
  .flags = .{},
27
- .debits_reserved = 0,
28
- .debits_accepted = 0,
29
- .credits_reserved = 0,
30
- .credits_accepted = 0,
27
+ .debits_pending = 0,
28
+ .debits_posted = 0,
29
+ .credits_pending = 0,
30
+ .credits_posted = 0,
31
31
  },
32
32
  };
33
33
 
@@ -6,13 +6,15 @@ const Transfer = tb.Transfer;
6
6
  pub fn main() !void {
7
7
  const transfers = [_]Transfer{
8
8
  Transfer{
9
- .id = 1000,
9
+ .id = 1,
10
10
  .debit_account_id = 1,
11
11
  .credit_account_id = 2,
12
12
  .user_data = 0,
13
- .reserved = [_]u8{0} ** 32,
13
+ .reserved = 0,
14
+ .pending_id = 0,
14
15
  .timeout = 0,
15
- .code = 0,
16
+ .ledger = 710, // Let's use the ISO-4217 Code Number for ZAR
17
+ .code = 1,
16
18
  .flags = .{},
17
19
  .amount = 1000,
18
20
  },
@@ -12,42 +12,48 @@ pub fn main() !void {
12
12
  .debit_account_id = 1,
13
13
  .credit_account_id = 2,
14
14
  .user_data = 0,
15
- .reserved = [_]u8{0} ** 32,
15
+ .reserved = 0,
16
+ .pending_id = 0,
16
17
  .timeout = std.time.ns_per_hour,
17
- .code = 0,
18
+ .ledger = 710,
19
+ .code = 1,
18
20
  .flags = .{
19
- .two_phase_commit = true,
21
+ .pending = true, // Set this transfer to be two-phase.
20
22
  },
21
- .amount = 9000,
23
+ .amount = 8000,
22
24
  },
23
25
  Transfer{
24
26
  .id = 1002,
25
27
  .debit_account_id = 1,
26
28
  .credit_account_id = 2,
27
29
  .user_data = 0,
28
- .reserved = [_]u8{0} ** 32,
30
+ .reserved = 0,
31
+ .pending_id = 0,
29
32
  .timeout = std.time.ns_per_hour,
30
- .code = 0,
33
+ .ledger = 710,
34
+ .code = 1,
31
35
  .flags = .{
32
- .two_phase_commit = true,
36
+ .pending = true, // Set this transfer to be two-phase.
33
37
  .linked = true, // Link this transfer with the next transfer 1003.
34
38
  },
35
- .amount = 1,
39
+ .amount = 500,
36
40
  },
37
41
  Transfer{
38
42
  .id = 1003,
39
43
  .debit_account_id = 1,
40
44
  .credit_account_id = 2,
41
45
  .user_data = 0,
42
- .reserved = [_]u8{0} ** 32,
46
+ .reserved = 0,
47
+ .pending_id = 0,
43
48
  .timeout = std.time.ns_per_hour,
44
- .code = 0,
49
+ .ledger = 710,
50
+ .code = 1,
45
51
  .flags = .{
46
- .two_phase_commit = true,
52
+ .pending = true, // Set this transfer to be two-phase.
47
53
  // The last transfer in a linked chain has .linked set to false to close the chain.
48
54
  // This transfer will succeed or fail together with transfer 1002 above.
49
55
  },
50
- .amount = 1,
56
+ .amount = 500,
51
57
  },
52
58
  };
53
59
 
@@ -0,0 +1,37 @@
1
+ const tb = @import("tigerbeetle.zig");
2
+ const demo = @import("demo.zig");
3
+
4
+ const Transfer = tb.Transfer;
5
+
6
+ pub fn main() !void {
7
+ const commits = [_]Transfer{
8
+ Transfer{
9
+ .id = 2001,
10
+ .debit_account_id = 1,
11
+ .credit_account_id = 2,
12
+ .user_data = 0,
13
+ .reserved = 0,
14
+ .pending_id = 1001,
15
+ .timeout = 0,
16
+ .ledger = 0,// Honor original Transfer ledger.
17
+ .code = 0,// Honor original Transfer code.
18
+ .flags = .{ .post_pending_transfer = true }, // Post the pending two-phase transfer.
19
+ .amount = 0, // Inherit the amount from the pending transfer.
20
+ },
21
+ Transfer{
22
+ .id = 2002,
23
+ .debit_account_id = 1,
24
+ .credit_account_id = 2,
25
+ .user_data = 0,
26
+ .reserved = 0,
27
+ .pending_id = 1002,
28
+ .timeout = 0,
29
+ .ledger = 0,
30
+ .code = 0,
31
+ .flags = .{ .post_pending_transfer = true }, // Post the pending two-phase transfer.
32
+ .amount = 0, // Inherit the amount from the pending transfer.
33
+ },
34
+ };
35
+
36
+ try demo.request(.create_transfers, commits, demo.on_create_transfers);
37
+ }
@@ -0,0 +1,24 @@
1
+ const tb = @import("tigerbeetle.zig");
2
+ const demo = @import("demo.zig");
3
+
4
+ const Transfer = tb.Transfer;
5
+
6
+ pub fn main() !void {
7
+ const commits = [_]Transfer{
8
+ Transfer{
9
+ .id = 2003,
10
+ .debit_account_id = 1,
11
+ .credit_account_id = 2,
12
+ .user_data = 0,
13
+ .reserved = 0,
14
+ .pending_id = 1003,
15
+ .timeout = 0,
16
+ .ledger = 0,
17
+ .code = 0,
18
+ .flags = .{ .void_pending_transfer = true },
19
+ .amount = 0,
20
+ },
21
+ };
22
+
23
+ try demo.request(.create_transfers, commits, demo.on_create_transfers);
24
+ }
@@ -1,7 +1,7 @@
1
1
  const demo = @import("demo.zig");
2
2
 
3
3
  pub fn main() !void {
4
- const ids = [_]u128{ 1000, 1001, 1002 };
4
+ const ids = [_]u128{ 1, 1001, 1002, 1003, 2001, 2002, 2003 };
5
5
 
6
6
  try demo.request(.lookup_transfers, ids, demo.on_lookup_transfers);
7
7
  }
@@ -986,7 +986,7 @@ pub const IO = struct {
986
986
  /// Detects whether the underlying file system for a given directory fd supports Direct I/O.
987
987
  /// Not all Linux file systems support `O_DIRECT`, e.g. a shared macOS volume.
988
988
  fn fs_supports_direct_io(dir_fd: std.os.fd_t) !bool {
989
- if (!@hasDecl(std.os, "O_DIRECT")) return false;
989
+ if (!@hasDecl(std.os.O, "DIRECT")) return false;
990
990
 
991
991
  const path = "fs_supports_direct_io";
992
992
  const dir = std.fs.Dir{ .fd = dir_fd };
@@ -997,12 +997,12 @@ pub const IO = struct {
997
997
  while (true) {
998
998
  const res = os.system.openat(dir_fd, path, os.O.CLOEXEC | os.O.RDONLY | os.O.DIRECT, 0);
999
999
  switch (os.linux.getErrno(res)) {
1000
- 0 => {
1000
+ .SUCCESS => {
1001
1001
  os.close(@intCast(os.fd_t, res));
1002
1002
  return true;
1003
1003
  },
1004
- os.linux.EINTR => continue,
1005
- os.linux.EINVAL => return false,
1004
+ .INTR => continue,
1005
+ .INVAL => return false,
1006
1006
  else => |err| return os.unexpectedErrno(err),
1007
1007
  }
1008
1008
  }
@@ -53,12 +53,28 @@ fn init(io: *IO, cluster: u32, replica: u8, dir_fd: os.fd_t) !void {
53
53
  assert(filename.len == filename_len);
54
54
 
55
55
  // TODO Expose data file size on the CLI.
56
- _ = try io.open_file(
56
+ const fd = try io.open_file(
57
57
  dir_fd,
58
58
  filename,
59
- config.journal_size_max, // TODO Double-check that we have space for redundant headers.
59
+ config.journal_size_max,
60
60
  true,
61
61
  );
62
+ std.os.close(fd);
63
+
64
+ const file = try (std.fs.Dir{ .fd = dir_fd }).openFile(filename, .{ .write = true });
65
+ defer file.close();
66
+
67
+ {
68
+ const write_size_max = 4 * 1024 * 1024;
69
+ var write: [write_size_max]u8 = undefined;
70
+ var offset: u64 = 0;
71
+ while (true) {
72
+ const write_size = vsr.format_journal(cluster, offset, &write);
73
+ if (write_size == 0) break;
74
+ try file.writeAll(write[0..write_size]);
75
+ offset += write_size;
76
+ }
77
+ }
62
78
 
63
79
  log.info("initialized data file", .{});
64
80
  }
@@ -91,7 +107,7 @@ fn start(
91
107
  allocator,
92
108
  config.accounts_max,
93
109
  config.transfers_max,
94
- config.commits_max,
110
+ config.transfers_pending_max,
95
111
  );
96
112
  var storage = try Storage.init(config.journal_size_max, storage_fd, io);
97
113
  var message_bus = try MessageBus.init(
@@ -25,8 +25,11 @@ pub const messages_max_replica = messages_max: {
25
25
  sum += config.io_depth_read + config.io_depth_write; // Journal I/O
26
26
  sum += config.clients_max; // Replica.client_table
27
27
  sum += 1; // Replica.loopback_queue
28
- sum += config.pipelining_max; // Replica.pipeline
29
- sum += config.replicas_max; // Replica.do_view_change_from_all_replicas quorum (all others are bitsets)
28
+ sum += config.pipeline_max; // Replica.pipeline
29
+ // Replica.do_view_change_from_all_replicas quorum:
30
+ // Replica.recovery_response_quorum is only used for recovery and does not increase the limit.
31
+ // All other quorums are bitsets.
32
+ sum += config.replicas_max;
30
33
  sum += config.connections_max; // Connection.recv_message
31
34
  sum += config.connections_max * config.connection_send_queue_max_replica; // Connection.send_queue
32
35
  sum += 1; // Handle bursts (e.g. Connection.parse_message)
@@ -1,12 +1,12 @@
1
1
  const std = @import("std");
2
2
  const assert = std.debug.assert;
3
3
 
4
- /// A First In, First Out ring buffer holding at most `size` elements.
5
- pub fn RingBuffer(comptime T: type, comptime size: usize) type {
4
+ /// A First In, First Out ring buffer holding at most `count_max` elements.
5
+ pub fn RingBuffer(comptime T: type, comptime count_max: usize) type {
6
6
  return struct {
7
7
  const Self = @This();
8
8
 
9
- buffer: [size]T = undefined,
9
+ buffer: [count_max]T = undefined,
10
10
 
11
11
  /// The index of the slot with the first item, if any.
12
12
  index: usize = 0,
@@ -35,6 +35,15 @@ pub fn RingBuffer(comptime T: type, comptime size: usize) type {
35
35
  return &self.buffer[(self.index + self.count - 1) % self.buffer.len];
36
36
  }
37
37
 
38
+ pub inline fn get_ptr(self: *Self, index: usize) ?*T {
39
+ if (index < self.count) {
40
+ return &self.buffer[(self.index + index) % self.buffer.len];
41
+ } else {
42
+ assert(index < count_max);
43
+ return null;
44
+ }
45
+ }
46
+
38
47
  pub inline fn next_tail(self: Self) ?T {
39
48
  if (self.full()) return null;
40
49
  return self.buffer[(self.index + self.count) % self.buffer.len];
@@ -56,6 +65,10 @@ pub fn RingBuffer(comptime T: type, comptime size: usize) type {
56
65
  self.count += 1;
57
66
  }
58
67
 
68
+ pub inline fn retreat_tail(self: *Self) void {
69
+ self.count -= 1;
70
+ }
71
+
59
72
  /// Returns whether the ring buffer is completely full.
60
73
  pub inline fn full(self: Self) bool {
61
74
  return self.count == self.buffer.len;
@@ -90,6 +103,13 @@ pub fn RingBuffer(comptime T: type, comptime size: usize) type {
90
103
  return result;
91
104
  }
92
105
 
106
+ /// Remove and return the last item, if any.
107
+ pub fn pop_tail(self: *Self) ?T {
108
+ const result = self.tail() orelse return null;
109
+ self.retreat_tail();
110
+ return result;
111
+ }
112
+
93
113
  pub const Iterator = struct {
94
114
  ring: *Self,
95
115
  count: usize = 0,
@@ -210,15 +230,21 @@ test "RingBuffer: push/pop high level interface" {
210
230
 
211
231
  try testing.expect(!fifo.full());
212
232
  try testing.expect(fifo.empty());
233
+ try testing.expectEqual(@as(?*u32, null), fifo.get_ptr(0));
234
+ try testing.expectEqual(@as(?*u32, null), fifo.get_ptr(1));
235
+ try testing.expectEqual(@as(?*u32, null), fifo.get_ptr(2));
213
236
 
214
237
  try fifo.push(1);
215
238
  try testing.expectEqual(@as(?u32, 1), fifo.head());
239
+ try testing.expectEqual(@as(u32, 1), fifo.get_ptr(0).?.*);
240
+ try testing.expectEqual(@as(?*u32, null), fifo.get_ptr(1));
216
241
 
217
242
  try testing.expect(!fifo.full());
218
243
  try testing.expect(!fifo.empty());
219
244
 
220
245
  try fifo.push(2);
221
246
  try testing.expectEqual(@as(?u32, 1), fifo.head());
247
+ try testing.expectEqual(@as(u32, 2), fifo.get_ptr(1).?.*);
222
248
 
223
249
  try fifo.push(3);
224
250
  try testing.expectError(error.NoSpaceLeft, fifo.push(4));
@@ -228,6 +254,9 @@ test "RingBuffer: push/pop high level interface" {
228
254
 
229
255
  try testing.expectEqual(@as(?u32, 1), fifo.head());
230
256
  try testing.expectEqual(@as(?u32, 1), fifo.pop());
257
+ try testing.expectEqual(@as(u32, 2), fifo.get_ptr(0).?.*);
258
+ try testing.expectEqual(@as(u32, 3), fifo.get_ptr(1).?.*);
259
+ try testing.expectEqual(@as(?*u32, null), fifo.get_ptr(2));
231
260
 
232
261
  try testing.expect(!fifo.full());
233
262
  try testing.expect(!fifo.empty());
@@ -242,3 +271,19 @@ test "RingBuffer: push/pop high level interface" {
242
271
  try testing.expect(!fifo.full());
243
272
  try testing.expect(fifo.empty());
244
273
  }
274
+
275
+ test "RingBuffer: pop_tail" {
276
+ var lifo = RingBuffer(u32, 3){};
277
+ try lifo.push(1);
278
+ try lifo.push(2);
279
+ try lifo.push(3);
280
+ try testing.expect(lifo.full());
281
+
282
+ try testing.expectEqual(@as(?u32, 3), lifo.pop_tail());
283
+ try testing.expectEqual(@as(?u32, 1), lifo.head());
284
+ try testing.expectEqual(@as(?u32, 2), lifo.pop_tail());
285
+ try testing.expectEqual(@as(?u32, 1), lifo.head());
286
+ try testing.expectEqual(@as(?u32, 1), lifo.pop_tail());
287
+ try testing.expectEqual(@as(?u32, null), lifo.pop_tail());
288
+ try testing.expect(lifo.empty());
289
+ }
@@ -20,6 +20,8 @@ const output = std.log.scoped(.state_checker);
20
20
  /// This will run much slower but will trace all logic across the cluster.
21
21
  const log_state_transitions_only = builtin.mode != .Debug;
22
22
 
23
+ const log_health = std.log.scoped(.health);
24
+
23
25
  /// You can fine tune your log levels even further (debug/info/notice/warn/err/crit/alert/emerg):
24
26
  pub const log_level: std.log.Level = if (log_state_transitions_only) .info else .debug;
25
27
 
@@ -64,7 +66,6 @@ pub fn main() !void {
64
66
  const node_count = replica_count + client_count;
65
67
 
66
68
  const ticks_max = 100_000_000;
67
- const transitions_max = config.journal_size_max / config.message_size_max;
68
69
  const request_probability = 1 + random.uintLessThan(u8, 99);
69
70
  const idle_on_probability = random.uintLessThan(u8, 20);
70
71
  const idle_off_probability = 10 + random.uintLessThan(u8, 10);
@@ -101,10 +102,16 @@ pub fn main() !void {
101
102
  .read_latency_min = random.uintLessThan(u16, 3),
102
103
  .read_latency_mean = 3 + random.uintLessThan(u16, 10),
103
104
  .write_latency_min = random.uintLessThan(u16, 3),
104
- .write_latency_mean = 3 + random.uintLessThan(u16, 10),
105
+ .write_latency_mean = 3 + random.uintLessThan(u16, 100),
105
106
  .read_fault_probability = random.uintLessThan(u8, 10),
106
107
  .write_fault_probability = random.uintLessThan(u8, 10),
107
108
  },
109
+ .health_options = .{
110
+ .crash_probability = 0.0001,
111
+ .crash_stability = random.uintLessThan(u32, 1_000),
112
+ .restart_probability = 0.01,
113
+ .restart_stability = random.uintLessThan(u32, 1_000),
114
+ },
108
115
  });
109
116
  defer cluster.destroy();
110
117
 
@@ -143,6 +150,10 @@ pub fn main() !void {
143
150
  \\ write_latency_mean={}
144
151
  \\ read_fault_probability={}%
145
152
  \\ write_fault_probability={}%
153
+ \\ crash_probability={d}%
154
+ \\ crash_stability={} ticks
155
+ \\ restart_probability={d}%
156
+ \\ restart_stability={} ticks
146
157
  \\
147
158
  , .{
148
159
  seed,
@@ -169,26 +180,105 @@ pub fn main() !void {
169
180
  cluster.options.storage_options.write_latency_mean,
170
181
  cluster.options.storage_options.read_fault_probability,
171
182
  cluster.options.storage_options.write_fault_probability,
183
+ cluster.options.health_options.crash_probability * 100,
184
+ cluster.options.health_options.crash_stability,
185
+ cluster.options.health_options.restart_probability * 100,
186
+ cluster.options.health_options.restart_stability,
172
187
  });
173
188
 
174
189
  var requests_sent: u64 = 0;
175
190
  var idle = false;
176
191
 
192
+ // The minimum number of healthy replicas required for a crashed replica to be able to recover.
193
+ const replica_normal_min = replicas: {
194
+ if (replica_count == 1) {
195
+ // A cluster of 1 can crash safely (as long as there is no disk corruption) since it
196
+ // does not run the recovery protocol.
197
+ break :replicas 0;
198
+ } else {
199
+ break :replicas cluster.replicas[0].quorum_view_change;
200
+ }
201
+ };
202
+
203
+ // Disable most faults at startup, so that the replicas don't get stuck in recovery mode.
204
+ for (cluster.storages) |*storage, i| {
205
+ storage.faulty = replica_normal_min <= i;
206
+ }
207
+
208
+ // TODO When storage is supported, run more transitions than fit in the journal.
209
+ const transitions_max = config.journal_slot_count / 2;
177
210
  var tick: u64 = 0;
178
211
  while (tick < ticks_max) : (tick += 1) {
179
- for (cluster.storages) |*storage| storage.tick();
212
+ const health_options = &cluster.options.health_options;
213
+ // The maximum number of replicas that can crash, with the cluster still able to recover.
214
+ var crashes = cluster.replica_normal_count() -| replica_normal_min;
215
+
216
+ for (cluster.storages) |*storage, replica| {
217
+ if (cluster.replicas[replica].journal.recovered) {
218
+
219
+ // TODO Remove this workaround when VSR recovery protocol is disabled.
220
+ // When only the minimum number of replicas are healthy (no more crashes allowed),
221
+ // disable storage faults on all healthy replicas.
222
+ //
223
+ // This is a workaround to avoid the deadlock that occurs when (for example) in a
224
+ // cluster of 3 replicas, one is down, another has a corrupt prepare, and the last does
225
+ // not have the prepare. The two healthy replicas can never complete a view change,
226
+ // because two replicas are not enough to nack, and the unhealthy replica cannot
227
+ // complete the VSR recovery protocol either.
228
+ if (cluster.health[replica] == .up and crashes == 0) {
229
+ storage.faulty = false;
230
+ } else {
231
+ // When a journal recovers for the first time, enable its storage faults.
232
+ // Future crashes will recover in the presence of faults.
233
+ storage.faulty = true;
234
+ }
235
+ }
236
+ storage.tick();
237
+ }
180
238
 
181
- for (cluster.replicas) |*replica, i| {
182
- replica.tick();
183
- cluster.state_checker.check_state(@intCast(u8, i));
239
+ for (cluster.replicas) |*replica| {
240
+ switch (cluster.health[replica.replica]) {
241
+ .up => |*ticks| {
242
+ ticks.* -|= 1;
243
+ replica.tick();
244
+ cluster.state_checker.check_state(replica.replica);
245
+
246
+ if (ticks.* != 0) continue;
247
+ if (crashes == 0) continue;
248
+ if (cluster.storages[replica.replica].writes.count() == 0) {
249
+ if (!chance_f64(random, health_options.crash_probability)) continue;
250
+ } else {
251
+ if (!chance_f64(random, health_options.crash_probability * 10.0)) continue;
252
+ }
253
+
254
+ if (!try cluster.crash_replica(replica.replica)) continue;
255
+ log_health.debug("crash replica={}", .{replica.replica});
256
+ crashes -= 1;
257
+ },
258
+ .down => |*ticks| {
259
+ ticks.* -|= 1;
260
+ // Keep ticking the time so that it won't have diverged too far to synchronize
261
+ // when the replica restarts.
262
+ replica.clock.time.tick();
263
+ assert(replica.status == .recovering);
264
+ if (ticks.* == 0 and chance_f64(random, health_options.restart_probability)) {
265
+ cluster.health[replica.replica] = .{ .up = health_options.restart_stability };
266
+ log_health.debug("restart replica={}", .{replica.replica});
267
+ }
268
+ },
269
+ }
184
270
  }
185
271
 
186
- cluster.network.packet_simulator.tick();
272
+ cluster.network.packet_simulator.tick(cluster.health);
187
273
 
188
274
  for (cluster.clients) |*client| client.tick();
189
275
 
190
276
  if (cluster.state_checker.transitions == transitions_max) {
191
- if (cluster.state_checker.convergence()) break;
277
+ if (cluster.state_checker.convergence() and
278
+ cluster.replica_up_count() == replica_count)
279
+ {
280
+ break;
281
+ }
192
282
  continue;
193
283
  } else {
194
284
  assert(cluster.state_checker.transitions < transitions_max);
@@ -222,6 +312,12 @@ fn chance(random: std.rand.Random, p: u8) bool {
222
312
  return random.uintLessThan(u8, 100) < p;
223
313
  }
224
314
 
315
+ /// Returns true, `p` percent of the time, else false.
316
+ fn chance_f64(random: std.rand.Random, p: f64) bool {
317
+ assert(p <= 100.0);
318
+ return random.float(f64) < p;
319
+ }
320
+
225
321
  /// Returns the next argument for the simulator or null (if none available)
226
322
  fn args_next(args: *std.process.ArgIterator, allocator: std.mem.Allocator) ?[:0]const u8 {
227
323
  const err_or_bytes = args.next(allocator) orelse return null;