npm - tigerbeetle-node - Versions diffs - 0.11.8 → 0.11.10 - Mend

tigerbeetle-node 0.11.8 → 0.11.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85) hide show

package/dist/.client.node.sha256 +1 -1
package/package.json +4 -3
package/scripts/build_lib.sh +41 -0
package/src/node.zig +1 -1
package/src/tigerbeetle/scripts/validate_docs.sh +7 -1
package/src/tigerbeetle/src/benchmark.zig +3 -3
package/src/tigerbeetle/src/config.zig +31 -16
package/src/tigerbeetle/src/constants.zig +48 -9
package/src/tigerbeetle/src/ewah.zig +5 -5
package/src/tigerbeetle/src/ewah_fuzz.zig +1 -1
package/src/tigerbeetle/src/lsm/binary_search.zig +1 -1
package/src/tigerbeetle/src/lsm/bloom_filter.zig +1 -1
package/src/tigerbeetle/src/lsm/compaction.zig +34 -21
package/src/tigerbeetle/src/lsm/forest_fuzz.zig +84 -104
package/src/tigerbeetle/src/lsm/grid.zig +19 -13
package/src/tigerbeetle/src/lsm/manifest_log.zig +8 -10
package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +18 -13
package/src/tigerbeetle/src/lsm/merge_iterator.zig +1 -1
package/src/tigerbeetle/src/lsm/segmented_array.zig +17 -17
package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +1 -1
package/src/tigerbeetle/src/lsm/set_associative_cache.zig +1 -1
package/src/tigerbeetle/src/lsm/table.zig +8 -20
package/src/tigerbeetle/src/lsm/table_immutable.zig +1 -1
package/src/tigerbeetle/src/lsm/table_iterator.zig +3 -3
package/src/tigerbeetle/src/lsm/table_mutable.zig +14 -2
package/src/tigerbeetle/src/lsm/test.zig +5 -4
package/src/tigerbeetle/src/lsm/tree.zig +1 -2
package/src/tigerbeetle/src/lsm/tree_fuzz.zig +85 -115
package/src/tigerbeetle/src/message_bus.zig +4 -4
package/src/tigerbeetle/src/message_pool.zig +7 -10
package/src/tigerbeetle/src/ring_buffer.zig +22 -12
package/src/tigerbeetle/src/simulator.zig +366 -239
package/src/tigerbeetle/src/state_machine/auditor.zig +5 -5
package/src/tigerbeetle/src/state_machine/workload.zig +3 -3
package/src/tigerbeetle/src/state_machine.zig +190 -178
package/src/tigerbeetle/src/{util.zig → stdx.zig} +2 -0
package/src/tigerbeetle/src/storage.zig +13 -6
package/src/tigerbeetle/src/{test → testing/cluster}/message_bus.zig +3 -3
package/src/tigerbeetle/src/{test → testing/cluster}/network.zig +46 -22
package/src/tigerbeetle/src/testing/cluster/state_checker.zig +169 -0
package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +202 -0
package/src/tigerbeetle/src/testing/cluster.zig +443 -0
package/src/tigerbeetle/src/{test → testing}/fuzz.zig +0 -0
package/src/tigerbeetle/src/testing/hash_log.zig +66 -0
package/src/tigerbeetle/src/{test → testing}/id.zig +0 -0
package/src/tigerbeetle/src/testing/packet_simulator.zig +365 -0
package/src/tigerbeetle/src/{test → testing}/priority_queue.zig +1 -1
package/src/tigerbeetle/src/testing/reply_sequence.zig +139 -0
package/src/tigerbeetle/src/{test → testing}/state_machine.zig +3 -1
package/src/tigerbeetle/src/testing/storage.zig +757 -0
package/src/tigerbeetle/src/{test → testing}/table.zig +21 -0
package/src/tigerbeetle/src/{test → testing}/time.zig +0 -0
package/src/tigerbeetle/src/tigerbeetle.zig +2 -0
package/src/tigerbeetle/src/tracer.zig +3 -3
package/src/tigerbeetle/src/unit_tests.zig +4 -4
package/src/tigerbeetle/src/vopr.zig +2 -2
package/src/tigerbeetle/src/vsr/client.zig +5 -2
package/src/tigerbeetle/src/vsr/clock.zig +93 -53
package/src/tigerbeetle/src/vsr/journal.zig +109 -98
package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +2 -2
package/src/tigerbeetle/src/vsr/replica.zig +1983 -1430
package/src/tigerbeetle/src/vsr/replica_format.zig +13 -13
package/src/tigerbeetle/src/vsr/superblock.zig +240 -142
package/src/tigerbeetle/src/vsr/superblock_client_table.zig +7 -7
package/src/tigerbeetle/src/vsr/superblock_free_set.zig +1 -1
package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +1 -1
package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +49 -14
package/src/tigerbeetle/src/vsr/superblock_manifest.zig +38 -19
package/src/tigerbeetle/src/vsr/superblock_quorums.zig +48 -48
package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +51 -51
package/src/tigerbeetle/src/vsr.zig +99 -33
package/src/tigerbeetle/src/demo.zig +0 -132
package/src/tigerbeetle/src/demo_01_create_accounts.zig +0 -35
package/src/tigerbeetle/src/demo_02_lookup_accounts.zig +0 -7
package/src/tigerbeetle/src/demo_03_create_transfers.zig +0 -37
package/src/tigerbeetle/src/demo_04_create_pending_transfers.zig +0 -61
package/src/tigerbeetle/src/demo_05_post_pending_transfers.zig +0 -37
package/src/tigerbeetle/src/demo_06_void_pending_transfers.zig +0 -24
package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +0 -7
package/src/tigerbeetle/src/test/cluster.zig +0 -352
package/src/tigerbeetle/src/test/conductor.zig +0 -366
package/src/tigerbeetle/src/test/packet_simulator.zig +0 -398
package/src/tigerbeetle/src/test/state_checker.zig +0 -169
package/src/tigerbeetle/src/test/storage.zig +0 -864
package/src/tigerbeetle/src/test/storage_checker.zig +0 -204

package/src/tigerbeetle/src/simulator.zig CHANGED Viewed

@@ -8,17 +8,21 @@ const constants = @import("constants.zig");
 const vsr = @import("vsr.zig");
 const Header = vsr.Header;
-const Client = @import("test/cluster.zig").Client;
-const Cluster = @import("test/cluster.zig").Cluster;
-const ClusterOptions = @import("test/cluster.zig").ClusterOptions;
-const Replica = @import("test/cluster.zig").Replica;
-const StateMachine = @import("test/cluster.zig").StateMachine;
-const StateChecker = @import("test/state_checker.zig").StateChecker;
-const StorageChecker = @import("test/storage_checker.zig").StorageChecker;
-const PartitionMode = @import("test/packet_simulator.zig").PartitionMode;
-const MessageBus = @import("test/message_bus.zig").MessageBus;
-const Conductor = @import("test/conductor.zig").ConductorType(Client, MessageBus, StateMachine);
-const IdPermutation = @import("test/id.zig").IdPermutation;
+const state_machine = @import("vsr_simulator_options").state_machine;
+const StateMachineType = switch (state_machine) {
+    .accounting => @import("state_machine.zig").StateMachineType,
+    .testing => @import("testing/state_machine.zig").StateMachineType,
+};
+const Client = @import("testing/cluster.zig").Client;
+const Cluster = @import("testing/cluster.zig").ClusterType(StateMachineType);
+const Replica = @import("testing/cluster.zig").Replica;
+const StateMachine = Cluster.StateMachine;
+const Failure = @import("testing/cluster.zig").Failure;
+const PartitionMode = @import("testing/packet_simulator.zig").PartitionMode;
+const ReplySequence = @import("testing/reply_sequence.zig").ReplySequence;
+const IdPermutation = @import("testing/id.zig").IdPermutation;
+const Message = @import("message_pool.zig").MessagePool.Message;
 /// The `log` namespace in this root file is required to implement our custom `log` function.
 const output = std.log.scoped(.state_checker);
@@ -36,10 +40,6 @@ pub const log_level: std.log.Level = if (log_state_transitions_only) .info else
 const cluster_id = 0;
-var cluster: *Cluster = undefined;
-var state_checker: *StateChecker = undefined;
-var storage_checker: *StorageChecker = undefined;
 pub fn main() !void {
     // This must be initialized at runtime as stderr is not comptime known on e.g. Windows.
     log_buffer.unbuffered_writer = std.io.getStdErr().writer();
@@ -79,51 +79,35 @@ pub fn main() !void {
     const replica_count = 1 + random.uintLessThan(u8, constants.replicas_max);
     const client_count = 1 + random.uintLessThan(u8, constants.clients_max);
-    const node_count = replica_count + client_count;
-    const ticks_max = 50_000_000;
-    const request_probability = 1 + random.uintLessThan(u8, 99);
-    const idle_on_probability = random.uintLessThan(u8, 20);
-    const idle_off_probability = 10 + random.uintLessThan(u8, 10);
-    // The maximum number of transitions from calling `client.request()`, not including
-    // `register` messages.
-    const requests_committed_max: usize = constants.journal_slot_count * 3;
-    const cluster_options: ClusterOptions = .{
-        .cluster = cluster_id,
+    const cluster_options = Cluster.Options{
+        .cluster_id = cluster_id,
         .replica_count = replica_count,
         .client_count = client_count,
         .storage_size_limit = vsr.sector_floor(
             constants.storage_size_max - random.uintLessThan(u64, constants.storage_size_max / 10),
         ),
         .seed = random.int(u64),
-        .on_change_state = on_replica_change_state,
-        .on_compact = on_replica_compact,
-        .on_checkpoint = on_replica_checkpoint,
-        .network_options = .{
-            .packet_simulator_options = .{
-                .replica_count = replica_count,
-                .client_count = client_count,
-                .node_count = node_count,
-                .seed = random.int(u64),
-                .one_way_delay_mean = 3 + random.uintLessThan(u16, 10),
-                .one_way_delay_min = random.uintLessThan(u16, 3),
-                .packet_loss_probability = random.uintLessThan(u8, 30),
-                .path_maximum_capacity = 2 + random.uintLessThan(u8, 19),
-                .path_clog_duration_mean = random.uintLessThan(u16, 500),
-                .path_clog_probability = random.uintLessThan(u8, 2),
-                .packet_replay_probability = random.uintLessThan(u8, 50),
-                .partition_mode = random_partition_mode(random),
-                .partition_probability = random.uintLessThan(u8, 3),
-                .unpartition_probability = 1 + random.uintLessThan(u8, 10),
-                .partition_stability = 100 + random.uintLessThan(u32, 100),
-                .unpartition_stability = random.uintLessThan(u32, 20),
-            },
+        .network = .{
+            .replica_count = replica_count,
+            .client_count = client_count,
+            .seed = random.int(u64),
+            .one_way_delay_mean = 3 + random.uintLessThan(u16, 10),
+            .one_way_delay_min = random.uintLessThan(u16, 3),
+            .packet_loss_probability = random.uintLessThan(u8, 30),
+            .path_maximum_capacity = 2 + random.uintLessThan(u8, 19),
+            .path_clog_duration_mean = random.uintLessThan(u16, 500),
+            .path_clog_probability = random.uintLessThan(u8, 2),
+            .packet_replay_probability = random.uintLessThan(u8, 50),
+            .partition_mode = random_partition_mode(random),
+            .partition_probability = random.uintLessThan(u8, 3),
+            .unpartition_probability = 1 + random.uintLessThan(u8, 10),
+            .partition_stability = 100 + random.uintLessThan(u32, 100),
+            .unpartition_stability = random.uintLessThan(u32, 20),
         },
-        .storage_options = .{
+        .storage = .{
             .seed = random.int(u64),
             .read_latency_min = random.uintLessThan(u16, 3),
             .read_latency_mean = 3 + random.uintLessThan(u16, 10),
@@ -132,16 +116,13 @@ pub fn main() !void {
             .read_fault_probability = random.uintLessThan(u8, 10),
             .write_fault_probability = random.uintLessThan(u8, 10),
             .crash_fault_probability = 80 + random.uintLessThan(u8, 21),
-            .faulty_superblock = true,
         },
-        .health_options = .{
-            .crash_probability = 0.000001,
-            .crash_stability = random.uintLessThan(u32, 1_000),
-            .restart_probability = 0.0001,
-            .restart_stability = random.uintLessThan(u32, 1_000),
+        .storage_fault_atlas = .{
+            .faulty_superblock = true,
+            .faulty_wal_headers = replica_count > 1,
+            .faulty_wal_prepares = replica_count > 1,
         },
-        // TODO(dj) SimulatorType(StateMachine).init(state_machine_options)
-        .state_machine_options = switch (constants.state_machine) {
+        .state_machine = switch (state_machine) {
             .testing => .{},
             .accounting => .{
                 .lsm_forest_node_count = 4096,
@@ -152,6 +133,26 @@ pub fn main() !void {
         },
     };
+    const workload_options = StateMachine.Workload.Options.generate(random, .{
+        .client_count = client_count,
+        // TODO(DJ) Once Workload no longer needs in_flight_max, make stalled_queue_capacity private.
+        // Also maybe make it dynamic (computed from the client_count instead of clients_max).
+        .in_flight_max = ReplySequence.stalled_queue_capacity,
+    });
+    const simulator_options = Simulator.Options{
+        .cluster = cluster_options,
+        .workload = workload_options,
+        .replica_crash_probability = 0.00002,
+        .replica_crash_stability = random.uintLessThan(u32, 1_000),
+        .replica_restart_probability = 0.0001,
+        .replica_restart_stability = random.uintLessThan(u32, 1_000),
+        .requests_max = constants.journal_slot_count * 3,
+        .request_probability = 1 + random.uintLessThan(u8, 99),
+        .request_idle_on_probability = random.uintLessThan(u8, 20),
+        .request_idle_off_probability = 10 + random.uintLessThan(u8, 10),
+    };
     output.info(
         \\
         \\          SEED={}
@@ -185,190 +186,335 @@ pub fn main() !void {
         \\          restart_stability={} ticks
     , .{
         seed,
-        replica_count,
-        client_count,
-        request_probability,
-        idle_on_probability,
-        idle_off_probability,
-        cluster_options.network_options.packet_simulator_options.one_way_delay_mean,
-        cluster_options.network_options.packet_simulator_options.one_way_delay_min,
-        cluster_options.network_options.packet_simulator_options.packet_loss_probability,
-        cluster_options.network_options.packet_simulator_options.path_maximum_capacity,
-        cluster_options.network_options.packet_simulator_options.path_clog_duration_mean,
-        cluster_options.network_options.packet_simulator_options.path_clog_probability,
-        cluster_options.network_options.packet_simulator_options.packet_replay_probability,
-        cluster_options.network_options.packet_simulator_options.partition_mode,
-        cluster_options.network_options.packet_simulator_options.partition_probability,
-        cluster_options.network_options.packet_simulator_options.unpartition_probability,
-        cluster_options.network_options.packet_simulator_options.partition_stability,
-        cluster_options.network_options.packet_simulator_options.unpartition_stability,
-        cluster_options.storage_options.read_latency_min,
-        cluster_options.storage_options.read_latency_mean,
-        cluster_options.storage_options.write_latency_min,
-        cluster_options.storage_options.write_latency_mean,
-        cluster_options.storage_options.read_fault_probability,
-        cluster_options.storage_options.write_fault_probability,
-        cluster_options.health_options.crash_probability * 100,
-        cluster_options.health_options.crash_stability,
-        cluster_options.health_options.restart_probability * 100,
-        cluster_options.health_options.restart_stability,
+        cluster_options.replica_count,
+        cluster_options.client_count,
+        simulator_options.request_probability,
+        simulator_options.request_idle_on_probability,
+        simulator_options.request_idle_off_probability,
+        cluster_options.network.one_way_delay_mean,
+        cluster_options.network.one_way_delay_min,
+        cluster_options.network.packet_loss_probability,
+        cluster_options.network.path_maximum_capacity,
+        cluster_options.network.path_clog_duration_mean,
+        cluster_options.network.path_clog_probability,
+        cluster_options.network.packet_replay_probability,
+        cluster_options.network.partition_mode,
+        cluster_options.network.partition_probability,
+        cluster_options.network.unpartition_probability,
+        cluster_options.network.partition_stability,
+        cluster_options.network.unpartition_stability,
+        cluster_options.storage.read_latency_min,
+        cluster_options.storage.read_latency_mean,
+        cluster_options.storage.write_latency_min,
+        cluster_options.storage.write_latency_mean,
+        cluster_options.storage.read_fault_probability,
+        cluster_options.storage.write_fault_probability,
+        simulator_options.replica_crash_probability * 100,
+        simulator_options.replica_crash_stability,
+        simulator_options.replica_restart_probability * 100,
+        simulator_options.replica_restart_stability,
     });
-    cluster = try Cluster.create(allocator, random, cluster_options);
-    defer cluster.destroy();
+    var simulator = try Simulator.init(allocator, random, simulator_options);
+    defer simulator.deinit(allocator);
-    const workload_options = StateMachine.Workload.Options.generate(random, .{
-        .client_count = client_count,
-        .in_flight_max = Conductor.stalled_queue_capacity,
-    });
+    const ticks_max = 50_000_000;
+    var tick: u64 = 0;
+    while (tick < ticks_max) : (tick += 1) {
+        simulator.tick();
+        if (simulator.done()) break;
+    } else {
+        output.err("you can reproduce this failure with seed={}", .{seed});
+        fatal(.liveness, "unable to complete requests_committed_max before ticks_max", .{});
+    }
+    assert(simulator.done());
-    var workload = try StateMachine.Workload.init(allocator, random, workload_options);
-    defer workload.deinit(allocator);
+    output.info("\n          PASSED ({} ticks)", .{tick});
+}
-    var conductor = try Conductor.init(allocator, random, &workload, .{
-        .cluster = cluster_id,
-        .replica_count = replica_count,
-        .client_count = client_count,
-        .message_bus_options = .{ .network = &cluster.network },
-        .requests_max = requests_committed_max,
-        .request_probability = request_probability,
-        .idle_on_probability = idle_on_probability,
-        .idle_off_probability = idle_off_probability,
-    });
-    defer conductor.deinit(allocator);
+pub const Simulator = struct {
+    pub const Options = struct {
+        cluster: Cluster.Options,
+        workload: StateMachine.Workload.Options,
+        /// Probability per tick that a crash will occur.
+        replica_crash_probability: f64,
+        /// Minimum duration of a crash.
+        replica_crash_stability: u32,
+        /// Probability per tick that a crashed replica will recovery.
+        replica_restart_probability: f64,
+        /// Minimum time a replica is up until it is crashed again.
+        replica_restart_stability: u32,
+        /// The total number of requests to send. Does not count `register` messages.
+        requests_max: usize,
+        request_probability: u8, // percent
+        request_idle_on_probability: u8, // percent
+        request_idle_off_probability: u8, // percent
+    };
-    for (conductor.clients) |*client| {
-        cluster.network.link(client.message_bus.process, &client.message_bus);
+    random: std.rand.Random,
+    options: Options,
+    cluster: *Cluster,
+    workload: StateMachine.Workload,
+    /// Protect a replica from fast successive crash/restarts.
+    replica_stability: []usize,
+    reply_sequence: ReplySequence,
+    /// Total number of requests sent, including those that have not been delivered.
+    /// Does not include `register` messages.
+    requests_sent: usize = 0,
+    requests_idle: bool = false,
+    pub fn init(allocator: std.mem.Allocator, random: std.rand.Random, options: Options) !Simulator {
+        assert(options.replica_crash_probability < 1.0);
+        assert(options.replica_crash_probability >= 0.0);
+        assert(options.replica_restart_probability < 1.0);
+        assert(options.replica_restart_probability >= 0.0);
+        assert(options.requests_max > 0);
+        assert(options.request_probability > 0);
+        assert(options.request_probability <= 100);
+        assert(options.request_idle_on_probability <= 100);
+        assert(options.request_idle_off_probability > 0);
+        assert(options.request_idle_off_probability <= 100);
+        var cluster = try Cluster.init(allocator, on_cluster_reply, options.cluster);
+        errdefer cluster.deinit();
+        var workload = try StateMachine.Workload.init(allocator, random, options.workload);
+        errdefer workload.deinit(allocator);
+        var replica_stability = try allocator.alloc(usize, options.cluster.replica_count);
+        errdefer allocator.free(replica_stability);
+        std.mem.set(usize, replica_stability, 0);
+        var reply_sequence = try ReplySequence.init(allocator);
+        errdefer reply_sequence.deinit(allocator);
+        return Simulator{
+            .random = random,
+            .options = options,
+            .cluster = cluster,
+            .workload = workload,
+            .replica_stability = replica_stability,
+            .reply_sequence = reply_sequence,
+        };
     }
-    state_checker = try allocator.create(StateChecker);
-    defer allocator.destroy(state_checker);
-    state_checker.* = try StateChecker.init(
-        allocator,
-        cluster_id,
-        cluster.replicas,
-        conductor.clients,
-    );
-    defer state_checker.deinit();
-    storage_checker = try allocator.create(StorageChecker);
-    defer allocator.destroy(storage_checker);
-    storage_checker.* = StorageChecker.init(allocator);
-    defer storage_checker.deinit();
-    // The minimum number of healthy replicas required for a crashed replica to be able to recover.
-    const replica_normal_min = replicas: {
-        if (replica_count == 1) {
-            // A cluster of 1 can crash safely (as long as there is no disk corruption) since it
-            // does not run the recovery protocol.
-            break :replicas 0;
-        } else {
-            break :replicas cluster.replicas[0].quorum_view_change;
+    pub fn deinit(simulator: *Simulator, allocator: std.mem.Allocator) void {
+        allocator.free(simulator.replica_stability);
+        simulator.reply_sequence.deinit(allocator);
+        simulator.workload.deinit(allocator);
+        simulator.cluster.deinit();
+    }
+    pub fn done(simulator: *Simulator) bool {
+        assert(simulator.requests_sent <= simulator.options.requests_max);
+        if (!simulator.cluster.state_checker.convergence()) return false;
+        if (!simulator.reply_sequence.empty()) return false;
+        if (simulator.requests_sent < simulator.options.requests_max) return false;
+        for (simulator.cluster.replica_health) |health| {
+            if (health == .down) return false;
         }
-    };
-    var tick: u64 = 0;
-    while (tick < ticks_max) : (tick += 1) {
-        const health_options = &cluster.options.health_options;
-        // The maximum number of replicas that can crash, with the cluster still able to recover.
-        var crashes = cluster.replica_normal_count() -| replica_normal_min;
-        for (cluster.storages) |*storage, replica| {
-            if (cluster.replicas[replica].journal.status == .recovered) {
-                // TODO Remove this workaround when VSR recovery protocol is disabled.
-                // When only the minimum number of replicas are healthy (no more crashes allowed),
-                // disable storage faults on all healthy replicas.
-                //
-                // This is a workaround to avoid the deadlock that occurs when (for example) in a
-                // cluster of 3 replicas, one is down, another has a corrupt prepare, and the last does
-                // not have the prepare. The two healthy replicas can never complete a view change,
-                // because two replicas are not enough to nack, and the unhealthy replica cannot
-                // complete the VSR recovery protocol either.
-                if (cluster.health[replica] == .up and crashes == 0) {
-                    if (storage.faulty) {
-                        log_simulator.debug("{}: disable storage faults", .{replica});
-                        storage.faulty = false;
-                    }
-                } else {
-                    // When a journal recovers for the first time, enable its storage faults.
-                    // Future crashes will recover in the presence of faults.
-                    if (!storage.faulty) {
-                        log_simulator.debug("{}: enable storage faults", .{replica});
-                        storage.faulty = true;
-                    }
-                }
-            }
+        for (simulator.cluster.clients) |*client| {
+            if (client.request_queue.count > 0) return false;
         }
+        return true;
+    }
-        for (cluster.replicas) |*replica, index| {
-            switch (cluster.health[replica.replica]) {
-                .up => |*ticks| {
-                    ticks.* -|= 1;
-                    replica.tick();
-                    cluster.storages[index].tick();
-                    state_checker.check_state(replica.replica) catch |err| {
-                        fatal(.correctness, "state checker error: {}", .{err});
-                    };
-                    if (ticks.* != 0) continue;
-                    if (crashes == 0) continue;
-                    if (cluster.storages[replica.replica].writes.count() == 0) {
-                        if (!chance_f64(random, health_options.crash_probability)) continue;
-                    } else {
-                        if (!chance_f64(random, health_options.crash_probability * 10.0)) continue;
-                    }
+    pub fn tick(simulator: *Simulator) void {
+        // TODO(Zig): Remove (see on_cluster_reply()).
+        simulator.cluster.context = simulator;
-                    if (!try cluster.crash_replica(replica.replica)) continue;
-                    log_simulator.debug("{}: crash replica", .{replica.replica});
-                    crashes -= 1;
-                },
-                .down => |*ticks| {
-                    ticks.* -|= 1;
-                    // Keep ticking the time so that it won't have diverged too far to synchronize
-                    // when the replica restarts.
-                    replica.clock.time.tick();
-                    assert(replica.status == .recovering);
-                    if (ticks.* == 0 and chance_f64(random, health_options.restart_probability)) {
-                        cluster.health[replica.replica] = .{ .up = health_options.restart_stability };
-                        log_simulator.debug("{}: restart replica", .{replica.replica});
-                    }
-                },
+        simulator.cluster.tick();
+        simulator.tick_requests();
+        simulator.tick_crash();
+    }
+    fn on_cluster_reply(
+        cluster: *Cluster,
+        reply_client: usize,
+        request: *Message,
+        reply: *Message,
+    ) void {
+        // TODO(Zig) Use @returnAddress to initialzie the cluster, then this can just use @fieldParentPtr().
+        const simulator = @ptrCast(*Simulator, @alignCast(@alignOf(Simulator), cluster.context.?));
+        simulator.reply_sequence.insert(reply_client, request, reply);
+        while (simulator.reply_sequence.peek()) |commit| {
+            defer simulator.reply_sequence.next();
+            const commit_client = simulator.cluster.clients[commit.client_index];
+            assert(commit.reply.references == 1);
+            assert(commit.reply.header.command == .reply);
+            assert(commit.reply.header.client == commit_client.id);
+            assert(commit.reply.header.request == commit.request.header.request);
+            assert(commit.reply.header.operation == commit.request.header.operation);
+            assert(commit.request.references == 1);
+            assert(commit.request.header.command == .request);
+            assert(commit.request.header.client == commit_client.id);
+            log_simulator.debug("consume_stalled_replies: op={} operation={} client={} request={}", .{
+                commit.reply.header.op,
+                commit.reply.header.operation,
+                commit.request.header.client,
+                commit.request.header.request,
+            });
+            if (commit.request.header.operation != .register) {
+                simulator.workload.on_reply(
+                    commit.client_index,
+                    commit.reply.header.operation,
+                    commit.reply.header.timestamp,
+                    commit.request.body(),
+                    commit.reply.body(),
+                );
             }
         }
+    }
-        cluster.network.packet_simulator.tick(cluster.health);
-        conductor.tick();
+    /// Maybe send a request from one of the cluster's clients.
+    fn tick_requests(simulator: *Simulator) void {
+        if (simulator.requests_idle) {
+            if (chance(simulator.random, simulator.options.request_idle_off_probability)) {
+                simulator.requests_idle = false;
+            }
+        } else {
+            if (chance(simulator.random, simulator.options.request_idle_on_probability)) {
+                simulator.requests_idle = true;
+            }
+        }
-        if (state_checker.convergence() and conductor.done() and
-            cluster.replica_up_count() == replica_count)
-        {
-            break;
+        if (simulator.requests_idle) return;
+        if (simulator.requests_sent == simulator.options.requests_max) return;
+        if (!chance(simulator.random, simulator.options.request_probability)) return;
+        const client_index =
+            simulator.random.uintLessThan(usize, simulator.options.cluster.client_count);
+        var client = &simulator.cluster.clients[client_index];
+        // Make sure that there is capacity in the client's request queue so that we never trigger
+        // error.TooManyOutstandingRequests.
+        if (client.request_queue.count + 1 > constants.client_request_queue_max) return;
+        // Messages aren't added to the ReplySequence until a reply arrives.
+        // Before sending a new message, make sure there will definitely be room for it.
+        var reserved: usize = 0;
+        for (simulator.cluster.clients) |*c| {
+            // Count the number of clients that are still waiting for a `register` to complete,
+            // since they may start one at any time.
+            reserved += @boolToInt(c.session == 0);
+            // Count the number of requests queued.
+            reserved += c.request_queue.count;
         }
-    } else {
-        output.err("you can reproduce this failure with seed={}", .{seed});
-        fatal(.liveness, "unable to complete requests_committed_max before ticks_max", .{});
+        // +1 for the potential request — is there room in the sequencer's queue?
+        if (reserved + 1 > simulator.reply_sequence.free()) return;
+        var request_message = client.get_message();
+        defer client.unref(request_message);
+        const request_metadata = simulator.workload.build_request(
+            client_index,
+            @alignCast(
+                @alignOf(vsr.Header),
+                request_message.buffer[@sizeOf(vsr.Header)..constants.message_size_max],
+            ),
+        );
+        assert(request_metadata.size <= constants.message_size_max - @sizeOf(vsr.Header));
+        simulator.cluster.request(
+            client_index,
+            request_metadata.operation,
+            request_message,
+            request_metadata.size,
+        );
+        // Since we already checked the client's request queue for free space, `client.request()`
+        // should always queue the request.
+        assert(request_message == client.request_queue.tail_ptr().?.message);
+        assert(request_message.header.size == @sizeOf(vsr.Header) + request_metadata.size);
+        assert(request_message.header.operation.cast(StateMachine) == request_metadata.operation);
+        simulator.requests_sent += 1;
+        assert(simulator.requests_sent <= simulator.options.requests_max);
     }
-    assert(state_checker.convergence());
-    assert(conductor.done());
+    fn tick_crash(simulator: *Simulator) void {
+        const recoverable_count_min =
+            vsr.quorums(simulator.options.cluster.replica_count).view_change;
+        var recoverable_count: usize = 0;
+        for (simulator.cluster.replicas) |*replica| {
+            recoverable_count += @boolToInt(replica.status != .recovering_head);
+        }
-    output.info("\n          PASSED ({} ticks)", .{tick});
-}
+        for (simulator.cluster.replicas) |*replica| {
+            simulator.replica_stability[replica.replica] -|= 1;
+            const stability = simulator.replica_stability[replica.replica];
+            if (stability > 0) continue;
+            switch (simulator.cluster.replica_health[replica.replica]) {
+                .up => {
+                    const storage = &simulator.cluster.storages[replica.replica];
+                    const replica_writes = storage.writes.count();
+                    const crash_probability = simulator.options.replica_crash_probability *
+                        @as(f64, if (replica_writes == 0) 1.0 else 10.0);
+                    if (!chance_f64(simulator.random, crash_probability)) continue;
+                    const fault = recoverable_count > recoverable_count_min;
+                    replica.superblock.storage.faulty = fault;
+                    if (!fault) {
+                        // The journal writes redundant headers of faulty ops as zeroes to ensure
+                        // that they remain faulty after a crash/recover. Since that fault cannot
+                        // be disabled by `storage.faulty`, we must manually repair it here to
+                        // ensure a cluster cannot become stuck in status=recovering_head.
+                        // See recover_slots() for more detail.
+                        const offset = vsr.Zone.wal_headers.offset(0);
+                        const size = vsr.Zone.wal_headers.size().?;
+                        const headers_bytes = storage.memory[offset..][0..size];
+                        const headers = mem.bytesAsSlice(vsr.Header, headers_bytes);
+                        for (headers) |*h, slot| {
+                            if (h.checksum == 0) h.* = storage.wal_prepares()[slot].header;
+                        }
+                    }
-pub const ExitCode = enum(u8) {
-    ok = 0,
-    crash = 127, // Any assertion crash will be given an exit code of 127 by default.
-    liveness = 128,
-    correctness = 129,
+                    log_simulator.debug("{}: crash replica (faults={})", .{ replica.replica, fault });
+                    simulator.cluster.crash_replica(replica.replica) catch unreachable;
+                    replica.superblock.storage.faulty = true;
+                    recoverable_count -= @boolToInt(replica.status == .recovering_head);
+                    assert(replica.status != .recovering_head or fault);
+                    simulator.replica_stability[replica.replica] =
+                        simulator.options.replica_crash_stability;
+                },
+                .down => {
+                    if (chance_f64(simulator.random, simulator.options.replica_restart_probability)) {
+                        simulator.cluster.restart_replica(replica.replica);
+                        log_simulator.debug("{}: restart replica", .{replica.replica});
+                        simulator.replica_stability[replica.replica] =
+                            simulator.options.replica_restart_stability;
+                    }
+                },
+            }
+        }
+    }
 };
 /// Print an error message and then exit with an exit code.
-fn fatal(exit_code: ExitCode, comptime fmt_string: []const u8, args: anytype) noreturn {
+fn fatal(failure: Failure, comptime fmt_string: []const u8, args: anytype) noreturn {
     output.err(fmt_string, args);
-    std.os.exit(@enumToInt(exit_code));
+    std.os.exit(@enumToInt(failure));
+}
+/// Returns true, `p` percent of the time, else false.
+fn chance(random: std.rand.Random, p: u8) bool {
+    assert(p <= 100);
+    return random.uintLessThanBiased(u8, 100) < p;
 }
 /// Returns true, `p` percent of the time, else false.
@@ -383,29 +529,10 @@ fn args_next(args: *std.process.ArgIterator, allocator: std.mem.Allocator) ?[:0]
     return err_or_bytes catch @panic("Unable to extract next value from args");
 }
-fn on_replica_change_state(replica: *const Replica) void {
-    state_checker.check_state(replica.replica) catch |err| {
-        fatal(.correctness, "state checker error: {}", .{err});
-    };
-}
-fn on_replica_compact(replica: *const Replica) void {
-    storage_checker.replica_compact(replica) catch |err| {
-        fatal(.correctness, "storage checker error: {}", .{err});
-    };
-}
-fn on_replica_checkpoint(replica: *const Replica) void {
-    storage_checker.replica_checkpoint(replica) catch |err| {
-        fatal(.correctness, "storage checker error: {}", .{err});
-    };
-}
-/// Returns a random partitioning mode, excluding .custom
+/// Returns a random partitioning mode.
 fn random_partition_mode(random: std.rand.Random) PartitionMode {
     const typeInfo = @typeInfo(PartitionMode).Enum;
-    var enumAsInt = random.uintAtMost(typeInfo.tag_type, typeInfo.fields.len - 2);
-    if (enumAsInt >= @enumToInt(PartitionMode.custom)) enumAsInt += 1;
+    var enumAsInt = random.uintAtMost(typeInfo.tag_type, typeInfo.fields.len - 1);
     return @intToEnum(PartitionMode, enumAsInt);
 }