tigerbeetle-node 0.11.9 → 0.11.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- ed71bbee01e27a1f4638bfc2b0542414c1fc8b503bce5bbcc45ae2b80b1edb26 dist/client.node
1
+ 21c6105d76e0efc68fe5cbe799d363a083a9b44359e16d7dbfbb172e381ea0c3 dist/client.node
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tigerbeetle-node",
3
- "version": "0.11.9",
3
+ "version": "0.11.10",
4
4
  "description": "TigerBeetle Node.js client",
5
5
  "main": "dist/index.js",
6
6
  "typings": "dist/index.d.ts",
@@ -5,25 +5,37 @@ set -e
5
5
  # macOS 13 Ventura is not supported on Zig 0.9.x.
6
6
  # Overriding -target is one workaround Andrew suggests.
7
7
  # https://github.com/ziglang/zig/issues/10478#issuecomment-1294313967
8
- # Cut everything after the first `.` in the target query result
9
- # because the rest of it doesn't always seem to be valid when passed
10
- # back in to `-target`.
11
- target="$(./zig/zig targets | grep triple |cut -d '"' -f 4 | cut -d '.' -f 1)"
8
+ target=""
12
9
  if [ "$(./zig/zig targets | grep triple |cut -d '"' -f 4 | cut -d '.' -f 1,2)" = "aarch64-macos.13" ]; then
13
- target="native-macos.11"
10
+ target="-target native-macos.11"
14
11
  fi
15
12
 
16
- echo "Building for $target"
13
+ # Zig picks musl libc on RHEL instead of glibc, incorrectly
14
+ # https://github.com/ziglang/zig/issues/12156
15
+ if [ -f "/etc/redhat-release" ]; then
16
+ if ! grep Fedora /etc/redhat-release; then
17
+ target="-target native-native-gnu"
18
+ fi
19
+ fi
20
+
21
+ if [ "$target" = "" ]; then
22
+ echo "Building default target"
23
+ else
24
+ echo "Building for '$target'"
25
+ fi
17
26
 
18
27
  mkdir -p dist
19
28
 
20
- ./zig/zig build-lib \
29
+ # Need to do string eval-ing because of shellcheck's strict string
30
+ # interpolation rules.
31
+ cmd="./zig/zig build-lib \
21
32
  -mcpu=baseline \
22
33
  -OReleaseSafe \
23
34
  -dynamic \
24
35
  -lc \
25
- -isystem build/node-"$(node --version)"/include/node \
36
+ -isystem build/node-$(node --version)/include/node \
26
37
  -fallow-shlib-undefined \
27
38
  -femit-bin=dist/client.node \
28
- -target "$target" \
29
- src/node.zig
39
+ $target src/node.zig"
40
+
41
+ eval "$cmd"
@@ -80,6 +80,7 @@ const ConfigCluster = struct {
80
80
  cache_line_size: comptime_int = 64,
81
81
  clients_max: usize,
82
82
  pipeline_prepare_queue_max: usize = 8,
83
+ view_change_headers_max: usize = 8,
83
84
  quorum_replication_max: u8 = 3,
84
85
  journal_slot_count: usize = 1024,
85
86
  message_size_max: usize = 1 * 1024 * 1024,
@@ -180,6 +181,7 @@ pub const configs = struct {
180
181
  .cluster = .{
181
182
  .clients_max = 4 + 3,
182
183
  .pipeline_prepare_queue_max = 4,
184
+ .view_change_headers_max = 4,
183
185
  .journal_slot_count = Config.Cluster.journal_slot_count_min,
184
186
  .message_size_max = Config.Cluster.message_size_max_min(4),
185
187
  .storage_size_max = 4 * 1024 * 1024 * 1024,
@@ -157,6 +157,9 @@ comptime {
157
157
  assert(message_size_max >= @sizeOf(vsr.Header));
158
158
  assert(message_size_max >= sector_size);
159
159
  assert(message_size_max >= Config.Cluster.message_size_max_min(clients_max));
160
+
161
+ // Ensure that DVC/SV messages can fit all necessary headers.
162
+ assert(message_body_size_max >= view_change_headers_max * @sizeOf(vsr.Header));
160
163
  }
161
164
 
162
165
  /// The maximum number of Viewstamped Replication prepare messages that can be inflight at a time.
@@ -184,6 +187,21 @@ comptime {
184
187
  assert(pipeline_request_queue_max >= 0);
185
188
  }
186
189
 
190
+ /// The number of prepare headers to include in the body of a DVC/SV.
191
+ ///
192
+ /// CRITICAL:
193
+ /// We must provide enough headers to cover all uncommitted headers so that the new
194
+ /// primary (if we are in a view change) can decide whether to discard uncommitted headers
195
+ /// that cannot be repaired because they are gaps. See DVCQuorum for more detail.
196
+ pub const view_change_headers_max = config.cluster.view_change_headers_max;
197
+
198
+ comptime {
199
+ assert(view_change_headers_max > 0);
200
+ assert(view_change_headers_max >= pipeline_prepare_queue_max);
201
+ assert(view_change_headers_max <= journal_slot_count);
202
+ assert(view_change_headers_max <= @divFloor(message_body_size_max, @sizeOf(vsr.Header)));
203
+ }
204
+
187
205
  /// The minimum and maximum amount of time in milliseconds to wait before initiating a connection.
188
206
  /// Exponential backoff and jitter are applied within this range.
189
207
  pub const connection_delay_min_ms = config.process.connection_delay_min_ms;
@@ -185,8 +185,6 @@ const Environment = struct {
185
185
  .commit_min_checksum = env.superblock.working.vsr_state.commit_min_checksum + 1,
186
186
  .commit_min = op,
187
187
  .commit_max = op + 1,
188
- .log_view = 0,
189
- .view = 0,
190
188
  });
191
189
  }
192
190
 
@@ -419,16 +419,17 @@ const Environment = struct {
419
419
  env.manifest_log.checkpoint(checkpoint_callback);
420
420
  env.wait(&env.manifest_log);
421
421
 
422
- var vsr_state = env.manifest_log.superblock.working.vsr_state;
423
- vsr_state.commit_min += 1;
424
- vsr_state.commit_min_checksum += 1;
425
- vsr_state.commit_max += 1;
422
+ const vsr_state = &env.manifest_log.superblock.working.vsr_state;
426
423
 
427
424
  env.pending += 1;
428
425
  env.manifest_log.superblock.checkpoint(
429
426
  checkpoint_superblock_callback,
430
427
  &env.superblock_context,
431
- vsr_state,
428
+ .{
429
+ .commit_min_checksum = vsr_state.commit_min_checksum + 1,
430
+ .commit_min = vsr_state.commit_min + 1,
431
+ .commit_max = vsr_state.commit_max + 1,
432
+ },
432
433
  );
433
434
  env.wait(&env.manifest_log);
434
435
 
@@ -202,15 +202,16 @@ const Environment = struct {
202
202
 
203
203
  log.debug("forest checkpointing completed!", .{});
204
204
 
205
- var vsr_state = env.superblock.staging.vsr_state;
206
- vsr_state.commit_min += 1;
207
- vsr_state.commit_min_checkpoint += 1;
205
+ const vsr_state = &env.superblock.staging.vsr_state;
208
206
 
209
207
  env.state = .superblock_checkpointing;
210
208
  env.superblock.checkpoint(
211
209
  superblock_checkpoint_callback,
212
210
  &env.superblock_context,
213
- vsr_state,
211
+ .{
212
+ .commit_min_checkpoint = vsr_state.commit_min_checkpoint + 1,
213
+ .commit_min = vsr_state.commit_min + 1,
214
+ },
214
215
  );
215
216
  }
216
217
 
@@ -145,7 +145,6 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
145
145
 
146
146
  compaction_io_pending: usize,
147
147
  compaction_callback: ?fn (*Tree) void,
148
- compaction_next_tick: Grid.NextTick = undefined,
149
148
 
150
149
  checkpoint_callback: ?fn (*Tree) void,
151
150
  open_callback: ?fn (*Tree) void,
@@ -337,8 +336,7 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
337
336
  }
338
337
 
339
338
  if (index_block_count == 0) {
340
- context.callback = callback;
341
- tree.grid.on_next_tick(lookup_invalid_tick_callback, &context.next_tick);
339
+ callback(context, null);
342
340
  return;
343
341
  }
344
342
 
@@ -348,7 +346,6 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
348
346
  context.* = .{
349
347
  .tree = tree,
350
348
  .completion = undefined,
351
- .next_tick = undefined,
352
349
 
353
350
  .key = key,
354
351
  .fingerprint = fingerprint,
@@ -363,18 +360,12 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
363
360
  context.read_index_block();
364
361
  }
365
362
 
366
- fn lookup_invalid_tick_callback(next_tick: *Grid.NextTick) void {
367
- const context = @fieldParentPtr(LookupContext, "next_tick", next_tick);
368
- context.callback(context, null);
369
- }
370
-
371
363
  pub const LookupContext = struct {
372
364
  const Read = Grid.Read;
373
365
  const BlockPtrConst = Grid.BlockPtrConst;
374
366
 
375
367
  tree: *Tree,
376
368
  completion: Read,
377
- next_tick: Grid.NextTick,
378
369
 
379
370
  key: Key,
380
371
  fingerprint: bloom_filter.Fingerprint,
@@ -569,8 +560,7 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
569
560
  tree.compact_mutable_table_into_immutable();
570
561
  }
571
562
 
572
- tree.compaction_callback = callback;
573
- tree.grid.on_next_tick(compact_skip_tick_callback, &tree.compaction_next_tick);
563
+ callback(tree);
574
564
  return;
575
565
  }
576
566
 
@@ -598,13 +588,6 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
598
588
  tree.compact_drive();
599
589
  }
600
590
 
601
- fn compact_skip_tick_callback(next_tick: *Grid.NextTick) void {
602
- const tree = @fieldParentPtr(Tree, "compaction_next_tick", next_tick);
603
- const callback = tree.compaction_callback.?;
604
- tree.compaction_callback = null;
605
- callback(tree);
606
- }
607
-
608
591
  fn compact_start(tree: *Tree, callback: fn (*Tree) void) void {
609
592
  assert(tree.compaction_io_pending == 0);
610
593
  assert(tree.compaction_callback == null);
@@ -874,7 +857,7 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
874
857
  // We are at the end of a half-bar, but the compactions have not finished.
875
858
  // We keep ticking them until they finish.
876
859
  log.debug(tree_name ++ ": compact_done: driving outstanding compactions", .{});
877
- tree.grid.on_next_tick(compact_drive_tick_callback, &tree.compaction_next_tick);
860
+ tree.compact_drive();
878
861
  return;
879
862
  }
880
863
 
@@ -952,16 +935,6 @@ pub fn TreeType(comptime TreeTable: type, comptime Storage: type, comptime tree_
952
935
  tree.manifest.compact(compact_manifest_callback);
953
936
  }
954
937
 
955
- /// Asynchronously continue to drive the compactions when they haven't finished at the time
956
- /// they were supposed to at the end of a half-bar.
957
- fn compact_drive_tick_callback(next_tick: *Grid.NextTick) void {
958
- const tree = @fieldParentPtr(Tree, "compaction_next_tick", next_tick);
959
- assert(tree.compaction_io_pending == 0);
960
- assert(tree.compaction_callback != null);
961
- assert(tree.compaction_op == tree.lookup_snapshot_max);
962
- tree.compact_drive();
963
- }
964
-
965
938
  /// Called after the last beat of a full compaction bar.
966
939
  fn compact_mutable_table_into_immutable(tree: *Tree) void {
967
940
  assert(tree.table_immutable.free);
@@ -241,8 +241,6 @@ fn EnvironmentType(comptime table_usage: TableUsage) type {
241
241
  .commit_min_checksum = env.superblock.working.vsr_state.commit_min_checksum + 1,
242
242
  .commit_min = op,
243
243
  .commit_max = op + 1,
244
- .log_view = 0,
245
- .view = 0,
246
244
  });
247
245
  }
248
246
 
@@ -143,7 +143,7 @@ pub fn main() !void {
143
143
  const simulator_options = Simulator.Options{
144
144
  .cluster = cluster_options,
145
145
  .workload = workload_options,
146
- .replica_crash_probability = 0.000001,
146
+ .replica_crash_probability = 0.00002,
147
147
  .replica_crash_stability = random.uintLessThan(u32, 1_000),
148
148
  .replica_restart_probability = 0.0001,
149
149
  .replica_restart_stability = random.uintLessThan(u32, 1_000),
@@ -444,43 +444,11 @@ pub const Simulator = struct {
444
444
  }
445
445
 
446
446
  fn tick_crash(simulator: *Simulator) void {
447
- // The maximum number of replicas that can crash, with the cluster still able to recover.
448
- var crashes = blk: {
449
- // The minimum number of healthy replicas required for a crashed replica to be able to
450
- // recover. A cluster of 1 can crash safely (as long as there is no disk corruption)
451
- // since it does not run the recovery protocol.
452
- var replica_normal_min = if (simulator.options.cluster.replica_count == 1)
453
- 0
454
- else
455
- vsr.quorums(simulator.options.cluster.replica_count).view_change;
456
- break :blk simulator.cluster.replica_normal_count() -| replica_normal_min;
457
- };
458
-
459
- for (simulator.cluster.storages) |*storage, replica| {
460
- if (simulator.cluster.replicas[replica].journal.status == .recovered) {
461
- // TODO Remove this workaround when VSR recovery protocol is disabled.
462
- // When only the minimum number of replicas are healthy (no more crashes allowed),
463
- // disable storage faults on all healthy replicas.
464
- //
465
- // This is a workaround to avoid the deadlock that occurs when (for example) in a
466
- // cluster of 3 replicas, one is down, another has a corrupt prepare, and the last does
467
- // not have the prepare. The two healthy replicas can never complete a view change,
468
- // because two replicas are not enough to nack, and the unhealthy replica cannot
469
- // complete the VSR recovery protocol either.
470
- if (simulator.cluster.replica_health[replica] == .up and crashes == 0) {
471
- if (storage.faulty) {
472
- log_simulator.debug("{}: disable storage faults", .{replica});
473
- storage.faulty = false;
474
- }
475
- } else {
476
- // When a journal recovers for the first time, enable its storage faults.
477
- // Future crashes will recover in the presence of faults.
478
- if (!storage.faulty) {
479
- log_simulator.debug("{}: enable storage faults", .{replica});
480
- storage.faulty = true;
481
- }
482
- }
483
- }
447
+ const recoverable_count_min =
448
+ vsr.quorums(simulator.options.cluster.replica_count).view_change;
449
+ var recoverable_count: usize = 0;
450
+ for (simulator.cluster.replicas) |*replica| {
451
+ recoverable_count += @boolToInt(replica.status != .recovering_head);
484
452
  }
485
453
 
486
454
  for (simulator.cluster.replicas) |*replica| {
@@ -490,28 +458,41 @@ pub const Simulator = struct {
490
458
 
491
459
  switch (simulator.cluster.replica_health[replica.replica]) {
492
460
  .up => {
493
- if (crashes == 0) continue;
494
- const replica_writes = simulator.cluster.storages[replica.replica].writes.count();
461
+ const storage = &simulator.cluster.storages[replica.replica];
462
+ const replica_writes = storage.writes.count();
495
463
  const crash_probability = simulator.options.replica_crash_probability *
496
464
  @as(f64, if (replica_writes == 0) 1.0 else 10.0);
497
465
  if (!chance_f64(simulator.random, crash_probability)) continue;
498
466
 
499
- const replica_crashed = simulator.cluster.crash_replica(replica.replica) catch |err| {
500
- log_simulator.err("{}: crash replica: unable to open after crash (err={})", .{
501
- replica.replica,
502
- err,
503
- });
504
- unreachable;
505
- };
506
- if (replica_crashed) {
507
- log_simulator.debug("{}: crash replica", .{replica.replica});
508
- crashes -= 1;
509
- simulator.replica_stability[replica.replica] =
510
- simulator.options.replica_crash_stability;
467
+ const fault = recoverable_count > recoverable_count_min;
468
+ replica.superblock.storage.faulty = fault;
469
+
470
+ if (!fault) {
471
+ // The journal writes redundant headers of faulty ops as zeroes to ensure
472
+ // that they remain faulty after a crash/recover. Since that fault cannot
473
+ // be disabled by `storage.faulty`, we must manually repair it here to
474
+ // ensure a cluster cannot become stuck in status=recovering_head.
475
+ // See recover_slots() for more detail.
476
+ const offset = vsr.Zone.wal_headers.offset(0);
477
+ const size = vsr.Zone.wal_headers.size().?;
478
+ const headers_bytes = storage.memory[offset..][0..size];
479
+ const headers = mem.bytesAsSlice(vsr.Header, headers_bytes);
480
+ for (headers) |*h, slot| {
481
+ if (h.checksum == 0) h.* = storage.wal_prepares()[slot].header;
482
+ }
511
483
  }
484
+
485
+ log_simulator.debug("{}: crash replica (faults={})", .{ replica.replica, fault });
486
+ simulator.cluster.crash_replica(replica.replica) catch unreachable;
487
+ replica.superblock.storage.faulty = true;
488
+
489
+ recoverable_count -= @boolToInt(replica.status == .recovering_head);
490
+ assert(replica.status != .recovering_head or fault);
491
+
492
+ simulator.replica_stability[replica.replica] =
493
+ simulator.options.replica_crash_stability;
512
494
  },
513
495
  .down => {
514
- assert(replica.status == .recovering);
515
496
  if (chance_f64(simulator.random, simulator.options.replica_restart_probability)) {
516
497
  simulator.cluster.restart_replica(replica.replica);
517
498
  log_simulator.debug("{}: restart replica", .{replica.replica});
@@ -72,7 +72,7 @@ pub fn StateCheckerType(comptime Client: type, comptime Replica: type) type {
72
72
  const commit_b = replica.commit_min;
73
73
 
74
74
  const header_b = replica.journal.header_with_op(replica.commit_min);
75
- assert(header_b != null or replica.commit_min == replica.op_checkpoint);
75
+ assert(header_b != null or replica.commit_min == replica.op_checkpoint());
76
76
  assert(header_b == null or header_b.?.op == commit_b);
77
77
 
78
78
  const checksum_a = state_checker.commits.items[commit_a].header.checksum;
@@ -14,7 +14,7 @@
14
14
  //! - Acquired Grid blocks
15
15
  //!
16
16
  //! Areas not verified:
17
- //! - SuperBlock sectors, which hold replica-specific state.
17
+ //! - SuperBlock headers, which hold replica-specific state.
18
18
  //! - WAL headers, which may differ because the WAL writes deliberately corrupt redundant headers
19
19
  //! to faulty slots to ensure recovery is consistent.
20
20
  //! - Non-allocated Grid blocks, which may differ due to state transfer.
@@ -25,7 +25,7 @@ const log = std.log.scoped(.storage_checker);
25
25
  const constants = @import("../../constants.zig");
26
26
  const vsr = @import("../../vsr.zig");
27
27
  const superblock = @import("../../vsr/superblock.zig");
28
- const SuperBlockSector = superblock.SuperBlockSector;
28
+ const SuperBlockHeader = superblock.SuperBlockHeader;
29
29
  const Storage = @import("../storage.zig").Storage;
30
30
 
31
31
  /// After each compaction half measure, save the cumulative hash of all acquired grid blocks.
@@ -44,7 +44,7 @@ const Checkpoints = std.AutoHashMap(u64, Checkpoint);
44
44
 
45
45
  const Checkpoint = struct {
46
46
  // The superblock trailers are an XOR of all copies of all respective trailers, not the
47
- // `SuperBlockSector.{trailer}_checksum`.
47
+ // `SuperBlockHeader.{trailer}_checksum`.
48
48
  checksum_superblock_manifest: u128,
49
49
  checksum_superblock_free_set: u128,
50
50
  checksum_superblock_client_table: u128,
@@ -145,15 +145,15 @@ pub fn StorageCheckerType(comptime Replica: type) type {
145
145
  inline for (std.meta.fields(Checkpoint)) |field| {
146
146
  log.debug("{}: replica_checkpoint: checkpoint={} area={s} value={}", .{
147
147
  replica.replica,
148
- replica.op_checkpoint,
148
+ replica.op_checkpoint(),
149
149
  field.name,
150
150
  @field(checkpoint, field.name),
151
151
  });
152
152
  }
153
153
 
154
- const checkpoint_expect = checker.checkpoints.get(replica.op_checkpoint) orelse {
154
+ const checkpoint_expect = checker.checkpoints.get(replica.op_checkpoint()) orelse {
155
155
  // This replica is the first to reach op_checkpoint.
156
- try checker.checkpoints.putNoClobber(replica.op_checkpoint, checkpoint);
156
+ try checker.checkpoints.putNoClobber(replica.op_checkpoint(), checkpoint);
157
157
  return;
158
158
  };
159
159
 
@@ -142,7 +142,7 @@ pub fn ClusterType(comptime StateMachineType: fn (comptime Storage: type, compti
142
142
  storage_options.replica_index = @intCast(u8, replica_index);
143
143
  storage_options.fault_atlas = storage_fault_atlas;
144
144
  storage.* = try Storage.init(allocator, options.storage_size_limit, storage_options);
145
- // Disable most faults at startup, so that the replicas don't get stuck in recovery mode.
145
+ // Disable most faults at startup, so that the replicas don't get stuck recovering_head.
146
146
  storage.faulty = replica_index >= vsr.quorums(options.replica_count).view_change;
147
147
  }
148
148
  errdefer for (storages) |*storage| storage.deinit(allocator);
@@ -301,99 +301,17 @@ pub fn ClusterType(comptime StateMachineType: fn (comptime Storage: type, compti
301
301
  ///
302
302
  /// Returns whether the replica was crashed.
303
303
  /// Returns an error when the replica was unable to recover (open).
304
- pub fn crash_replica(cluster: *Self, replica_index: u8) !bool {
304
+ pub fn crash_replica(cluster: *Self, replica_index: u8) !void {
305
305
  assert(cluster.replica_health[replica_index] == .up);
306
306
 
307
- const replica = &cluster.replicas[replica_index];
308
- if (replica.op == 0) {
309
- // Only crash when `replica.op > 0` — an empty WAL would skip recovery after a crash.
310
- return false;
311
- }
312
-
313
- // TODO Remove this workaround when VSR recovery protocol is disabled.
314
- for (replica.journal.prepare_inhabited) |inhabited, i| {
315
- if (i == 0) {
316
- // Ignore the root header.
317
- } else {
318
- if (inhabited) break;
319
- }
320
- } else {
321
- // Only crash when at least one header has been written to the WAL.
322
- // An empty WAL would skip recovery after a crash.
323
- return false;
324
- }
325
-
326
- // Ensure that the cluster can eventually recover without this replica.
327
- // Verify that each op is recoverable by the current healthy cluster (minus the replica we
328
- // are trying to crash).
329
- // TODO Remove this workaround when VSR recovery protocol is disabled.
330
- if (cluster.options.replica_count != 1) {
331
- var parent: u128 = undefined;
332
- const cluster_op_max = op_max: {
333
- var v: ?u32 = null;
334
- var op_max: ?u64 = null;
335
- for (cluster.replicas) |other_replica, i| {
336
- if (cluster.replica_health[i] == .down) continue;
337
- if (other_replica.status == .recovering) continue;
338
-
339
- if (v == null or other_replica.log_view > v.? or
340
- (other_replica.log_view == v.? and other_replica.op > op_max.?))
341
- {
342
- v = other_replica.log_view;
343
- op_max = other_replica.op;
344
- parent = other_replica.journal.header_with_op(op_max.?).?.checksum;
345
- }
346
- }
347
- break :op_max op_max.?;
348
- };
349
-
350
- // This whole workaround doesn't handle log wrapping correctly.
351
- // If the log has wrapped, don't crash the replica.
352
- if (cluster_op_max >= constants.journal_slot_count) {
353
- return false;
354
- }
355
-
356
- var op: u64 = cluster_op_max + 1;
357
- while (op > 0) {
358
- op -= 1;
359
-
360
- var cluster_op_known: bool = false;
361
- for (cluster.replicas) |other_replica, i| {
362
- // Ignore replicas that are ineligible to assist recovery.
363
- if (replica_index == i) continue;
364
- if (cluster.replica_health[i] == .down) continue;
365
- if (other_replica.status == .recovering) continue;
366
-
367
- if (other_replica.journal.header_with_op_and_checksum(op, parent)) |header| {
368
- parent = header.parent;
369
- if (!other_replica.journal.dirty.bit(.{ .index = op })) {
370
- // The op is recoverable if this replica crashes.
371
- break;
372
- }
373
- cluster_op_known = true;
374
- }
375
- } else {
376
- if (op == cluster_op_max and !cluster_op_known) {
377
- // The replica can crash; it will be able to truncate the last op.
378
- } else {
379
- // The op isn't recoverable if this replica is crashed.
380
- return false;
381
- }
382
- }
383
- }
384
-
385
- // We can't crash this replica because without it we won't be able to repair a broken
386
- // hash chain.
387
- if (parent != 0) return false;
388
- }
389
-
390
- cluster.replica_health[replica_index] = .down;
391
-
392
307
  // Reset the storage before the replica so that pending writes can (partially) finish.
393
308
  cluster.storages[replica_index].reset();
309
+
310
+ const replica = &cluster.replicas[replica_index];
394
311
  const replica_time = replica.time;
395
312
  replica.deinit(cluster.allocator);
396
313
  cluster.network.process_disable(.{ .replica = replica_index });
314
+ cluster.replica_health[replica_index] = .down;
397
315
 
398
316
  // Ensure that none of the replica's messages leaked when it was deinitialized.
399
317
  var messages_in_pool: usize = 0;
@@ -411,18 +329,6 @@ pub fn ClusterType(comptime StateMachineType: fn (comptime Storage: type, compti
411
329
  // Pass the old replica's Time through to the new replica. It will continue to tick while
412
330
  // the replica is crashed, to ensure the clocks don't desynchronize too far to recover.
413
331
  try cluster.open_replica(replica_index, replica_time);
414
-
415
- return true;
416
- }
417
-
418
- /// Returns the number of replicas capable of helping a crashed node recover (i.e. with
419
- /// replica.status=normal).
420
- pub fn replica_normal_count(cluster: *const Self) u8 {
421
- var count: u8 = 0;
422
- for (cluster.replicas) |*replica| {
423
- if (replica.status == .normal) count += 1;
424
- }
425
- return count;
426
332
  }
427
333
 
428
334
  fn open_replica(cluster: *Self, replica_index: u8, time: Time) !void {