tigerbeetle-node 0.11.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/package.json +4 -3
  2. package/src/tigerbeetle/scripts/fuzz_loop.sh +1 -1
  3. package/src/tigerbeetle/scripts/pre-commit.sh +2 -2
  4. package/src/tigerbeetle/scripts/validate_docs.sh +17 -0
  5. package/src/tigerbeetle/src/benchmark.zig +25 -11
  6. package/src/tigerbeetle/src/c/tb_client/context.zig +248 -47
  7. package/src/tigerbeetle/src/c/tb_client/echo_client.zig +108 -0
  8. package/src/tigerbeetle/src/c/tb_client/packet.zig +2 -2
  9. package/src/tigerbeetle/src/c/tb_client/signal.zig +2 -4
  10. package/src/tigerbeetle/src/c/tb_client/thread.zig +17 -256
  11. package/src/tigerbeetle/src/c/tb_client.h +18 -4
  12. package/src/tigerbeetle/src/c/tb_client.zig +88 -26
  13. package/src/tigerbeetle/src/c/tb_client_header_test.zig +135 -0
  14. package/src/tigerbeetle/src/c/test.zig +371 -1
  15. package/src/tigerbeetle/src/cli.zig +36 -6
  16. package/src/tigerbeetle/src/config.zig +10 -1
  17. package/src/tigerbeetle/src/demo.zig +2 -1
  18. package/src/tigerbeetle/src/demo_01_create_accounts.zig +1 -1
  19. package/src/tigerbeetle/src/demo_03_create_transfers.zig +13 -0
  20. package/src/tigerbeetle/src/ewah.zig +11 -33
  21. package/src/tigerbeetle/src/ewah_benchmark.zig +8 -9
  22. package/src/tigerbeetle/src/lsm/README.md +97 -3
  23. package/src/tigerbeetle/src/lsm/compaction.zig +32 -7
  24. package/src/tigerbeetle/src/{eytzinger_benchmark.zig → lsm/eytzinger_benchmark.zig} +34 -21
  25. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +34 -32
  26. package/src/tigerbeetle/src/lsm/grid.zig +39 -21
  27. package/src/tigerbeetle/src/lsm/groove.zig +1 -0
  28. package/src/tigerbeetle/src/lsm/k_way_merge.zig +3 -3
  29. package/src/tigerbeetle/src/lsm/level_iterator.zig +1 -1
  30. package/src/tigerbeetle/src/lsm/manifest.zig +13 -0
  31. package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -49
  32. package/src/tigerbeetle/src/lsm/manifest_log.zig +173 -335
  33. package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +665 -0
  34. package/src/tigerbeetle/src/lsm/node_pool.zig +4 -0
  35. package/src/tigerbeetle/src/lsm/posted_groove.zig +1 -0
  36. package/src/tigerbeetle/src/lsm/segmented_array.zig +24 -15
  37. package/src/tigerbeetle/src/lsm/table.zig +32 -20
  38. package/src/tigerbeetle/src/lsm/table_immutable.zig +1 -1
  39. package/src/tigerbeetle/src/lsm/table_iterator.zig +4 -5
  40. package/src/tigerbeetle/src/lsm/test.zig +13 -2
  41. package/src/tigerbeetle/src/lsm/tree.zig +45 -7
  42. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +36 -32
  43. package/src/tigerbeetle/src/main.zig +55 -2
  44. package/src/tigerbeetle/src/message_bus.zig +18 -7
  45. package/src/tigerbeetle/src/message_pool.zig +8 -2
  46. package/src/tigerbeetle/src/ring_buffer.zig +7 -3
  47. package/src/tigerbeetle/src/simulator.zig +38 -11
  48. package/src/tigerbeetle/src/state_machine.zig +47 -22
  49. package/src/tigerbeetle/src/test/accounting/workload.zig +9 -5
  50. package/src/tigerbeetle/src/test/cluster.zig +15 -33
  51. package/src/tigerbeetle/src/test/conductor.zig +2 -1
  52. package/src/tigerbeetle/src/test/network.zig +45 -19
  53. package/src/tigerbeetle/src/test/packet_simulator.zig +40 -29
  54. package/src/tigerbeetle/src/test/state_checker.zig +5 -7
  55. package/src/tigerbeetle/src/test/storage.zig +453 -110
  56. package/src/tigerbeetle/src/test/storage_checker.zig +204 -0
  57. package/src/tigerbeetle/src/tigerbeetle.zig +1 -0
  58. package/src/tigerbeetle/src/unit_tests.zig +6 -1
  59. package/src/tigerbeetle/src/util.zig +97 -11
  60. package/src/tigerbeetle/src/vopr.zig +2 -1
  61. package/src/tigerbeetle/src/vsr/client.zig +8 -3
  62. package/src/tigerbeetle/src/vsr/journal.zig +280 -202
  63. package/src/tigerbeetle/src/vsr/replica.zig +169 -31
  64. package/src/tigerbeetle/src/vsr/superblock.zig +356 -629
  65. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +7 -6
  66. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +414 -151
  67. package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +332 -0
  68. package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +349 -0
  69. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +44 -9
  70. package/src/tigerbeetle/src/vsr/superblock_quorums.zig +394 -0
  71. package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +312 -0
  72. package/src/tigerbeetle/src/vsr.zig +19 -5
  73. package/src/tigerbeetle/src/benchmark_array_search.zig +0 -317
  74. package/src/tigerbeetle/src/benchmarks/perf.zig +0 -299
  75. package/src/tigerbeetle/src/vopr_hub/README.md +0 -58
  76. package/src/tigerbeetle/src/vopr_hub/SETUP.md +0 -199
  77. package/src/tigerbeetle/src/vopr_hub/go.mod +0 -3
  78. package/src/tigerbeetle/src/vopr_hub/main.go +0 -1022
  79. package/src/tigerbeetle/src/vopr_hub/scheduler/go.mod +0 -3
  80. package/src/tigerbeetle/src/vopr_hub/scheduler/main.go +0 -403
@@ -1,3 +1,14 @@
1
+ //! SuperBlock invariants:
2
+ //!
3
+ //! * vsr_state
4
+ //! - vsr_state.commit_min is initially 0 (for a newly-formatted replica).
5
+ //! - vsr_state.commit_min ≤ vsr_state.commit_max
6
+ //! - vsr_state.view_normal ≤ vsr_state.view
7
+ //! - checkpoint() must advance the superblock's vsr_state.commit_min.
8
+ //! - view_change() must not advance the superblock's vsr_state.commit_min.
9
+ //! - All fields of vsr_state except commit_min_checksum are monotonically increasing over
10
+ //! view_change()/checkpoint().
11
+ //!
1
12
  const std = @import("std");
2
13
  const assert = std.debug.assert;
3
14
  const crypto = std.crypto;
@@ -15,18 +26,11 @@ const MessagePool = @import("../message_pool.zig").MessagePool;
15
26
  pub const SuperBlockManifest = @import("superblock_manifest.zig").Manifest;
16
27
  pub const SuperBlockFreeSet = @import("superblock_free_set.zig").FreeSet;
17
28
  pub const SuperBlockClientTable = @import("superblock_client_table.zig").ClientTable;
29
+ pub const Quorums = @import("superblock_quorums.zig").QuorumsType(.{
30
+ .superblock_copies = config.superblock_copies,
31
+ });
18
32
 
19
- /// Identifies the type of a sector or block. Protects against misdirected I/O across valid types.
20
- pub const Magic = enum(u8) {
21
- superblock,
22
- manifest,
23
- prepare,
24
- index,
25
- filter,
26
- data,
27
- };
28
-
29
- pub const SuperBlockVersion: u8 = 0;
33
+ pub const SuperBlockVersion: u16 = 0;
30
34
 
31
35
  // Fields are aligned to work as an extern or packed struct.
32
36
  pub const SuperBlockSector = extern struct {
@@ -38,14 +42,13 @@ pub const SuperBlockSector = extern struct {
38
42
  /// This simplifies writing and comparing multiple copies.
39
43
  copy: u8 = 0,
40
44
 
41
- /// Protects against misdirected I/O for non-superblock sectors that have a valid checksum.
42
- magic: Magic,
45
+ /// Protects against writing to or reading from the wrong data file.
46
+ replica: u8,
43
47
 
44
48
  /// The version of the superblock format in use, reserved for major breaking changes.
45
- version: u8,
49
+ version: u16,
46
50
 
47
51
  /// Protects against writing to or reading from the wrong data file.
48
- replica: u8,
49
52
  cluster: u32,
50
53
 
51
54
  /// The current size of the data file.
@@ -93,9 +96,12 @@ pub const SuperBlockSector = extern struct {
93
96
  /// The size of the client table entries stored in the superblock trailer.
94
97
  client_table_size: u32,
95
98
 
96
- reserved: [3172]u8 = [_]u8{0} ** 3172,
99
+ reserved: [3148]u8 = [_]u8{0} ** 3148,
97
100
 
98
101
  pub const VSRState = extern struct {
102
+ /// The vsr.Header.checksum of commit_min's message.
103
+ commit_min_checksum: u128,
104
+
99
105
  /// The last operation committed to the state machine. At startup, replay the log hereafter.
100
106
  commit_min: u64,
101
107
 
@@ -108,12 +114,24 @@ pub const SuperBlockSector = extern struct {
108
114
  /// The view number of the replica.
109
115
  view: u32,
110
116
 
117
+ reserved: [8]u8 = [_]u8{0} ** 8,
118
+
111
119
  comptime {
112
- assert(@sizeOf(VSRState) == 24);
120
+ assert(@sizeOf(VSRState) == 48);
113
121
  // Assert that there is no implicit padding in the struct.
114
122
  assert(@bitSizeOf(VSRState) == @sizeOf(VSRState) * 8);
115
123
  }
116
124
 
125
+ pub fn root(cluster: u32) VSRState {
126
+ return .{
127
+ .commit_min_checksum = vsr.Header.root_prepare(cluster).checksum,
128
+ .commit_min = 0,
129
+ .commit_max = 0,
130
+ .view_normal = 0,
131
+ .view = 0,
132
+ };
133
+ }
134
+
117
135
  pub fn internally_consistent(state: VSRState) bool {
118
136
  return state.commit_max >= state.commit_min and state.view >= state.view_normal;
119
137
  }
@@ -121,6 +139,10 @@ pub const SuperBlockSector = extern struct {
121
139
  pub fn monotonic(old: VSRState, new: VSRState) bool {
122
140
  assert(old.internally_consistent());
123
141
  assert(new.internally_consistent());
142
+ // The last case is for when checking monotonic() from the sequence=0 sector.
143
+ assert(old.commit_min != new.commit_min or
144
+ old.commit_min_checksum == new.commit_min_checksum or
145
+ (old.commit_min_checksum == 0 and old.commit_min == 0));
124
146
 
125
147
  if (old.view > new.view) return false;
126
148
  if (old.view_normal > new.view_normal) return false;
@@ -147,6 +169,7 @@ pub const SuperBlockSector = extern struct {
147
169
  /// But the corresponding `compact()` updates were preserved, and must not be repeated
148
170
  /// to ensure deterministic storage.
149
171
  pub fn op_compacted(state: VSRState, op: u64) bool {
172
+ // If commit_min is 0, we have never checkpointed, so no compactions are checkpointed.
150
173
  return state.commit_min > 0 and op <= state.commit_min + config.lsm_batch_multiple;
151
174
  }
152
175
  };
@@ -202,13 +225,13 @@ pub const SuperBlockSector = extern struct {
202
225
  }
203
226
 
204
227
  pub fn set_checksum(superblock: *SuperBlockSector) void {
205
- assert(superblock.copy < superblock_copies_max);
206
- assert(superblock.magic == .superblock);
228
+ assert(superblock.copy < config.superblock_copies);
207
229
  assert(superblock.version == SuperBlockVersion);
208
230
  assert(superblock.flags == 0);
209
231
 
210
232
  assert(@bitCast(u32, superblock.reserved[0..4].*) == 0);
211
233
  for (mem.bytesAsSlice(u64, superblock.reserved[4..])) |word| assert(word == 0);
234
+ for (mem.bytesAsSlice(u64, &superblock.vsr_state.reserved)) |word| assert(word == 0);
212
235
 
213
236
  superblock.checksum = superblock.calculate_checksum();
214
237
  }
@@ -219,9 +242,6 @@ pub const SuperBlockSector = extern struct {
219
242
 
220
243
  /// Does not consider { checksum, copy } when comparing equality.
221
244
  pub fn equal(a: *const SuperBlockSector, b: *const SuperBlockSector) bool {
222
- assert(a.magic == .superblock);
223
- assert(b.magic == .superblock);
224
-
225
245
  if (a.version != b.version) return false;
226
246
  if (a.replica != b.replica) return false;
227
247
  if (a.cluster != b.cluster) return false;
@@ -243,6 +263,9 @@ pub const SuperBlockSector = extern struct {
243
263
  for (mem.bytesAsSlice(u64, a.reserved[4..])) |word| assert(word == 0);
244
264
  for (mem.bytesAsSlice(u64, b.reserved[4..])) |word| assert(word == 0);
245
265
 
266
+ for (mem.bytesAsSlice(u64, &a.vsr_state.reserved)) |word| assert(word == 0);
267
+ for (mem.bytesAsSlice(u64, &b.vsr_state.reserved)) |word| assert(word == 0);
268
+
246
269
  return true;
247
270
  }
248
271
  };
@@ -255,20 +278,12 @@ comptime {
255
278
  }
256
279
 
257
280
  /// The size of the entire superblock storage zone.
258
- pub const superblock_zone_size = superblock_size * superblock_copies_max;
259
-
260
- /// A single set of copies (a copy set) consists of config.superblock_copies of a superblock.
261
- /// At least two copy sets are required for copy-on-write in order not to impair existing copies.
262
- ///
263
- /// However, when writing only the superblock sector for a view change, we do update-in-place,
264
- /// which is necessary as we need to continue to reference the existing superblock trailer to
265
- /// decouple view changes from checkpoints, to not force an untimely checkpoint ahead of schedule.
266
- pub const superblock_copies_max = config.superblock_copies * 2;
281
+ pub const superblock_zone_size = superblock_copy_size * config.superblock_copies;
267
282
 
268
283
  /// The size of an individual superblock including trailer.
269
- pub const superblock_size = @sizeOf(SuperBlockSector) + superblock_trailer_size_max;
284
+ pub const superblock_copy_size = @sizeOf(SuperBlockSector) + superblock_trailer_size_max;
270
285
  comptime {
271
- assert(superblock_size % config.sector_size == 0);
286
+ assert(superblock_copy_size % config.sector_size == 0);
272
287
  }
273
288
 
274
289
  /// The maximum possible size of the superblock trailer, following the superblock sector.
@@ -289,7 +304,9 @@ pub const superblock_trailer_size_max = blk: {
289
304
 
290
305
  // We order the smaller manifest section ahead of the block free set for better access locality.
291
306
  // For example, it's cheaper to skip over 1 MiB when reading from disk than to skip over 32 MiB.
292
- break :blk superblock_trailer_manifest_size_max + superblock_trailer_free_set_size_max + superblock_trailer_client_table_size_max;
307
+ break :blk superblock_trailer_manifest_size_max +
308
+ superblock_trailer_free_set_size_max +
309
+ superblock_trailer_client_table_size_max;
293
310
  };
294
311
 
295
312
  // A manifest block reference of 40 bytes contains a tree hash, checksum, and address.
@@ -299,6 +316,7 @@ pub const superblock_trailer_manifest_size_max = blk: {
299
316
 
300
317
  // Use a multiple of sector * reference so that the size is exactly divisible without padding:
301
318
  // For example, this 2.5 MiB manifest trailer == 65536 references == 65536 * 511 or 34m tables.
319
+ // TODO Size this relative to the expected number of tables & fragmentation.
302
320
  break :blk 16 * config.sector_size * SuperBlockManifest.BlockReferenceSize;
303
321
  };
304
322
 
@@ -322,6 +340,33 @@ pub const data_file_size_min = blk: {
322
340
  break :blk superblock_zone_size + config.journal_size_max;
323
341
  };
324
342
 
343
+ /// This table shows the sequence number progression of the SuperBlock's sectors.
344
+ ///
345
+ /// action working staging disk
346
+ /// format seq seq seq
347
+ /// 0 - Initially the file has no sectors.
348
+ /// 0 1 -
349
+ /// 0 1 1 Write a copyset for the first sequence.
350
+ /// 1 1 1 Read quorum; verify 3/4 are valid.
351
+ ///
352
+ /// open seq seq seq
353
+ /// a
354
+ /// a a Read quorum; verify 2/4 are valid.
355
+ /// a (a) a Repair any broken copies of `a`.
356
+ ///
357
+ /// checkpoint seq seq seq
358
+ /// a a a
359
+ /// a a+1
360
+ /// a a+1 a+1
361
+ /// a+1 a+1 a+1 Read quorum; verify 3/4 are valid.
362
+ ///
363
+ /// view_change seq seq seq
364
+ /// a a
365
+ /// a a+1 a The new sequence reuses the original parent.
366
+ /// a a+1 a+1
367
+ /// a+1 a+1 a+1 Read quorum; verify 3/4 are valid.
368
+ /// working staging disk
369
+ ///
325
370
  pub fn SuperBlockType(comptime Storage: type) type {
326
371
  return struct {
327
372
  const SuperBlock = @This();
@@ -344,8 +389,11 @@ pub fn SuperBlockType(comptime Storage: type) type {
344
389
 
345
390
  write: Storage.Write = undefined,
346
391
  read: Storage.Read = undefined,
347
- copy: u8 = undefined,
348
- vsr_state: SuperBlockSector.VSRState = undefined,
392
+ read_threshold: ?Quorums.Threshold = null,
393
+ copy: ?u8 = null,
394
+ /// Used by format(), checkpoint(), and view_change().
395
+ vsr_state: ?SuperBlockSector.VSRState = null,
396
+ repairs: ?Quorums.RepairIterator = null, // Used by open().
349
397
  };
350
398
 
351
399
  storage: *Storage,
@@ -360,14 +408,8 @@ pub fn SuperBlockType(comptime Storage: type) type {
360
408
  working: *align(config.sector_size) SuperBlockSector,
361
409
 
362
410
  /// The superblock that will replace the current working superblock once written.
363
- /// This is used when writing the staging superblock, or when changing views before then.
364
411
  /// We cannot mutate any working state directly until it is safely on stable storage.
365
412
  /// Otherwise, we may accidentally externalize guarantees that are not yet durable.
366
- writing: *align(config.sector_size) SuperBlockSector,
367
-
368
- /// The superblock that will be checkpointed next.
369
- /// This may be updated incrementally several times before the next checkpoint.
370
- /// For example, to track new snapshots as they are registered.
371
413
  staging: *align(config.sector_size) SuperBlockSector,
372
414
 
373
415
  /// The copies that we read into at startup or when verifying the written superblock.
@@ -421,11 +463,8 @@ pub fn SuperBlockType(comptime Storage: type) type {
421
463
  const b = try allocator.allocAdvanced(SuperBlockSector, config.sector_size, 1, .exact);
422
464
  errdefer allocator.free(b);
423
465
 
424
- const c = try allocator.allocAdvanced(SuperBlockSector, config.sector_size, 1, .exact);
425
- errdefer allocator.free(c);
426
-
427
466
  const reading = try allocator.allocAdvanced(
428
- [config.superblock_copies * 2]SuperBlockSector,
467
+ [config.superblock_copies]SuperBlockSector,
429
468
  config.sector_size,
430
469
  1,
431
470
  .exact,
@@ -475,8 +514,7 @@ pub fn SuperBlockType(comptime Storage: type) type {
475
514
  return SuperBlock{
476
515
  .storage = storage,
477
516
  .working = &a[0],
478
- .writing = &b[0],
479
- .staging = &c[0],
517
+ .staging = &b[0],
480
518
  .reading = &reading[0],
481
519
  .manifest = manifest,
482
520
  .free_set = free_set,
@@ -489,7 +527,6 @@ pub fn SuperBlockType(comptime Storage: type) type {
489
527
 
490
528
  pub fn deinit(superblock: *SuperBlock, allocator: mem.Allocator) void {
491
529
  allocator.destroy(superblock.working);
492
- allocator.destroy(superblock.writing);
493
530
  allocator.destroy(superblock.staging);
494
531
  allocator.free(superblock.reading);
495
532
 
@@ -526,7 +563,6 @@ pub fn SuperBlockType(comptime Storage: type) type {
526
563
  // We therefore use zero values to make this parent checksum as stable as possible.
527
564
  superblock.working.* = .{
528
565
  .copy = 0,
529
- .magic = .superblock,
530
566
  .version = SuperBlockVersion,
531
567
  .sequence = 0,
532
568
  .replica = options.replica,
@@ -538,6 +574,7 @@ pub fn SuperBlockType(comptime Storage: type) type {
538
574
  .free_set_checksum = 0,
539
575
  .client_table_checksum = 0,
540
576
  .vsr_state = .{
577
+ .commit_min_checksum = 0,
541
578
  .commit_min = 0,
542
579
  .commit_max = 0,
543
580
  .view_normal = 0,
@@ -557,15 +594,11 @@ pub fn SuperBlockType(comptime Storage: type) type {
557
594
 
558
595
  superblock.working.set_checksum();
559
596
 
560
- superblock.staging.* = superblock.working.*;
561
- superblock.staging.sequence = superblock.working.sequence + 1;
562
- superblock.staging.parent = superblock.working.checksum;
563
-
564
597
  context.* = .{
565
598
  .superblock = superblock,
566
599
  .callback = callback,
567
600
  .caller = .format,
568
- .copy = undefined,
601
+ .vsr_state = SuperBlockSector.VSRState.root(options.cluster),
569
602
  };
570
603
 
571
604
  // TODO At a higher layer, we must:
@@ -591,23 +624,33 @@ pub fn SuperBlockType(comptime Storage: type) type {
591
624
  superblock.acquire(context);
592
625
  }
593
626
 
627
+ /// The vsr_state must update the commit_min and commit_min_checksum.
628
+ // TODO Will the replica ever update view/view_normal by calling checkpoint() during a view
629
+ // change? If not, forbid it.
594
630
  pub fn checkpoint(
595
631
  superblock: *SuperBlock,
596
632
  callback: fn (context: *Context) void,
597
633
  context: *Context,
634
+ vsr_state: SuperBlockSector.VSRState,
598
635
  ) void {
599
636
  assert(superblock.opened);
637
+ // Checkpoint must advance commit_min, but never the view.
638
+ assert(superblock.staging.vsr_state.would_be_updated_by(vsr_state));
639
+ assert(superblock.staging.vsr_state.commit_min < vsr_state.commit_min);
640
+ assert(superblock.staging.vsr_state.commit_min_checksum !=
641
+ vsr_state.commit_min_checksum);
600
642
 
601
643
  context.* = .{
602
644
  .superblock = superblock,
603
645
  .callback = callback,
604
646
  .caller = .checkpoint,
605
- .copy = undefined,
647
+ .vsr_state = vsr_state,
606
648
  };
607
649
 
608
650
  superblock.acquire(context);
609
651
  }
610
652
 
653
+ /// The vsr_state must not update the `commit_min` or `commit_min_checksum`.
611
654
  pub fn view_change(
612
655
  superblock: *SuperBlock,
613
656
  callback: fn (context: *Context) void,
@@ -615,20 +658,28 @@ pub fn SuperBlockType(comptime Storage: type) type {
615
658
  vsr_state: SuperBlockSector.VSRState,
616
659
  ) void {
617
660
  assert(superblock.opened);
661
+ assert(vsr_state.commit_min == superblock.staging.vsr_state.commit_min);
662
+ assert(vsr_state.commit_min_checksum ==
663
+ superblock.staging.vsr_state.commit_min_checksum);
664
+ assert(superblock.staging.vsr_state.monotonic(vsr_state));
618
665
 
619
666
  log.debug(
620
- "view_change: commit_min={}..{} commit_max={}..{} view_normal={}..{} view={}..{}",
667
+ "view_change: commit_min_checksum={}..{} commit_min={}..{} commit_max={}..{} " ++
668
+ "view_normal={}..{} view={}..{}",
621
669
  .{
622
- superblock.working.vsr_state.commit_min,
670
+ superblock.staging.vsr_state.commit_min_checksum,
671
+ vsr_state.commit_min_checksum,
672
+
673
+ superblock.staging.vsr_state.commit_min,
623
674
  vsr_state.commit_min,
624
675
 
625
- superblock.working.vsr_state.commit_max,
676
+ superblock.staging.vsr_state.commit_max,
626
677
  vsr_state.commit_max,
627
678
 
628
- superblock.working.vsr_state.view_normal,
679
+ superblock.staging.vsr_state.view_normal,
629
680
  vsr_state.view_normal,
630
681
 
631
- superblock.working.vsr_state.view,
682
+ superblock.staging.vsr_state.view,
632
683
  vsr_state.view,
633
684
  },
634
685
  );
@@ -639,14 +690,10 @@ pub fn SuperBlockType(comptime Storage: type) type {
639
690
  .superblock = superblock,
640
691
  .callback = callback,
641
692
  .caller = .view_change,
642
- .copy = undefined,
643
693
  .vsr_state = vsr_state,
644
694
  };
645
695
 
646
- // Only this view_change() function may change the VSR state.
647
- assert(meta.eql(superblock.working.vsr_state, superblock.staging.vsr_state));
648
-
649
- if (!superblock.working.vsr_state.would_be_updated_by(context.vsr_state)) {
696
+ if (!superblock.staging.vsr_state.would_be_updated_by(context.vsr_state.?)) {
650
697
  log.debug("view_change: no change", .{});
651
698
  callback(context);
652
699
  return;
@@ -672,34 +719,32 @@ pub fn SuperBlockType(comptime Storage: type) type {
672
719
  }
673
720
 
674
721
  fn write_staging(superblock: *SuperBlock, context: *Context) void {
675
- assert(context.caller == .format or context.caller == .checkpoint);
722
+ assert(context.caller != .open);
676
723
  assert(context.caller == .format or superblock.opened);
724
+ assert(context.copy == null);
725
+ assert(context.vsr_state.?.internally_consistent());
677
726
  assert(superblock.queue_head == context);
678
727
  assert(superblock.queue_tail == null);
728
+ assert(superblock.working.vsr_state.would_be_updated_by(context.vsr_state.?));
679
729
 
680
- superblock.write_staging_encode_manifest();
681
- superblock.write_staging_encode_free_set();
682
- superblock.write_staging_encode_client_table();
683
-
684
- superblock.writing.* = superblock.staging.*;
685
- superblock.writing.set_checksum();
686
-
687
- assert(superblock.writing.sequence == superblock.working.sequence + 1);
688
- assert(superblock.writing.parent == superblock.working.checksum);
689
-
690
- superblock.staging.sequence = superblock.writing.sequence + 1;
691
- superblock.staging.parent = superblock.writing.checksum;
692
-
693
- assert(superblock.writing.manifest_checksum == superblock.staging.manifest_checksum);
694
- assert(superblock.writing.free_set_checksum == superblock.staging.free_set_checksum);
695
- assert(superblock.writing.client_table_checksum == superblock.staging.client_table_checksum);
696
-
697
- assert(superblock.writing.manifest_size == superblock.staging.manifest_size);
698
- assert(superblock.writing.free_set_size == superblock.staging.free_set_size);
699
- assert(superblock.writing.client_table_size == superblock.staging.client_table_size);
730
+ superblock.staging.* = superblock.working.*;
731
+ superblock.staging.sequence = superblock.staging.sequence + 1;
732
+ superblock.staging.parent = superblock.staging.checksum;
733
+ superblock.staging.vsr_state.update(context.vsr_state.?);
734
+
735
+ if (context.caller != .view_change) {
736
+ superblock.write_staging_encode_manifest();
737
+ superblock.write_staging_encode_free_set();
738
+ superblock.write_staging_encode_client_table();
739
+ }
740
+ superblock.staging.set_checksum();
700
741
 
701
- context.copy = starting_copy_for_sequence(superblock.writing.sequence);
702
- superblock.write_manifest(context);
742
+ context.copy = 0;
743
+ if (context.caller == .view_change) {
744
+ superblock.write_sector(context);
745
+ } else {
746
+ superblock.write_manifest(context);
747
+ }
703
748
  }
704
749
 
705
750
  fn write_staging_encode_manifest(superblock: *SuperBlock) void {
@@ -740,54 +785,25 @@ pub fn SuperBlockType(comptime Storage: type) type {
740
785
  staging.client_table_checksum = vsr.checksum(target[0..staging.client_table_size]);
741
786
  }
742
787
 
743
- fn write_view_change(superblock: *SuperBlock, context: *Context) void {
744
- assert(context.caller == .view_change);
745
- assert(superblock.opened);
746
- assert(superblock.queue_head == context);
747
- assert(superblock.queue_tail == null);
748
- assert(context.vsr_state.internally_consistent());
749
- assert(meta.eql(superblock.working.vsr_state, superblock.staging.vsr_state));
750
- assert(superblock.working.vsr_state.would_be_updated_by(context.vsr_state));
751
-
752
- superblock.writing.* = superblock.working.*;
753
-
754
- // We cannot increment the sequence number when writing only the superblock sector as
755
- // this would write the sector to another copy set with different superblock trailers.
756
- // Instead, we increment twice so that the sector remains in the same copy set.
757
- superblock.writing.sequence += 2;
758
- assert(superblock.writing.parent == superblock.working.parent);
759
-
760
- superblock.writing.vsr_state.update(context.vsr_state);
761
- superblock.staging.vsr_state.update(context.vsr_state);
762
-
763
- superblock.writing.set_checksum();
764
-
765
- superblock.staging.sequence = superblock.writing.sequence + 1;
766
- superblock.staging.parent = superblock.writing.checksum;
767
-
768
- context.copy = starting_copy_for_sequence(superblock.writing.sequence);
769
- superblock.write_sector(context);
770
- }
771
-
772
788
  fn write_manifest(superblock: *SuperBlock, context: *Context) void {
773
789
  assert(superblock.queue_head == context);
774
790
 
775
- const size = vsr.sector_ceil(superblock.writing.manifest_size);
791
+ const size = vsr.sector_ceil(superblock.staging.manifest_size);
776
792
  assert(size <= superblock_trailer_manifest_size_max);
777
793
 
778
794
  const buffer = superblock.manifest_buffer[0..size];
779
- const offset = offset_manifest(context.copy, superblock.writing.sequence);
795
+ const offset = Layout.offset_manifest(context.copy.?);
780
796
 
781
- mem.set(u8, buffer[superblock.writing.manifest_size..], 0); // Zero sector padding.
797
+ mem.set(u8, buffer[superblock.staging.manifest_size..], 0); // Zero sector padding.
782
798
 
783
- assert(superblock.writing.manifest_checksum == vsr.checksum(
784
- superblock.manifest_buffer[0..superblock.writing.manifest_size],
799
+ assert(superblock.staging.manifest_checksum == vsr.checksum(
800
+ superblock.manifest_buffer[0..superblock.staging.manifest_size],
785
801
  ));
786
802
 
787
803
  log.debug("{s}: write_manifest: checksum={x} size={} offset={}", .{
788
804
  @tagName(context.caller),
789
- superblock.writing.manifest_checksum,
790
- superblock.writing.manifest_size,
805
+ superblock.staging.manifest_checksum,
806
+ superblock.staging.manifest_size,
791
807
  offset,
792
808
  });
793
809
 
@@ -815,22 +831,22 @@ pub fn SuperBlockType(comptime Storage: type) type {
815
831
  fn write_free_set(superblock: *SuperBlock, context: *Context) void {
816
832
  assert(superblock.queue_head == context);
817
833
 
818
- const size = vsr.sector_ceil(superblock.writing.free_set_size);
834
+ const size = vsr.sector_ceil(superblock.staging.free_set_size);
819
835
  assert(size <= superblock_trailer_free_set_size_max);
820
836
 
821
837
  const buffer = superblock.free_set_buffer[0..size];
822
- const offset = offset_free_set(context.copy, superblock.writing.sequence);
838
+ const offset = Layout.offset_free_set(context.copy.?);
823
839
 
824
- mem.set(u8, buffer[superblock.writing.free_set_size..], 0); // Zero sector padding.
840
+ mem.set(u8, buffer[superblock.staging.free_set_size..], 0); // Zero sector padding.
825
841
 
826
- assert(superblock.writing.free_set_checksum == vsr.checksum(
827
- superblock.free_set_buffer[0..superblock.writing.free_set_size],
842
+ assert(superblock.staging.free_set_checksum == vsr.checksum(
843
+ superblock.free_set_buffer[0..superblock.staging.free_set_size],
828
844
  ));
829
845
 
830
846
  log.debug("{s}: write_free_set: checksum={x} size={} offset={}", .{
831
847
  @tagName(context.caller),
832
- superblock.writing.free_set_checksum,
833
- superblock.writing.free_set_size,
848
+ superblock.staging.free_set_checksum,
849
+ superblock.staging.free_set_size,
834
850
  offset,
835
851
  });
836
852
 
@@ -858,22 +874,22 @@ pub fn SuperBlockType(comptime Storage: type) type {
858
874
  fn write_client_table(superblock: *SuperBlock, context: *Context) void {
859
875
  assert(superblock.queue_head == context);
860
876
 
861
- const size = vsr.sector_ceil(superblock.writing.client_table_size);
877
+ const size = vsr.sector_ceil(superblock.staging.client_table_size);
862
878
  assert(size <= superblock_trailer_client_table_size_max);
863
879
 
864
880
  const buffer = superblock.client_table_buffer[0..size];
865
- const offset = offset_client_table(context.copy, superblock.writing.sequence);
881
+ const offset = Layout.offset_client_table(context.copy.?);
866
882
 
867
- mem.set(u8, buffer[superblock.writing.client_table_size..], 0); // Zero sector padding.
883
+ mem.set(u8, buffer[superblock.staging.client_table_size..], 0); // Zero sector padding.
868
884
 
869
- assert(superblock.writing.client_table_checksum == vsr.checksum(
870
- superblock.client_table_buffer[0..superblock.writing.client_table_size],
885
+ assert(superblock.staging.client_table_checksum == vsr.checksum(
886
+ superblock.client_table_buffer[0..superblock.staging.client_table_size],
871
887
  ));
872
888
 
873
889
  log.debug("{s}: write_client_table: checksum={x} size={} offset={}", .{
874
890
  @tagName(context.caller),
875
- superblock.writing.client_table_checksum,
876
- superblock.writing.client_table_size,
891
+ superblock.staging.client_table_checksum,
892
+ superblock.staging.client_table_size,
877
893
  offset,
878
894
  });
879
895
 
@@ -901,45 +917,42 @@ pub fn SuperBlockType(comptime Storage: type) type {
901
917
  fn write_sector(superblock: *SuperBlock, context: *Context) void {
902
918
  assert(superblock.queue_head == context);
903
919
 
904
- // We either update the working superblock for a checkpoint (+1) or a view change (+2):
905
- assert(superblock.writing.sequence == superblock.working.sequence + 1 or
906
- superblock.writing.sequence == superblock.working.sequence + 2);
907
-
908
- // The staging superblock should always be one ahead, with VSR state in sync:
909
- assert(superblock.staging.sequence == superblock.writing.sequence + 1);
910
- assert(superblock.staging.parent == superblock.writing.checksum);
911
- assert(meta.eql(superblock.staging.vsr_state, superblock.writing.vsr_state));
920
+ // We update the working superblock for a checkpoint/format/view_change:
921
+ // open() does not update the working superblock, since it only writes to repair.
922
+ if (context.caller == .open) {
923
+ assert(superblock.staging.sequence == superblock.working.sequence);
924
+ } else {
925
+ assert(superblock.staging.sequence == superblock.working.sequence + 1);
926
+ assert(superblock.staging.parent == superblock.working.checksum);
927
+ }
912
928
 
913
929
  // The superblock cluster and replica should never change once formatted:
914
- assert(superblock.writing.cluster == superblock.working.cluster);
915
- assert(superblock.writing.cluster == superblock.staging.cluster);
916
- assert(superblock.writing.replica == superblock.working.replica);
917
- assert(superblock.writing.replica == superblock.staging.replica);
930
+ assert(superblock.staging.cluster == superblock.working.cluster);
931
+ assert(superblock.staging.replica == superblock.working.replica);
918
932
 
919
- assert(superblock.writing.size >= data_file_size_min);
920
- assert(superblock.writing.size <= superblock.writing.size_max);
933
+ assert(superblock.staging.size >= data_file_size_min);
934
+ assert(superblock.staging.size <= superblock.staging.size_max);
921
935
 
922
- assert(context.copy < superblock_copies_max);
923
- assert(context.copy >= starting_copy_for_sequence(superblock.writing.sequence));
924
- assert(context.copy <= stopping_copy_for_sequence(superblock.writing.sequence));
925
- superblock.writing.copy = context.copy;
936
+ assert(context.copy.? < config.superblock_copies);
937
+ superblock.staging.copy = context.copy.?;
926
938
 
927
939
  // Updating the copy number should not affect the checksum, which was previously set:
928
- assert(superblock.writing.valid_checksum());
940
+ assert(superblock.staging.valid_checksum());
929
941
 
930
- const buffer = mem.asBytes(superblock.writing);
931
- const offset = superblock_size * context.copy;
942
+ const buffer = mem.asBytes(superblock.staging);
943
+ const offset = Layout.offset_sector(context.copy.?);
932
944
 
933
- log.debug("{s}: write_sector: checksum={x} sequence={} copy={} size={} offset={}", .{
945
+ log.debug("{}: {s}: write_sector: checksum={x} sequence={} copy={} size={} offset={}", .{
946
+ superblock.staging.replica,
934
947
  @tagName(context.caller),
935
- superblock.writing.checksum,
936
- superblock.writing.sequence,
937
- context.copy,
948
+ superblock.staging.checksum,
949
+ superblock.staging.sequence,
950
+ context.copy.?,
938
951
  buffer.len,
939
952
  offset,
940
953
  });
941
954
 
942
- superblock.assert_bounds(offset, buffer.len + superblock_trailer_size_max);
955
+ superblock.assert_bounds(offset, buffer.len);
943
956
 
944
957
  superblock.storage.write_sectors(
945
958
  write_sector_callback,
@@ -953,25 +966,24 @@ pub fn SuperBlockType(comptime Storage: type) type {
953
966
  fn write_sector_callback(write: *Storage.Write) void {
954
967
  const context = @fieldParentPtr(Context, "write", write);
955
968
  const superblock = context.superblock;
969
+ const copy = context.copy.?;
956
970
 
957
971
  assert(superblock.queue_head == context);
958
972
 
959
- assert(context.copy < superblock_copies_max);
960
- assert(context.copy >= starting_copy_for_sequence(superblock.writing.sequence));
961
- assert(context.copy <= stopping_copy_for_sequence(superblock.writing.sequence));
962
- assert(context.copy == superblock.writing.copy);
973
+ assert(copy < config.superblock_copies);
974
+ assert(copy == superblock.staging.copy);
963
975
 
964
- if (context.copy == stopping_copy_for_sequence(superblock.writing.sequence)) {
965
- if (context.caller == .format and superblock.writing.sequence < 2) {
966
- assert(superblock.writing.sequence != 0);
976
+ if (context.caller == .open) {
977
+ context.copy = null;
978
+ superblock.repair(context);
979
+ return;
980
+ }
967
981
 
968
- superblock.working.* = superblock.writing.*;
969
- superblock.write_staging(context);
970
- } else {
971
- superblock.read_working(context);
972
- }
982
+ if (copy + 1 == config.superblock_copies) {
983
+ context.copy = null;
984
+ superblock.read_working(context, .verify);
973
985
  } else {
974
- context.copy += 1;
986
+ context.copy = copy + 1;
975
987
 
976
988
  switch (context.caller) {
977
989
  .open => unreachable,
@@ -981,34 +993,42 @@ pub fn SuperBlockType(comptime Storage: type) type {
981
993
  }
982
994
  }
983
995
 
984
- fn read_working(superblock: *SuperBlock, context: *Context) void {
996
+ fn read_working(
997
+ superblock: *SuperBlock,
998
+ context: *Context,
999
+ threshold: Quorums.Threshold,
1000
+ ) void {
985
1001
  assert(superblock.queue_head == context);
1002
+ assert(context.copy == null);
1003
+ assert(context.read_threshold == null);
986
1004
 
987
1005
  // We do not submit reads in parallel, as while this would shave off 1ms, it would also
988
1006
  // increase the risk that a single fault applies to more reads due to temporal locality.
989
1007
  // This would make verification reads more flaky when we do experience a read fault.
990
1008
  // See "An Analysis of Data Corruption in the Storage Stack".
991
1009
 
992
- context.copy = 0; // Read all copies across all copy sets.
1010
+ context.copy = 0;
1011
+ context.read_threshold = threshold;
993
1012
  for (superblock.reading) |*copy| copy.* = undefined;
994
1013
  superblock.read_sector(context);
995
1014
  }
996
1015
 
997
1016
  fn read_sector(superblock: *SuperBlock, context: *Context) void {
998
1017
  assert(superblock.queue_head == context);
999
- assert(context.copy < superblock_copies_max);
1018
+ assert(context.copy.? < config.superblock_copies);
1019
+ assert(context.read_threshold != null);
1000
1020
 
1001
- const buffer = mem.asBytes(&superblock.reading[context.copy]);
1002
- const offset = superblock_size * context.copy;
1021
+ const buffer = mem.asBytes(&superblock.reading[context.copy.?]);
1022
+ const offset = Layout.offset_sector(context.copy.?);
1003
1023
 
1004
1024
  log.debug("{s}: read_sector: copy={} size={} offset={}", .{
1005
1025
  @tagName(context.caller),
1006
- context.copy,
1026
+ context.copy.?,
1007
1027
  buffer.len,
1008
1028
  offset,
1009
1029
  });
1010
1030
 
1011
- superblock.assert_bounds(offset, buffer.len + superblock_trailer_size_max);
1031
+ superblock.assert_bounds(offset, buffer.len);
1012
1032
 
1013
1033
  superblock.storage.read_sectors(
1014
1034
  read_sector_callback,
@@ -1022,96 +1042,109 @@ pub fn SuperBlockType(comptime Storage: type) type {
1022
1042
  fn read_sector_callback(read: *Storage.Read) void {
1023
1043
  const context = @fieldParentPtr(Context, "read", read);
1024
1044
  const superblock = context.superblock;
1045
+ const threshold = context.read_threshold.?;
1025
1046
 
1026
1047
  assert(superblock.queue_head == context);
1027
1048
 
1028
- assert(context.copy < superblock_copies_max);
1029
- if (context.copy == superblock_copies_max - 1) {
1030
- const threshold = threshold_for_caller(context.caller);
1031
-
1032
- if (superblock.quorums.working(superblock.reading, threshold)) |working| {
1033
- switch (context.caller) {
1034
- .format, .checkpoint, .view_change => {
1035
- if (working.checksum != superblock.writing.checksum) {
1036
- @panic("superblock failed verification after writing");
1037
- }
1038
- assert(working.equal(superblock.writing));
1039
- assert(superblock.staging.sequence == working.sequence + 1);
1040
- assert(superblock.staging.parent == working.checksum);
1041
- },
1042
- .open => {
1043
- superblock.staging.* = working.*;
1044
- superblock.staging.sequence = working.sequence + 1;
1045
- superblock.staging.parent = working.checksum;
1046
- },
1047
- }
1049
+ assert(context.copy.? < config.superblock_copies);
1050
+ if (context.copy.? + 1 != config.superblock_copies) {
1051
+ context.copy = context.copy.? + 1;
1052
+ superblock.read_sector(context);
1053
+ return;
1054
+ }
1055
+
1056
+ context.read_threshold = null;
1057
+ context.copy = null;
1058
+
1059
+ if (superblock.quorums.working(superblock.reading, threshold)) |quorum| {
1060
+ assert(quorum.valid);
1061
+ assert(quorum.copies.count() >= threshold.count());
1048
1062
 
1049
- if (context.caller == .format) {
1050
- assert(working.sequence == 2);
1051
- // TODO Assert working.size.
1052
- assert(working.manifest_size == 0);
1053
- assert(working.free_set_size == 8);
1054
- assert(working.vsr_state.commit_min == 0);
1055
- assert(working.vsr_state.commit_max == 0);
1056
- assert(working.vsr_state.view_normal == 0);
1057
- assert(working.vsr_state.view == 0);
1058
- } else if (context.caller == .checkpoint) {
1059
- superblock.free_set.checkpoint();
1063
+ const working = quorum.sector;
1064
+ if (threshold == .verify) {
1065
+ if (working.checksum != superblock.staging.checksum) {
1066
+ @panic("superblock failed verification after writing");
1060
1067
  }
1068
+ assert(working.equal(superblock.staging));
1069
+ }
1061
1070
 
1062
- superblock.working.* = working.*;
1063
- log.debug(
1064
- "{s}: installed working superblock: checksum={x} sequence={} cluster={} " ++
1065
- "replica={} size={} commit_min={} commit_max={} view_normal={} view={}",
1066
- .{
1067
- @tagName(context.caller),
1068
- superblock.working.checksum,
1069
- superblock.working.sequence,
1070
- superblock.working.cluster,
1071
- superblock.working.replica,
1072
- superblock.working.size,
1073
- superblock.working.vsr_state.commit_min,
1074
- superblock.working.vsr_state.commit_max,
1075
- superblock.working.vsr_state.view_normal,
1076
- superblock.working.vsr_state.view,
1077
- },
1078
- );
1079
-
1080
- if (context.caller == .open) {
1081
- context.copy = starting_copy_for_sequence(superblock.working.sequence);
1082
- superblock.read_manifest(context);
1083
- } else {
1084
- // TODO Consider calling TRIM() on Grid's free suffix after checkpointing.
1071
+ if (context.caller == .format) {
1072
+ assert(working.sequence == 1);
1073
+ assert(working.size == data_file_size_min);
1074
+ assert(working.manifest_size == 0);
1075
+ assert(working.free_set_size == 8);
1076
+ assert(working.client_table_size == 4);
1077
+ assert(working.vsr_state.commit_min_checksum ==
1078
+ vsr.Header.root_prepare(working.cluster).checksum);
1079
+ assert(working.vsr_state.commit_min == 0);
1080
+ assert(working.vsr_state.commit_max == 0);
1081
+ assert(working.vsr_state.view_normal == 0);
1082
+ assert(working.vsr_state.view == 0);
1083
+ } else if (context.caller == .checkpoint) {
1084
+ superblock.free_set.checkpoint();
1085
+ }
1086
+
1087
+ superblock.working.* = working.*;
1088
+ superblock.staging.* = working.*;
1089
+ log.debug(
1090
+ "{s}: installed working superblock: checksum={x} sequence={} cluster={} " ++
1091
+ "replica={} size={} " ++
1092
+ "commit_min_checksum={} commit_min={} commit_max={} " ++
1093
+ "view_normal={} view={}",
1094
+ .{
1095
+ @tagName(context.caller),
1096
+ superblock.working.checksum,
1097
+ superblock.working.sequence,
1098
+ superblock.working.cluster,
1099
+ superblock.working.replica,
1100
+ superblock.working.size,
1101
+ superblock.working.vsr_state.commit_min_checksum,
1102
+ superblock.working.vsr_state.commit_min,
1103
+ superblock.working.vsr_state.commit_max,
1104
+ superblock.working.vsr_state.view_normal,
1105
+ superblock.working.vsr_state.view,
1106
+ },
1107
+ );
1108
+
1109
+ if (context.caller == .open) {
1110
+ if (context.repairs) |_| {
1111
+ // We just verified that the repair completed.
1112
+ assert(threshold == .verify);
1085
1113
  superblock.release(context);
1114
+ } else {
1115
+ assert(threshold == .open);
1116
+ context.copy = 0;
1117
+ context.repairs = quorum.repairs();
1118
+ superblock.read_manifest(context);
1086
1119
  }
1087
- } else |err| switch (err) {
1088
- error.NotFound => @panic("superblock not found"),
1089
- error.QuorumLost => @panic("superblock quorum lost"),
1090
- error.ParentNotFound => @panic("superblock parent not found"),
1091
- error.ParentQuorumLost => @panic("superblock parent quorum lost"),
1092
- error.VSRStateNotMonotonic => @panic("superblock vsr state not monotonic"),
1093
- error.SequenceNotMonotonic => @panic("superblock sequence not monotonic"),
1120
+ } else {
1121
+ // TODO Consider calling TRIM() on Grid's free suffix after checkpointing.
1122
+ superblock.release(context);
1094
1123
  }
1095
- } else {
1096
- context.copy += 1;
1097
- superblock.read_sector(context);
1124
+ } else |err| switch (err) {
1125
+ error.Fork => @panic("superblock forked"),
1126
+ error.NotFound => @panic("superblock not found"),
1127
+ error.QuorumLost => @panic("superblock quorum lost"),
1128
+ error.ParentNotConnected => @panic("superblock parent not connected"),
1129
+ error.ParentSkipped => @panic("superblock parent superseded"),
1130
+ error.VSRStateNotMonotonic => @panic("superblock vsr state not monotonic"),
1098
1131
  }
1099
1132
  }
1100
1133
 
1101
1134
  fn read_manifest(superblock: *SuperBlock, context: *Context) void {
1102
1135
  assert(context.caller == .open);
1103
1136
  assert(superblock.queue_head == context);
1104
- assert(context.copy < superblock_copies_max);
1137
+ assert(context.copy.? < config.superblock_copies);
1105
1138
 
1106
1139
  const size = vsr.sector_ceil(superblock.working.manifest_size);
1107
1140
  assert(size <= superblock_trailer_manifest_size_max);
1108
1141
 
1109
1142
  const buffer = superblock.manifest_buffer[0..size];
1110
- const offset = offset_manifest(context.copy, superblock.working.sequence);
1143
+ const offset = Layout.offset_manifest(context.copy.?);
1111
1144
 
1112
1145
  log.debug("{s}: read_manifest: copy={} size={} offset={}", .{
1113
1146
  @tagName(context.caller),
1114
- context.copy,
1147
+ context.copy.?,
1115
1148
  buffer.len,
1116
1149
  offset,
1117
1150
  });
@@ -1135,6 +1168,7 @@ pub fn SuperBlockType(comptime Storage: type) type {
1135
1168
  fn read_manifest_callback(read: *Storage.Read) void {
1136
1169
  const context = @fieldParentPtr(Context, "read", read);
1137
1170
  const superblock = context.superblock;
1171
+ const copy = context.copy.?;
1138
1172
 
1139
1173
  assert(context.caller == .open);
1140
1174
  assert(superblock.queue_head == context);
@@ -1153,13 +1187,13 @@ pub fn SuperBlockType(comptime Storage: type) type {
1153
1187
  // TODO Repair any impaired copies before we continue.
1154
1188
  // At present, we repair at the next checkpoint.
1155
1189
  // We do not repair padding.
1156
- context.copy = starting_copy_for_sequence(superblock.working.sequence);
1190
+ context.copy = 0;
1157
1191
  superblock.read_free_set(context);
1158
- } else if (context.copy == stopping_copy_for_sequence(superblock.working.sequence)) {
1192
+ } else if (copy + 1 == config.superblock_copies) {
1159
1193
  @panic("superblock manifest lost");
1160
1194
  } else {
1161
- log.debug("open: read_manifest: corrupt copy={}", .{context.copy});
1162
- context.copy += 1;
1195
+ log.debug("open: read_manifest: corrupt copy={}", .{copy});
1196
+ context.copy = copy + 1;
1163
1197
  superblock.read_manifest(context);
1164
1198
  }
1165
1199
  }
@@ -1167,17 +1201,17 @@ pub fn SuperBlockType(comptime Storage: type) type {
1167
1201
  fn read_free_set(superblock: *SuperBlock, context: *Context) void {
1168
1202
  assert(context.caller == .open);
1169
1203
  assert(superblock.queue_head == context);
1170
- assert(context.copy < superblock_copies_max);
1204
+ assert(context.copy.? < config.superblock_copies);
1171
1205
 
1172
1206
  const size = vsr.sector_ceil(superblock.working.free_set_size);
1173
1207
  assert(size <= superblock_trailer_free_set_size_max);
1174
1208
 
1175
1209
  const buffer = superblock.free_set_buffer[0..size];
1176
- const offset = offset_free_set(context.copy, superblock.working.sequence);
1210
+ const offset = Layout.offset_free_set(context.copy.?);
1177
1211
 
1178
1212
  log.debug("{s}: read_free_set: copy={} size={} offset={}", .{
1179
1213
  @tagName(context.caller),
1180
- context.copy,
1214
+ context.copy.?,
1181
1215
  buffer.len,
1182
1216
  offset,
1183
1217
  });
@@ -1201,6 +1235,7 @@ pub fn SuperBlockType(comptime Storage: type) type {
1201
1235
  fn read_free_set_callback(read: *Storage.Read) void {
1202
1236
  const context = @fieldParentPtr(Context, "read", read);
1203
1237
  const superblock = context.superblock;
1238
+ const copy = context.copy.?;
1204
1239
 
1205
1240
  assert(context.caller == .open);
1206
1241
  assert(superblock.queue_head == context);
@@ -1220,11 +1255,11 @@ pub fn SuperBlockType(comptime Storage: type) type {
1220
1255
 
1221
1256
  // TODO Repair any impaired copies before we continue.
1222
1257
  superblock.read_client_table(context);
1223
- } else if (context.copy == stopping_copy_for_sequence(superblock.working.sequence)) {
1258
+ } else if (copy + 1 == config.superblock_copies) {
1224
1259
  @panic("superblock free set lost");
1225
1260
  } else {
1226
- log.debug("open: read_free_set: corrupt copy={}", .{context.copy});
1227
- context.copy += 1;
1261
+ log.debug("open: read_free_set: corrupt copy={}", .{copy});
1262
+ context.copy = copy + 1;
1228
1263
  superblock.read_free_set(context);
1229
1264
  }
1230
1265
  }
@@ -1239,17 +1274,17 @@ pub fn SuperBlockType(comptime Storage: type) type {
1239
1274
  fn read_client_table(superblock: *SuperBlock, context: *Context) void {
1240
1275
  assert(context.caller == .open);
1241
1276
  assert(superblock.queue_head == context);
1242
- assert(context.copy < superblock_copies_max);
1277
+ assert(context.copy.? < config.superblock_copies);
1243
1278
 
1244
1279
  const size = vsr.sector_ceil(superblock.working.client_table_size);
1245
1280
  assert(size <= superblock_trailer_client_table_size_max);
1246
1281
 
1247
1282
  const buffer = superblock.client_table_buffer[0..size];
1248
- const offset = offset_client_table(context.copy, superblock.working.sequence);
1283
+ const offset = Layout.offset_client_table(context.copy.?);
1249
1284
 
1250
1285
  log.debug("{s}: read_client_table: copy={} size={} offset={}", .{
1251
1286
  @tagName(context.caller),
1252
- context.copy,
1287
+ context.copy.?,
1253
1288
  buffer.len,
1254
1289
  offset,
1255
1290
  });
@@ -1273,6 +1308,7 @@ pub fn SuperBlockType(comptime Storage: type) type {
1273
1308
  fn read_client_table_callback(read: *Storage.Read) void {
1274
1309
  const context = @fieldParentPtr(Context, "read", read);
1275
1310
  const superblock = context.superblock;
1311
+ const copy = context.copy.?;
1276
1312
 
1277
1313
  assert(context.caller == .open);
1278
1314
  assert(superblock.queue_head == context);
@@ -1288,17 +1324,33 @@ pub fn SuperBlockType(comptime Storage: type) type {
1288
1324
  config.clients_max,
1289
1325
  });
1290
1326
 
1291
- // TODO Repair any impaired copies before we continue.
1292
- superblock.release(context);
1293
- } else if (context.copy == stopping_copy_for_sequence(superblock.working.sequence)) {
1327
+ context.copy = null;
1328
+ superblock.repair(context);
1329
+ } else if (copy + 1 == config.superblock_copies) {
1294
1330
  @panic("superblock client table lost");
1295
1331
  } else {
1296
- log.debug("open: read_client_table: corrupt copy={}", .{context.copy});
1297
- context.copy += 1;
1332
+ log.debug("open: read_client_table: corrupt copy={}", .{copy});
1333
+ context.copy = copy + 1;
1298
1334
  superblock.read_client_table(context);
1299
1335
  }
1300
1336
  }
1301
1337
 
1338
+ fn repair(superblock: *SuperBlock, context: *Context) void {
1339
+ assert(context.caller == .open);
1340
+ assert(context.copy == null);
1341
+ assert(superblock.queue_head == context);
1342
+
1343
+ if (context.repairs.?.next()) |repair_copy| {
1344
+ context.copy = repair_copy;
1345
+ log.warn("repair: copy={}", .{repair_copy});
1346
+
1347
+ superblock.staging.* = superblock.working.*;
1348
+ superblock.write_manifest(context);
1349
+ } else {
1350
+ superblock.release(context);
1351
+ }
1352
+ }
1353
+
1302
1354
  fn acquire(superblock: *SuperBlock, context: *Context) void {
1303
1355
  if (superblock.queue_head) |head| {
1304
1356
  // There should be nothing else happening when we format() or open():
@@ -1321,11 +1373,10 @@ pub fn SuperBlockType(comptime Storage: type) type {
1321
1373
  superblock.queue_head = context;
1322
1374
  log.debug("{s}: started", .{@tagName(context.caller)});
1323
1375
 
1324
- switch (context.caller) {
1325
- .format => superblock.write_staging(context),
1326
- .open => superblock.read_working(context),
1327
- .checkpoint => superblock.write_staging(context),
1328
- .view_change => superblock.write_view_change(context),
1376
+ if (context.caller == .open) {
1377
+ superblock.read_working(context, .open);
1378
+ } else {
1379
+ superblock.write_staging(context);
1329
1380
  }
1330
1381
  }
1331
1382
  }
@@ -1335,20 +1386,24 @@ pub fn SuperBlockType(comptime Storage: type) type {
1335
1386
 
1336
1387
  log.debug("{s}: complete", .{@tagName(context.caller)});
1337
1388
 
1338
- if (context.caller == .open) {
1339
- assert(!superblock.opened);
1340
- superblock.opened = true;
1389
+ switch (context.caller) {
1390
+ .format => {},
1391
+ .open => {
1392
+ assert(!superblock.opened);
1393
+ superblock.opened = true;
1341
1394
 
1342
- if (superblock.working.manifest_size > 0) {
1343
- assert(superblock.manifest.count > 0);
1344
- }
1345
- // TODO Make the FreeSet encoding format not dependant on the word size.
1346
- if (superblock.working.free_set_size > @sizeOf(usize)) {
1347
- assert(superblock.free_set.count_acquired() > 0);
1348
- }
1349
- } else if (context.caller == .view_change) {
1350
- assert(meta.eql(superblock.working.vsr_state, context.vsr_state));
1351
- assert(meta.eql(superblock.staging.vsr_state, context.vsr_state));
1395
+ if (superblock.working.manifest_size > 0) {
1396
+ assert(superblock.manifest.count > 0);
1397
+ }
1398
+ // TODO Make the FreeSet encoding format not dependant on the word size.
1399
+ if (superblock.working.free_set_size > @sizeOf(usize)) {
1400
+ assert(superblock.free_set.count_acquired() > 0);
1401
+ }
1402
+ },
1403
+ .checkpoint, .view_change => {
1404
+ assert(meta.eql(superblock.staging.vsr_state, context.vsr_state.?));
1405
+ assert(meta.eql(superblock.working.vsr_state, context.vsr_state.?));
1406
+ },
1352
1407
  }
1353
1408
 
1354
1409
  const queue_tail = superblock.queue_tail;
@@ -1364,40 +1419,6 @@ pub fn SuperBlockType(comptime Storage: type) type {
1364
1419
  assert(offset + size <= superblock.storage_offset + superblock.storage_size);
1365
1420
  }
1366
1421
 
1367
- fn offset_manifest(copy: u8, sequence: u64) u64 {
1368
- assert(copy >= starting_copy_for_sequence(sequence));
1369
- assert(copy <= stopping_copy_for_sequence(sequence));
1370
-
1371
- return superblock_size * copy + @sizeOf(SuperBlockSector);
1372
- }
1373
-
1374
- fn offset_free_set(copy: u8, sequence: u64) u64 {
1375
- assert(copy >= starting_copy_for_sequence(sequence));
1376
- assert(copy <= stopping_copy_for_sequence(sequence));
1377
-
1378
- return superblock_size * copy + @sizeOf(SuperBlockSector) +
1379
- superblock_trailer_manifest_size_max;
1380
- }
1381
-
1382
- fn offset_client_table(copy: u8, sequence: u64) u64 {
1383
- assert(copy >= starting_copy_for_sequence(sequence));
1384
- assert(copy <= stopping_copy_for_sequence(sequence));
1385
-
1386
- return superblock_size * copy + @sizeOf(SuperBlockSector) +
1387
- superblock_trailer_manifest_size_max +
1388
- superblock_trailer_free_set_size_max;
1389
- }
1390
-
1391
- /// Returns the first copy index (inclusive) to be written for a sequence number.
1392
- fn starting_copy_for_sequence(sequence: u64) u8 {
1393
- return config.superblock_copies * @intCast(u8, sequence % 2);
1394
- }
1395
-
1396
- /// Returns the last copy index (inclusive) to be written for a sequence number.
1397
- fn stopping_copy_for_sequence(sequence: u64) u8 {
1398
- return starting_copy_for_sequence(sequence) + config.superblock_copies - 1;
1399
- }
1400
-
1401
1422
  /// We use flexible quorums for even quorums with write quorum > read quorum, for example:
1402
1423
  /// * When writing, we must verify that at least 3/4 copies were written.
1403
1424
  /// * At startup, we must verify that at least 2/4 copies were read.
@@ -1428,217 +1449,25 @@ pub fn SuperBlockType(comptime Storage: type) type {
1428
1449
  };
1429
1450
  }
1430
1451
 
1431
- const Quorums = struct {
1432
- const Quorum = struct {
1433
- sector: *const SuperBlockSector,
1434
- count: QuorumCount = QuorumCount.initEmpty(),
1435
- valid: bool = false,
1436
- };
1437
-
1438
- const QuorumCount = std.StaticBitSet(superblock_copies_max);
1439
-
1440
- array: [superblock_copies_max]Quorum = undefined,
1441
- count: u8 = 0,
1442
-
1443
- pub const Error = error{
1444
- NotFound,
1445
- QuorumLost,
1446
- ParentNotFound,
1447
- ParentQuorumLost,
1448
- SequenceNotMonotonic,
1449
- VSRStateNotMonotonic,
1450
- };
1451
-
1452
- /// Returns the working superblock according to the quorum with the highest sequence number.
1453
- /// Verifies that the highest quorum is connected, that the previous quorum was not lost.
1454
- /// i.e. Both the working and previous quorum must be valid and intact and connected.
1455
- /// Otherwise, we might regress to a previous working superblock.
1456
- pub fn working(
1457
- quorums: *Quorums,
1458
- copies: []SuperBlockSector,
1459
- threshold: u8,
1460
- ) Error!*const SuperBlockSector {
1461
- assert(copies.len == superblock_copies_max);
1462
- assert(threshold >= 2 and threshold <= 5);
1463
-
1464
- quorums.array = undefined;
1465
- quorums.count = 0;
1466
-
1467
- for (copies) |*copy, index| quorums.count_copy(copy, index, threshold);
1468
-
1469
- std.sort.sort(Quorum, quorums.slice(), {}, sort_priority_descending);
1470
-
1471
- for (quorums.slice()) |quorum| {
1472
- if (quorum.count.count() == config.superblock_copies) {
1473
- log.debug("quorum: checksum={x} parent={x} sequence={} count={} valid={}", .{
1474
- quorum.sector.checksum,
1475
- quorum.sector.parent,
1476
- quorum.sector.sequence,
1477
- quorum.count.count(),
1478
- quorum.valid,
1479
- });
1480
- } else {
1481
- log.err("quorum: checksum={x} parent={x} sequence={} count={} valid={}", .{
1482
- quorum.sector.checksum,
1483
- quorum.sector.parent,
1484
- quorum.sector.sequence,
1485
- quorum.count.count(),
1486
- quorum.valid,
1487
- });
1488
- }
1489
- }
1490
-
1491
- // No working copies of any sequence number exist in the superblock storage zone at all.
1492
- if (quorums.slice().len == 0) return error.NotFound;
1493
-
1494
- // At least one copy or quorum exists.
1495
- const b = quorums.slice()[0];
1496
-
1497
- // Verify that the remaining quorums are correctly sorted:
1498
- for (quorums.slice()[1..]) |a| {
1499
- assert(sort_priority_descending({}, b, a));
1500
- assert(a.sector.magic == .superblock);
1501
- assert(a.sector.valid_checksum());
1502
- }
1503
-
1504
- // Even the best copy with the most quorum still has inadequate quorum.
1505
- if (!b.valid) return error.QuorumLost;
1506
-
1507
- // The superblock is only partially formatted, not all copies were written.
1508
- if (b.sector.sequence < 2) return error.NotFound;
1509
-
1510
- // Verify that the parent copy exists:
1511
- for (quorums.slice()[1..]) |a| {
1512
- if (a.sector.cluster != b.sector.cluster) {
1513
- log.err("superblock copy={} has cluster={} instead of {}", .{
1514
- a.sector.copy,
1515
- a.sector.cluster,
1516
- b.sector.cluster,
1517
- });
1518
- } else if (a.sector.replica != b.sector.replica) {
1519
- log.err("superblock copy={} has replica={} instead of {}", .{
1520
- a.sector.copy,
1521
- a.sector.replica,
1522
- b.sector.replica,
1523
- });
1524
- } else if (a.sector.checksum == b.sector.parent) {
1525
- assert(a.sector.checksum != b.sector.checksum);
1526
- assert(a.sector.cluster == b.sector.cluster);
1527
- assert(a.sector.replica == b.sector.replica);
1528
-
1529
- if (!a.valid) {
1530
- return error.ParentQuorumLost;
1531
- } else if (a.sector.sequence >= b.sector.sequence) {
1532
- return error.SequenceNotMonotonic;
1533
- } else if (a.sector.sequence % 2 == b.sector.sequence % 2) {
1534
- // The parent must reside in the alternate copy to guarantee that we are able to
1535
- // detect when the working quorum is lost.
1536
- return error.SequenceNotMonotonic;
1537
- } else if (!a.sector.vsr_state.monotonic(b.sector.vsr_state)) {
1538
- return error.VSRStateNotMonotonic;
1539
- } else {
1540
- assert(b.sector.magic == .superblock);
1541
- assert(b.sector.valid_checksum());
1542
-
1543
- return b.sector;
1544
- }
1545
- }
1546
- }
1547
-
1548
- return error.ParentNotFound;
1549
- }
1550
-
1551
- fn count_copy(
1552
- quorums: *Quorums,
1553
- copy: *const SuperBlockSector,
1554
- index: usize,
1555
- threshold: u8,
1556
- ) void {
1557
- assert(index < superblock_copies_max);
1558
- assert(threshold >= 2 and threshold <= 5);
1559
-
1560
- if (!copy.valid_checksum()) {
1561
- log.debug("copy: {}/{}: invalid checksum", .{ index, superblock_copies_max });
1562
- return;
1563
- }
1564
-
1565
- if (copy.magic != .superblock) {
1566
- log.debug("copy: {}/{}: not a superblock", .{ index, superblock_copies_max });
1567
- return;
1568
- }
1569
-
1570
- if (copy.copy == index) {
1571
- log.debug("copy: {}/{}: checksum={x} parent={x} sequence={}", .{
1572
- index,
1573
- superblock_copies_max,
1574
- copy.checksum,
1575
- copy.parent,
1576
- copy.sequence,
1577
- });
1578
- } else {
1579
- // If our read was misdirected, we definitely still want to count the copy.
1580
- // We must just be careful to count it idempotently.
1581
- log.err(
1582
- "copy: {}/{}: checksum={x} parent={x} sequence={} misdirected from copy={}",
1583
- .{
1584
- index,
1585
- superblock_copies_max,
1586
- copy.checksum,
1587
- copy.parent,
1588
- copy.sequence,
1589
- copy.copy,
1590
- },
1591
- );
1592
- }
1593
-
1594
- var quorum = quorums.find_or_insert_quorum_for_copy(copy);
1595
- assert(quorum.sector.checksum == copy.checksum);
1596
- assert(quorum.sector.equal(copy));
1597
-
1598
- quorum.count.set(copy.copy);
1599
- assert(quorum.count.isSet(copy.copy));
1600
-
1601
- // In the worst case, all copies may contain divergent forks of the same sequence.
1602
- // However, this should not happen for the same checksum.
1603
- assert(quorum.count.count() <= config.superblock_copies);
1604
-
1605
- quorum.valid = quorum.count.count() >= threshold;
1452
+ pub const Layout = struct {
1453
+ pub fn offset_sector(copy: u8) u64 {
1454
+ assert(copy < config.superblock_copies);
1455
+ return superblock_copy_size * @as(u64, copy);
1606
1456
  }
1607
1457
 
1608
- fn find_or_insert_quorum_for_copy(quorums: *Quorums, copy: *const SuperBlockSector) *Quorum {
1609
- assert(copy.magic == .superblock);
1610
- assert(copy.valid_checksum());
1611
-
1612
- for (quorums.array[0..quorums.count]) |*quorum| {
1613
- if (copy.checksum == quorum.sector.checksum) return quorum;
1614
- } else {
1615
- quorums.array[quorums.count] = Quorum{ .sector = copy };
1616
- quorums.count += 1;
1617
-
1618
- return &quorums.array[quorums.count - 1];
1619
- }
1458
+ pub fn offset_manifest(copy: u8) u64 {
1459
+ assert(copy < config.superblock_copies);
1460
+ return offset_sector(copy) + @sizeOf(SuperBlockSector);
1620
1461
  }
1621
1462
 
1622
- fn slice(quorums: *Quorums) []Quorum {
1623
- return quorums.array[0..quorums.count];
1463
+ pub fn offset_free_set(copy: u8) u64 {
1464
+ assert(copy < config.superblock_copies);
1465
+ return offset_manifest(copy) + superblock_trailer_manifest_size_max;
1624
1466
  }
1625
1467
 
1626
- fn sort_priority_descending(_: void, a: Quorum, b: Quorum) bool {
1627
- assert(a.sector.checksum != b.sector.checksum);
1628
- assert(a.sector.magic == .superblock);
1629
- assert(b.sector.magic == .superblock);
1630
-
1631
- if (a.valid and !b.valid) return true;
1632
- if (b.valid and !a.valid) return false;
1633
-
1634
- if (a.sector.sequence > b.sector.sequence) return true;
1635
- if (b.sector.sequence > a.sector.sequence) return false;
1636
-
1637
- if (a.count.count() > b.count.count()) return true;
1638
- if (b.count.count() > a.count.count()) return false;
1639
-
1640
- // The sort order must be stable and deterministic:
1641
- return a.sector.checksum > b.sector.checksum;
1468
+ pub fn offset_client_table(copy: u8) u64 {
1469
+ assert(copy < config.superblock_copies);
1470
+ return offset_free_set(copy) + superblock_trailer_free_set_size_max;
1642
1471
  }
1643
1472
  };
1644
1473
 
@@ -1657,105 +1486,3 @@ test "SuperBlockSector" {
1657
1486
  a.replica += 1;
1658
1487
  try expect(!a.valid_checksum());
1659
1488
  }
1660
-
1661
- // TODO Add unit tests for Quorums.
1662
- // TODO Test invariants and transitions across TestRunner functions.
1663
- const TestStorage = @import("../test/storage.zig").Storage;
1664
- const TestSuperBlock = SuperBlockType(TestStorage);
1665
-
1666
- const TestRunner = struct {
1667
- superblock: *TestSuperBlock,
1668
- context_format: TestSuperBlock.Context = undefined,
1669
- context_open: TestSuperBlock.Context = undefined,
1670
- context_checkpoint: TestSuperBlock.Context = undefined,
1671
- context_view_change: TestSuperBlock.Context = undefined,
1672
- pending: usize = 0,
1673
-
1674
- fn format(runner: *TestRunner, options: TestSuperBlock.FormatOptions) void {
1675
- runner.pending += 1;
1676
- runner.superblock.format(format_callback, &runner.context_format, options);
1677
- }
1678
-
1679
- fn format_callback(context: *TestSuperBlock.Context) void {
1680
- const runner = @fieldParentPtr(TestRunner, "context_format", context);
1681
- runner.pending -= 1;
1682
- runner.open();
1683
- }
1684
-
1685
- fn open(runner: *TestRunner) void {
1686
- runner.pending += 1;
1687
- runner.superblock.open(open_callback, &runner.context_open);
1688
- }
1689
-
1690
- fn open_callback(context: *TestSuperBlock.Context) void {
1691
- const runner = @fieldParentPtr(TestRunner, "context_open", context);
1692
- runner.pending -= 1;
1693
- runner.checkpoint();
1694
- runner.view_change();
1695
- }
1696
-
1697
- fn view_change(runner: *TestRunner) void {
1698
- runner.pending += 1;
1699
- runner.superblock.view_change(
1700
- view_change_callback,
1701
- &runner.context_view_change,
1702
- .{
1703
- .commit_min = runner.superblock.working.vsr_state.commit_min + 1,
1704
- .commit_max = runner.superblock.working.vsr_state.commit_max + 2,
1705
- .view_normal = runner.superblock.working.vsr_state.view_normal + 3,
1706
- .view = runner.superblock.working.vsr_state.view + 4,
1707
- },
1708
- );
1709
- }
1710
-
1711
- fn view_change_callback(context: *TestSuperBlock.Context) void {
1712
- const runner = @fieldParentPtr(TestRunner, "context_view_change", context);
1713
- runner.pending -= 1;
1714
- runner.checkpoint();
1715
- }
1716
-
1717
- fn checkpoint(runner: *TestRunner) void {
1718
- runner.pending += 1;
1719
- runner.superblock.checkpoint(checkpoint_callback, &runner.context_checkpoint);
1720
- }
1721
-
1722
- fn checkpoint_callback(context: *TestSuperBlock.Context) void {
1723
- const runner = @fieldParentPtr(TestRunner, "context_checkpoint", context);
1724
- runner.pending -= 1;
1725
- }
1726
- };
1727
-
1728
- test "SuperBlock" {
1729
- const cluster = 32;
1730
- const replica = 4;
1731
- const size_max = data_file_size_min;
1732
-
1733
- var storage = try TestStorage.init(std.testing.allocator, superblock_zone_size, .{
1734
- .seed = 0,
1735
- .read_latency_min = 1,
1736
- .read_latency_mean = 1,
1737
- .write_latency_min = 1,
1738
- .write_latency_mean = 1,
1739
- .read_fault_probability = 0,
1740
- .write_fault_probability = 0,
1741
- }, replica, .{
1742
- .first_offset = superblock_zone_size,
1743
- .period = 1,
1744
- });
1745
- defer storage.deinit(std.testing.allocator);
1746
-
1747
- var message_pool = try MessagePool.init(std.testing.allocator, .replica);
1748
- defer message_pool.deinit(std.testing.allocator);
1749
-
1750
- var superblock = try TestSuperBlock.init(std.testing.allocator, &storage, &message_pool);
1751
- defer superblock.deinit(std.testing.allocator);
1752
-
1753
- var runner = TestRunner{ .superblock = &superblock };
1754
- runner.format(.{
1755
- .cluster = cluster,
1756
- .replica = replica,
1757
- .size_max = size_max,
1758
- });
1759
-
1760
- while (runner.pending > 0) storage.tick();
1761
- }