tigerbeetle-node 0.11.13 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/dist/bin/aarch64-linux-gnu/client.node +0 -0
  2. package/dist/bin/aarch64-linux-musl/client.node +0 -0
  3. package/dist/bin/aarch64-macos/client.node +0 -0
  4. package/dist/bin/x86_64-linux-gnu/client.node +0 -0
  5. package/dist/bin/x86_64-linux-musl/client.node +0 -0
  6. package/dist/bin/x86_64-macos/client.node +0 -0
  7. package/dist/index.js +33 -1
  8. package/dist/index.js.map +1 -1
  9. package/package-lock.json +66 -0
  10. package/package.json +6 -16
  11. package/src/index.ts +56 -1
  12. package/src/node.zig +9 -9
  13. package/dist/.client.node.sha256 +0 -1
  14. package/scripts/build_lib.sh +0 -61
  15. package/scripts/download_node_headers.sh +0 -32
  16. package/src/tigerbeetle/scripts/benchmark.bat +0 -55
  17. package/src/tigerbeetle/scripts/benchmark.sh +0 -66
  18. package/src/tigerbeetle/scripts/confirm_image.sh +0 -44
  19. package/src/tigerbeetle/scripts/fail_on_diff.sh +0 -9
  20. package/src/tigerbeetle/scripts/fuzz_loop.sh +0 -15
  21. package/src/tigerbeetle/scripts/fuzz_loop_hash_log.sh +0 -12
  22. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +0 -7
  23. package/src/tigerbeetle/scripts/install.bat +0 -7
  24. package/src/tigerbeetle/scripts/install.sh +0 -21
  25. package/src/tigerbeetle/scripts/install_zig.bat +0 -113
  26. package/src/tigerbeetle/scripts/install_zig.sh +0 -90
  27. package/src/tigerbeetle/scripts/lint.zig +0 -199
  28. package/src/tigerbeetle/scripts/pre-commit.sh +0 -9
  29. package/src/tigerbeetle/scripts/scripts/benchmark.bat +0 -55
  30. package/src/tigerbeetle/scripts/scripts/benchmark.sh +0 -66
  31. package/src/tigerbeetle/scripts/scripts/confirm_image.sh +0 -44
  32. package/src/tigerbeetle/scripts/scripts/fail_on_diff.sh +0 -9
  33. package/src/tigerbeetle/scripts/scripts/fuzz_loop.sh +0 -15
  34. package/src/tigerbeetle/scripts/scripts/fuzz_loop_hash_log.sh +0 -12
  35. package/src/tigerbeetle/scripts/scripts/fuzz_unique_errors.sh +0 -7
  36. package/src/tigerbeetle/scripts/scripts/install.bat +0 -7
  37. package/src/tigerbeetle/scripts/scripts/install.sh +0 -21
  38. package/src/tigerbeetle/scripts/scripts/install_zig.bat +0 -113
  39. package/src/tigerbeetle/scripts/scripts/install_zig.sh +0 -90
  40. package/src/tigerbeetle/scripts/scripts/lint.zig +0 -199
  41. package/src/tigerbeetle/scripts/scripts/pre-commit.sh +0 -9
  42. package/src/tigerbeetle/scripts/scripts/shellcheck.sh +0 -5
  43. package/src/tigerbeetle/scripts/scripts/tests_on_alpine.sh +0 -10
  44. package/src/tigerbeetle/scripts/scripts/tests_on_ubuntu.sh +0 -14
  45. package/src/tigerbeetle/scripts/scripts/upgrade_ubuntu_kernel.sh +0 -48
  46. package/src/tigerbeetle/scripts/scripts/validate_docs.sh +0 -23
  47. package/src/tigerbeetle/scripts/scripts/vr_state_enumerate +0 -46
  48. package/src/tigerbeetle/scripts/shellcheck.sh +0 -5
  49. package/src/tigerbeetle/scripts/tests_on_alpine.sh +0 -10
  50. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +0 -14
  51. package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +0 -48
  52. package/src/tigerbeetle/scripts/validate_docs.sh +0 -23
  53. package/src/tigerbeetle/scripts/vr_state_enumerate +0 -46
  54. package/src/tigerbeetle/src/benchmark.zig +0 -336
  55. package/src/tigerbeetle/src/config.zig +0 -233
  56. package/src/tigerbeetle/src/constants.zig +0 -428
  57. package/src/tigerbeetle/src/ewah.zig +0 -286
  58. package/src/tigerbeetle/src/ewah_benchmark.zig +0 -120
  59. package/src/tigerbeetle/src/ewah_fuzz.zig +0 -130
  60. package/src/tigerbeetle/src/fifo.zig +0 -120
  61. package/src/tigerbeetle/src/io/benchmark.zig +0 -213
  62. package/src/tigerbeetle/src/io/darwin.zig +0 -814
  63. package/src/tigerbeetle/src/io/linux.zig +0 -1071
  64. package/src/tigerbeetle/src/io/test.zig +0 -643
  65. package/src/tigerbeetle/src/io/windows.zig +0 -1183
  66. package/src/tigerbeetle/src/io.zig +0 -34
  67. package/src/tigerbeetle/src/iops.zig +0 -107
  68. package/src/tigerbeetle/src/lsm/README.md +0 -308
  69. package/src/tigerbeetle/src/lsm/binary_search.zig +0 -341
  70. package/src/tigerbeetle/src/lsm/bloom_filter.zig +0 -125
  71. package/src/tigerbeetle/src/lsm/compaction.zig +0 -603
  72. package/src/tigerbeetle/src/lsm/composite_key.zig +0 -77
  73. package/src/tigerbeetle/src/lsm/direction.zig +0 -11
  74. package/src/tigerbeetle/src/lsm/eytzinger.zig +0 -587
  75. package/src/tigerbeetle/src/lsm/eytzinger_benchmark.zig +0 -330
  76. package/src/tigerbeetle/src/lsm/forest.zig +0 -205
  77. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +0 -450
  78. package/src/tigerbeetle/src/lsm/grid.zig +0 -573
  79. package/src/tigerbeetle/src/lsm/groove.zig +0 -1036
  80. package/src/tigerbeetle/src/lsm/k_way_merge.zig +0 -474
  81. package/src/tigerbeetle/src/lsm/level_iterator.zig +0 -332
  82. package/src/tigerbeetle/src/lsm/manifest.zig +0 -617
  83. package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -878
  84. package/src/tigerbeetle/src/lsm/manifest_log.zig +0 -789
  85. package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -691
  86. package/src/tigerbeetle/src/lsm/merge_iterator.zig +0 -106
  87. package/src/tigerbeetle/src/lsm/node_pool.zig +0 -235
  88. package/src/tigerbeetle/src/lsm/posted_groove.zig +0 -381
  89. package/src/tigerbeetle/src/lsm/segmented_array.zig +0 -1329
  90. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -148
  91. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -9
  92. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +0 -850
  93. package/src/tigerbeetle/src/lsm/table.zig +0 -1009
  94. package/src/tigerbeetle/src/lsm/table_immutable.zig +0 -192
  95. package/src/tigerbeetle/src/lsm/table_iterator.zig +0 -340
  96. package/src/tigerbeetle/src/lsm/table_mutable.zig +0 -203
  97. package/src/tigerbeetle/src/lsm/test.zig +0 -439
  98. package/src/tigerbeetle/src/lsm/tree.zig +0 -1169
  99. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +0 -479
  100. package/src/tigerbeetle/src/message_bus.zig +0 -1013
  101. package/src/tigerbeetle/src/message_pool.zig +0 -156
  102. package/src/tigerbeetle/src/ring_buffer.zig +0 -399
  103. package/src/tigerbeetle/src/simulator.zig +0 -580
  104. package/src/tigerbeetle/src/state_machine/auditor.zig +0 -578
  105. package/src/tigerbeetle/src/state_machine/workload.zig +0 -883
  106. package/src/tigerbeetle/src/state_machine.zig +0 -2099
  107. package/src/tigerbeetle/src/static_allocator.zig +0 -65
  108. package/src/tigerbeetle/src/stdx.zig +0 -171
  109. package/src/tigerbeetle/src/storage.zig +0 -393
  110. package/src/tigerbeetle/src/testing/cluster/message_bus.zig +0 -82
  111. package/src/tigerbeetle/src/testing/cluster/network.zig +0 -237
  112. package/src/tigerbeetle/src/testing/cluster/state_checker.zig +0 -169
  113. package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -202
  114. package/src/tigerbeetle/src/testing/cluster.zig +0 -444
  115. package/src/tigerbeetle/src/testing/fuzz.zig +0 -140
  116. package/src/tigerbeetle/src/testing/hash_log.zig +0 -66
  117. package/src/tigerbeetle/src/testing/id.zig +0 -99
  118. package/src/tigerbeetle/src/testing/packet_simulator.zig +0 -374
  119. package/src/tigerbeetle/src/testing/priority_queue.zig +0 -645
  120. package/src/tigerbeetle/src/testing/reply_sequence.zig +0 -139
  121. package/src/tigerbeetle/src/testing/state_machine.zig +0 -250
  122. package/src/tigerbeetle/src/testing/storage.zig +0 -757
  123. package/src/tigerbeetle/src/testing/table.zig +0 -247
  124. package/src/tigerbeetle/src/testing/time.zig +0 -84
  125. package/src/tigerbeetle/src/tigerbeetle.zig +0 -227
  126. package/src/tigerbeetle/src/time.zig +0 -112
  127. package/src/tigerbeetle/src/tracer.zig +0 -529
  128. package/src/tigerbeetle/src/unit_tests.zig +0 -40
  129. package/src/tigerbeetle/src/vopr.zig +0 -495
  130. package/src/tigerbeetle/src/vsr/README.md +0 -209
  131. package/src/tigerbeetle/src/vsr/client.zig +0 -544
  132. package/src/tigerbeetle/src/vsr/clock.zig +0 -855
  133. package/src/tigerbeetle/src/vsr/journal.zig +0 -2415
  134. package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +0 -111
  135. package/src/tigerbeetle/src/vsr/marzullo.zig +0 -309
  136. package/src/tigerbeetle/src/vsr/replica.zig +0 -6616
  137. package/src/tigerbeetle/src/vsr/replica_format.zig +0 -219
  138. package/src/tigerbeetle/src/vsr/superblock.zig +0 -1631
  139. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +0 -256
  140. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +0 -929
  141. package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +0 -334
  142. package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -390
  143. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +0 -615
  144. package/src/tigerbeetle/src/vsr/superblock_quorums.zig +0 -394
  145. package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -314
  146. package/src/tigerbeetle/src/vsr.zig +0 -1425
@@ -1,1631 +0,0 @@
1
- //! SuperBlock invariants:
2
- //!
3
- //! * vsr_state
4
- //! - vsr_state.commit_min is initially 0 (for a newly-formatted replica).
5
- //! - vsr_state.commit_min ≤ vsr_state.commit_max
6
- //! - vsr_state.log_view ≤ vsr_state.view
7
- //! - checkpoint() must advance the superblock's vsr_state.commit_min.
8
- //! - view_change() must not advance the superblock's vsr_state.commit_min.
9
- //! - All fields of vsr_state except commit_min_checksum are monotonically increasing over
10
- //! view_change()/checkpoint().
11
- //!
12
- const std = @import("std");
13
- const assert = std.debug.assert;
14
- const crypto = std.crypto;
15
- const mem = std.mem;
16
- const meta = std.meta;
17
- const os = std.os;
18
-
19
- const constants = @import("../constants.zig");
20
- const stdx = @import("../stdx.zig");
21
- const vsr = @import("../vsr.zig");
22
- const log = std.log.scoped(.superblock);
23
-
24
- const MessagePool = @import("../message_pool.zig").MessagePool;
25
-
26
- pub const SuperBlockManifest = @import("superblock_manifest.zig").Manifest;
27
- pub const SuperBlockFreeSet = @import("superblock_free_set.zig").FreeSet;
28
- pub const SuperBlockClientTable = @import("superblock_client_table.zig").ClientTable;
29
- pub const Quorums = @import("superblock_quorums.zig").QuorumsType(.{
30
- .superblock_copies = constants.superblock_copies,
31
- });
32
-
33
- pub const SuperBlockVersion: u16 = 0;
34
-
35
- const vsr_headers_reserved_size = constants.sector_size -
36
- ((constants.view_change_headers_max * @sizeOf(vsr.Header)) % constants.sector_size);
37
-
38
- // Fields are aligned to work as an extern or packed struct.
39
- pub const SuperBlockHeader = extern struct {
40
- checksum: u128 = undefined,
41
-
42
- /// Protects against misdirected reads at startup.
43
- /// For example, if multiple reads are all misdirected to a single copy of the superblock.
44
- /// Excluded from the checksum calculation to ensure that all copies have the same checksum.
45
- /// This simplifies writing and comparing multiple copies.
46
- copy: u8 = 0,
47
-
48
- /// Protects against writing to or reading from the wrong data file.
49
- replica: u8,
50
-
51
- /// The version of the superblock format in use, reserved for major breaking changes.
52
- version: u16,
53
-
54
- /// Protects against writing to or reading from the wrong data file.
55
- cluster: u32,
56
-
57
- /// The current size of the data file.
58
- storage_size: u64,
59
-
60
- /// The maximum possible size of the data file.
61
- /// The maximum allowed runtime storage_size_limit.
62
- /// The FreeSet's on-disk size is a function of storage_size_max.
63
- storage_size_max: u64,
64
-
65
- /// A monotonically increasing counter to locate the latest superblock at startup.
66
- sequence: u64,
67
-
68
- /// The checksum of the previous superblock to hash chain across sequence numbers.
69
- parent: u128,
70
-
71
- /// The checksum over the manifest block references in the superblock trailer.
72
- manifest_checksum: u128,
73
-
74
- /// The checksum over the actual encoded block free set in the superblock trailer.
75
- free_set_checksum: u128,
76
-
77
- /// The checksum over the client table entries in the superblock trailer.
78
- client_table_checksum: u128,
79
-
80
- /// State stored on stable storage for the Viewstamped Replication consensus protocol.
81
- vsr_state: VSRState,
82
-
83
- /// Reserved for future minor features (e.g. changing the compression algorithm of the trailer).
84
- flags: u64 = 0,
85
-
86
- /// A listing of persistent read snapshots that have been issued to clients.
87
- /// A snapshot.created timestamp of 0 indicates that the snapshot is null.
88
- snapshots: [constants.lsm_snapshots_max]Snapshot,
89
-
90
- /// The size of the manifest block references stored in the superblock trailer.
91
- /// The block addresses and checksums in this section of the trailer are laid out as follows:
92
- /// [manifest_size / (16 + 8 + 1)]u128 checksum
93
- /// [manifest_size / (16 + 8 + 1)]u64 address
94
- /// [manifest_size / (16 + 8 + 1)]u8 tree
95
- manifest_size: u32,
96
-
97
- /// The size of the block free set stored in the superblock trailer.
98
- free_set_size: u32,
99
-
100
- /// The size of the client table entries stored in the superblock trailer.
101
- client_table_size: u32,
102
-
103
- /// The number of headers in vsr_headers_all.
104
- vsr_headers_count: u32,
105
-
106
- reserved: [3144]u8 = [_]u8{0} ** 3144,
107
-
108
- /// SV/DVC header suffix. Headers are ordered from high-to-low op.
109
- /// Unoccupied headers (after vsr_headers_count) are zeroed.
110
- ///
111
- /// When `vsr_state.log_view < vsr_state.view`, the headers are for a DVC.
112
- /// When `vsr_state.log_view = vsr_state.view`, the headers are for a SV.
113
- vsr_headers_all: [constants.view_change_headers_max]vsr.Header,
114
- vsr_headers_reserved: [vsr_headers_reserved_size]u8 =
115
- [_]u8{0} ** vsr_headers_reserved_size,
116
-
117
- pub const VSRState = extern struct {
118
- /// The vsr.Header.checksum of commit_min's message.
119
- commit_min_checksum: u128,
120
-
121
- /// The last operation committed to the state machine. At startup, replay the log hereafter.
122
- commit_min: u64,
123
-
124
- /// The highest operation up to which we may commit.
125
- commit_max: u64,
126
-
127
- /// The last view in which the replica's status was normal.
128
- log_view: u32,
129
-
130
- /// The view number of the replica.
131
- view: u32,
132
-
133
- reserved: [8]u8 = [_]u8{0} ** 8,
134
-
135
- comptime {
136
- assert(@sizeOf(VSRState) == 48);
137
- // Assert that there is no implicit padding in the struct.
138
- assert(@bitSizeOf(VSRState) == @sizeOf(VSRState) * 8);
139
- }
140
-
141
- pub fn root(cluster: u32) VSRState {
142
- return .{
143
- .commit_min_checksum = vsr.Header.root_prepare(cluster).checksum,
144
- .commit_min = 0,
145
- .commit_max = 0,
146
- .log_view = 0,
147
- .view = 0,
148
- };
149
- }
150
-
151
- pub fn internally_consistent(state: VSRState) bool {
152
- return state.commit_max >= state.commit_min and state.view >= state.log_view;
153
- }
154
-
155
- pub fn monotonic(old: VSRState, new: VSRState) bool {
156
- assert(old.internally_consistent());
157
- assert(new.internally_consistent());
158
- // The last case is for when checking monotonic() from the sequence=0 header.
159
- assert(old.commit_min != new.commit_min or
160
- old.commit_min_checksum == new.commit_min_checksum or
161
- (old.commit_min_checksum == 0 and old.commit_min == 0));
162
-
163
- if (old.view > new.view) return false;
164
- if (old.log_view > new.log_view) return false;
165
- if (old.commit_min > new.commit_min) return false;
166
- if (old.commit_max > new.commit_max) return false;
167
-
168
- return true;
169
- }
170
-
171
- pub fn would_be_updated_by(old: VSRState, new: VSRState) bool {
172
- assert(monotonic(old, new));
173
-
174
- return !meta.eql(old, new);
175
- }
176
-
177
- /// Compaction is one bar ahead of superblock's commit_min.
178
- /// The commits from the bar following commit_min were in the mutable table, and
179
- /// thus not preserved in the checkpoint.
180
- /// But the corresponding `compact()` updates were preserved, and must not be repeated
181
- /// to ensure determinstic storage.
182
- pub fn op_compacted(state: VSRState, op: u64) bool {
183
- // If commit_min is 0, we have never checkpointed, so no compactions are checkpointed.
184
- return state.commit_min > 0 and op <= state.commit_min + constants.lsm_batch_multiple;
185
- }
186
- };
187
-
188
- pub const Snapshot = extern struct {
189
- /// A creation timestamp of 0 indicates that the snapshot is null.
190
- created: u64,
191
-
192
- /// When a read query last used the snapshot.
193
- queried: u64,
194
-
195
- /// Snapshots may auto-expire after a timeout of inactivity.
196
- /// A timeout of 0 indicates that the snapshot must be explicitly released by the user.
197
- timeout: u64,
198
-
199
- pub fn exists(snapshot: Snapshot) bool {
200
- if (snapshot.created == 0) {
201
- assert(snapshot.queried == 0);
202
- assert(snapshot.timeout == 0);
203
-
204
- return false;
205
- } else {
206
- return true;
207
- }
208
- }
209
-
210
- comptime {
211
- assert(@sizeOf(Snapshot) == 24);
212
- // Assert that there is no implicit padding in the struct.
213
- assert(@bitSizeOf(Snapshot) == @sizeOf(Snapshot) * 8);
214
- }
215
- };
216
-
217
- comptime {
218
- assert(@sizeOf(SuperBlockHeader) % constants.sector_size == 0);
219
- assert(@divExact(@sizeOf(SuperBlockHeader), constants.sector_size) >= 2);
220
- assert(@offsetOf(SuperBlockHeader, "vsr_headers_all") == constants.sector_size);
221
- // Assert that there is no implicit padding in the struct.
222
- assert(@bitSizeOf(SuperBlockHeader) == @sizeOf(SuperBlockHeader) * 8);
223
- }
224
-
225
- pub fn calculate_checksum(superblock: *const SuperBlockHeader) u128 {
226
- comptime assert(meta.fieldIndex(SuperBlockHeader, "checksum") == 0);
227
- comptime assert(meta.fieldIndex(SuperBlockHeader, "copy") == 1);
228
-
229
- const checksum_size = @sizeOf(@TypeOf(superblock.checksum));
230
- comptime assert(checksum_size == @sizeOf(u128));
231
-
232
- const copy_size = @sizeOf(@TypeOf(superblock.copy));
233
- comptime assert(copy_size == 1);
234
-
235
- const ignore_size = checksum_size + copy_size;
236
-
237
- return vsr.checksum(std.mem.asBytes(superblock)[ignore_size..]);
238
- }
239
-
240
- pub fn set_checksum(superblock: *SuperBlockHeader) void {
241
- assert(superblock.copy < constants.superblock_copies);
242
- assert(superblock.version == SuperBlockVersion);
243
- assert(superblock.flags == 0);
244
-
245
- for (mem.bytesAsSlice(u64, &superblock.reserved)) |word| assert(word == 0);
246
- for (mem.bytesAsSlice(u64, &superblock.vsr_state.reserved)) |word| assert(word == 0);
247
- for (mem.bytesAsSlice(u64, &superblock.vsr_headers_reserved)) |word| assert(word == 0);
248
-
249
- superblock.checksum = superblock.calculate_checksum();
250
- }
251
-
252
- pub fn valid_checksum(superblock: *const SuperBlockHeader) bool {
253
- return superblock.checksum == superblock.calculate_checksum();
254
- }
255
-
256
- /// Does not consider { checksum, copy } when comparing equality.
257
- pub fn equal(a: *const SuperBlockHeader, b: *const SuperBlockHeader) bool {
258
- for (mem.bytesAsSlice(u64, &a.reserved)) |word| assert(word == 0);
259
- for (mem.bytesAsSlice(u64, &b.reserved)) |word| assert(word == 0);
260
-
261
- for (mem.bytesAsSlice(u64, &a.vsr_state.reserved)) |word| assert(word == 0);
262
- for (mem.bytesAsSlice(u64, &b.vsr_state.reserved)) |word| assert(word == 0);
263
-
264
- for (mem.bytesAsSlice(u64, &a.vsr_headers_reserved)) |word| assert(word == 0);
265
- for (mem.bytesAsSlice(u64, &b.vsr_headers_reserved)) |word| assert(word == 0);
266
-
267
- if (a.version != b.version) return false;
268
- if (a.replica != b.replica) return false;
269
- if (a.cluster != b.cluster) return false;
270
- if (a.storage_size != b.storage_size) return false;
271
- if (a.storage_size_max != b.storage_size_max) return false;
272
- if (a.sequence != b.sequence) return false;
273
- if (a.parent != b.parent) return false;
274
- if (a.manifest_checksum != b.manifest_checksum) return false;
275
- if (a.free_set_checksum != b.free_set_checksum) return false;
276
- if (a.client_table_checksum != b.client_table_checksum) return false;
277
- if (!meta.eql(a.vsr_state, b.vsr_state)) return false;
278
- if (a.flags != b.flags) return false;
279
- if (!meta.eql(a.snapshots, b.snapshots)) return false;
280
- if (a.manifest_size != b.manifest_size) return false;
281
- if (a.free_set_size != b.free_set_size) return false;
282
- if (a.vsr_headers_count != b.vsr_headers_count) return false;
283
- if (!meta.eql(a.vsr_headers_all, b.vsr_headers_all)) return false;
284
-
285
- return true;
286
- }
287
-
288
- pub fn vsr_headers(superblock: *const SuperBlockHeader) vsr.Headers.ViewChangeSlice {
289
- return vsr.Headers.ViewChangeSlice.init(
290
- superblock.vsr_headers_all[0..superblock.vsr_headers_count],
291
- );
292
- }
293
- };
294
-
295
- comptime {
296
- switch (constants.superblock_copies) {
297
- 4, 6, 8 => {},
298
- else => @compileError("superblock_copies must be either { 4, 6, 8 } for flexible quorums."),
299
- }
300
- }
301
-
302
- /// The size of the entire superblock storage zone.
303
- pub const superblock_zone_size = superblock_copy_size * constants.superblock_copies;
304
-
305
- /// The size of an individual superblock including trailer.
306
- pub const superblock_copy_size = @sizeOf(SuperBlockHeader) + superblock_trailer_size_max;
307
- comptime {
308
- assert(superblock_copy_size % constants.sector_size == 0);
309
- }
310
-
311
- /// The maximum possible size of the superblock trailer, following the superblock header.
312
- pub const superblock_trailer_size_max = blk: {
313
- // To calculate the size of the superblock trailer we need to know:
314
- // 1. the maximum number of manifest blocks that should be able to be referenced, and
315
- // 2. the maximum possible size of the EWAH-compressed bit set addressable by the free set.
316
-
317
- assert(superblock_trailer_manifest_size_max > 0);
318
- assert(superblock_trailer_manifest_size_max % constants.sector_size == 0);
319
- assert(superblock_trailer_manifest_size_max % SuperBlockManifest.BlockReferenceSize == 0);
320
-
321
- assert(superblock_trailer_free_set_size_max > 0);
322
- assert(superblock_trailer_free_set_size_max % constants.sector_size == 0);
323
-
324
- assert(superblock_trailer_client_table_size_max > 0);
325
- assert(superblock_trailer_client_table_size_max % constants.sector_size == 0);
326
-
327
- // We order the smaller manifest section ahead of the block free set for better access locality.
328
- // For example, it's cheaper to skip over 1 MiB when reading from disk than to skip over 32 MiB.
329
- break :blk superblock_trailer_manifest_size_max +
330
- superblock_trailer_free_set_size_max +
331
- superblock_trailer_client_table_size_max;
332
- };
333
-
334
- // A manifest block reference of 40 bytes contains a tree hash, checksum, and address.
335
- // These references are stored in struct-of-arrays layout in the trailer for the sake of alignment.
336
- const superblock_trailer_manifest_size_max = blk: {
337
- assert(SuperBlockManifest.BlockReferenceSize == 16 + 16 + 8);
338
-
339
- // Use a multiple of sector * reference so that the size is exactly divisible without padding:
340
- // For example, this 2.5 MiB manifest trailer == 65536 references == 65536 * 511 or 34m tables.
341
- // TODO Size this relative to the expected number of tables & fragmentation.
342
- break :blk 16 * constants.sector_size * SuperBlockManifest.BlockReferenceSize;
343
- };
344
-
345
- const superblock_trailer_free_set_size_max = blk: {
346
- const encode_size_max = SuperBlockFreeSet.encode_size_max(block_count_max);
347
- assert(encode_size_max > 0);
348
-
349
- break :blk vsr.sector_ceil(encode_size_max);
350
- };
351
-
352
- const superblock_trailer_client_table_size_max = blk: {
353
- const encode_size_max = SuperBlockClientTable.encode_size_max;
354
- assert(encode_size_max > 0);
355
-
356
- break :blk vsr.sector_ceil(encode_size_max);
357
- };
358
-
359
- pub const data_file_size_min = blk: {
360
- break :blk superblock_zone_size + constants.journal_size_max;
361
- };
362
-
363
- /// The maximum number of blocks in the grid.
364
- const block_count_max = blk: {
365
- var size = constants.storage_size_max;
366
- size -= constants.superblock_copies * @sizeOf(SuperBlockHeader);
367
- size -= constants.superblock_copies * superblock_trailer_client_table_size_max;
368
- size -= constants.superblock_copies * superblock_trailer_manifest_size_max;
369
- size -= constants.journal_size_max;
370
- // At this point, the remainder of size is split between the grid and the freeset copies.
371
- // The size of a freeset is related to the number of blocks it must store.
372
- // Maximize the number of grid blocks.
373
-
374
- var shard_count = @divFloor(size, constants.block_size * SuperBlockFreeSet.shard_bits);
375
- while (true) : (shard_count -= 1) {
376
- const block_count = shard_count * SuperBlockFreeSet.shard_bits;
377
- const grid_size = block_count * constants.block_size;
378
- const free_set_size = vsr.sector_ceil(SuperBlockFreeSet.encode_size_max(block_count));
379
- const free_sets_size = constants.superblock_copies * free_set_size;
380
- if (free_sets_size + grid_size <= size) break;
381
- }
382
- break :blk shard_count * SuperBlockFreeSet.shard_bits;
383
- };
384
-
385
- comptime {
386
- assert(block_count_max > 0);
387
- assert(block_count_max * constants.block_size + data_file_size_min <= constants.storage_size_max);
388
- }
389
-
390
- /// This table shows the sequence number progression of the SuperBlock's headers.
391
- ///
392
- /// action working staging disk
393
- /// format seq seq seq
394
- /// 0 - Initially the file has no headers.
395
- /// 0 1 -
396
- /// 0 1 1 Write a copyset for the first sequence.
397
- /// 1 1 1 Read quorum; verify 3/4 are valid.
398
- ///
399
- /// open seq seq seq
400
- /// a
401
- /// a a Read quorum; verify 2/4 are valid.
402
- /// a (a) a Repair any broken copies of `a`.
403
- ///
404
- /// checkpoint seq seq seq
405
- /// a a a
406
- /// a a+1
407
- /// a a+1 a+1
408
- /// a+1 a+1 a+1 Read quorum; verify 3/4 are valid.
409
- ///
410
- /// view_change seq seq seq
411
- /// a a
412
- /// a a+1 a The new sequence reuses the original parent.
413
- /// a a+1 a+1
414
- /// a+1 a+1 a+1 Read quorum; verify 3/4 are valid.
415
- /// working staging disk
416
- ///
417
- pub fn SuperBlockType(comptime Storage: type) type {
418
- return struct {
419
- const SuperBlock = @This();
420
-
421
- pub const Manifest = SuperBlockManifest;
422
- pub const FreeSet = SuperBlockFreeSet;
423
- pub const ClientTable = SuperBlockClientTable;
424
-
425
- pub const Context = struct {
426
- pub const Caller = enum {
427
- format,
428
- open,
429
- checkpoint,
430
- view_change,
431
- };
432
-
433
- superblock: *SuperBlock,
434
- callback: fn (context: *Context) void,
435
- caller: Caller,
436
-
437
- write: Storage.Write = undefined,
438
- read: Storage.Read = undefined,
439
- read_threshold: ?Quorums.Threshold = null,
440
- copy: ?u8 = null,
441
- /// Used by format(), checkpoint(), and view_change().
442
- vsr_state: ?SuperBlockHeader.VSRState = null,
443
- /// Used by format() and view_change().
444
- vsr_headers: ?vsr.Headers.ViewChangeArray = null,
445
- repairs: ?Quorums.RepairIterator = null, // Used by open().
446
- };
447
-
448
- storage: *Storage,
449
-
450
- /// The first logical offset that may be written to the superblock storage zone.
451
- storage_offset: u64 = 0,
452
-
453
- /// The total size of the superblock storage zone after this physical offset.
454
- storage_size: u64 = superblock_zone_size,
455
-
456
- /// The superblock that was recovered at startup after a crash or that was last written.
457
- working: *align(constants.sector_size) SuperBlockHeader,
458
-
459
- /// The superblock that will replace the current working superblock once written.
460
- /// We cannot mutate any working state directly until it is safely on stable storage.
461
- /// Otherwise, we may accidentally externalize guarantees that are not yet durable.
462
- staging: *align(constants.sector_size) SuperBlockHeader,
463
-
464
- /// The copies that we read into at startup or when verifying the written superblock.
465
- reading: []align(constants.sector_size) SuperBlockHeader,
466
-
467
- /// It might seem that, at startup, we simply install the copy with the highest sequence.
468
- ///
469
- /// However, there's a scenario where:
470
- /// 1. We are able to write sequence 7 to 3/4 copies, with the last write being lost.
471
- /// 2. We startup and read all copies, with reads misdirected to the copy with sequence 6.
472
- ///
473
- /// Another scenario:
474
- /// 1. We begin to write sequence 7 to 1 copy and then crash.
475
- /// 2. At startup, the read to this copy fails, and we recover at sequence=6.
476
- /// 3. We then checkpoint another sequence 7 to 3/4 copies and crash.
477
- /// 4. At startup, we then see 4 copies with the same sequence with 1 checksum different.
478
- ///
479
- /// To mitigate these scenarios, we ensure that we are able to read a quorum of copies.
480
- /// This also gives us confidence that our working superblock has sufficient redundancy.
481
- quorums: Quorums = Quorums{},
482
-
483
- manifest: Manifest,
484
- free_set: FreeSet,
485
- client_table: ClientTable,
486
-
487
- manifest_buffer: []align(constants.sector_size) u8,
488
- free_set_buffer: []align(constants.sector_size) u8,
489
- client_table_buffer: []align(constants.sector_size) u8,
490
-
491
- /// Whether the superblock has been opened. An open superblock may not be formatted.
492
- opened: bool = false,
493
- block_count_limit: usize,
494
- storage_size_limit: u64,
495
-
496
- /// Beyond formatting and opening of the superblock, which are mutually exclusive of all
497
- /// other operations, only the following queue combinations are allowed:
498
- /// 1. A view change may queue on a checkpoint.
499
- /// 2. A checkpoint may queue on a view change.
500
- ///
501
- /// There may only be a single caller queued at a time, to ensure that the VSR protocol is
502
- /// careful to submit at most one view change at a time.
503
- queue_head: ?*Context = null,
504
- queue_tail: ?*Context = null,
505
-
506
- pub const Options = struct {
507
- storage: *Storage,
508
- message_pool: *MessagePool,
509
- storage_size_limit: u64,
510
- };
511
-
512
- pub fn init(allocator: mem.Allocator, options: Options) !SuperBlock {
513
- assert(options.storage_size_limit >= data_file_size_min);
514
- assert(options.storage_size_limit <= constants.storage_size_max);
515
- assert(options.storage_size_limit % constants.sector_size == 0);
516
-
517
- const shard_count_limit = @intCast(usize, @divFloor(
518
- options.storage_size_limit - data_file_size_min,
519
- constants.block_size * FreeSet.shard_bits,
520
- ));
521
- const block_count_limit = shard_count_limit * FreeSet.shard_bits;
522
- assert(block_count_limit <= block_count_max);
523
-
524
- const a = try allocator.allocAdvanced(SuperBlockHeader, constants.sector_size, 1, .exact);
525
- errdefer allocator.free(a);
526
-
527
- const b = try allocator.allocAdvanced(SuperBlockHeader, constants.sector_size, 1, .exact);
528
- errdefer allocator.free(b);
529
-
530
- const reading = try allocator.allocAdvanced(
531
- [constants.superblock_copies]SuperBlockHeader,
532
- constants.sector_size,
533
- 1,
534
- .exact,
535
- );
536
- errdefer allocator.free(reading);
537
-
538
- var manifest = try Manifest.init(
539
- allocator,
540
- @divExact(
541
- superblock_trailer_manifest_size_max,
542
- Manifest.BlockReferenceSize,
543
- ),
544
- @import("../lsm/tree.zig").table_count_max,
545
- );
546
- errdefer manifest.deinit(allocator);
547
-
548
- var free_set = try FreeSet.init(allocator, block_count_limit);
549
- errdefer free_set.deinit(allocator);
550
-
551
- var client_table = try ClientTable.init(allocator, options.message_pool);
552
- errdefer client_table.deinit(allocator);
553
-
554
- const manifest_buffer = try allocator.allocAdvanced(
555
- u8,
556
- constants.sector_size,
557
- superblock_trailer_manifest_size_max,
558
- .exact,
559
- );
560
- errdefer allocator.free(manifest_buffer);
561
-
562
- const free_set_buffer = try allocator.allocAdvanced(
563
- u8,
564
- constants.sector_size,
565
- SuperBlockFreeSet.encode_size_max(block_count_limit),
566
- .exact,
567
- );
568
- errdefer allocator.free(free_set_buffer);
569
-
570
- const client_table_buffer = try allocator.allocAdvanced(
571
- u8,
572
- constants.sector_size,
573
- superblock_trailer_client_table_size_max,
574
- .exact,
575
- );
576
- errdefer allocator.free(client_table_buffer);
577
-
578
- return SuperBlock{
579
- .storage = options.storage,
580
- .working = &a[0],
581
- .staging = &b[0],
582
- .reading = &reading[0],
583
- .manifest = manifest,
584
- .free_set = free_set,
585
- .client_table = client_table,
586
- .manifest_buffer = manifest_buffer,
587
- .free_set_buffer = free_set_buffer,
588
- .client_table_buffer = client_table_buffer,
589
- .block_count_limit = block_count_limit,
590
- .storage_size_limit = options.storage_size_limit,
591
- };
592
- }
593
-
594
- pub fn deinit(superblock: *SuperBlock, allocator: mem.Allocator) void {
595
- allocator.destroy(superblock.working);
596
- allocator.destroy(superblock.staging);
597
- allocator.free(superblock.reading);
598
-
599
- superblock.manifest.deinit(allocator);
600
- superblock.free_set.deinit(allocator);
601
- superblock.client_table.deinit(allocator);
602
-
603
- allocator.free(superblock.manifest_buffer);
604
- allocator.free(superblock.free_set_buffer);
605
- allocator.free(superblock.client_table_buffer);
606
- }
607
-
608
- pub const FormatOptions = struct {
609
- cluster: u32,
610
- replica: u8,
611
- };
612
-
613
- pub fn format(
614
- superblock: *SuperBlock,
615
- callback: fn (context: *Context) void,
616
- context: *Context,
617
- options: FormatOptions,
618
- ) void {
619
- assert(!superblock.opened);
620
-
621
- assert(options.replica < constants.replicas_max);
622
-
623
- // This working copy provides the parent checksum, and will not be written to disk.
624
- // We therefore use zero values to make this parent checksum as stable as possible.
625
- superblock.working.* = .{
626
- .copy = 0,
627
- .version = SuperBlockVersion,
628
- .sequence = 0,
629
- .replica = options.replica,
630
- .cluster = options.cluster,
631
- .storage_size = 0,
632
- .storage_size_max = constants.storage_size_max,
633
- .parent = 0,
634
- .manifest_checksum = 0,
635
- .free_set_checksum = 0,
636
- .client_table_checksum = 0,
637
- .vsr_state = .{
638
- .commit_min_checksum = 0,
639
- .commit_min = 0,
640
- .commit_max = 0,
641
- .log_view = 0,
642
- .view = 0,
643
- },
644
- .snapshots = undefined,
645
- .manifest_size = 0,
646
- .free_set_size = 0,
647
- .client_table_size = 0,
648
- .vsr_headers_count = 0,
649
- .vsr_headers_all = mem.zeroes([constants.view_change_headers_max]vsr.Header),
650
- };
651
-
652
- mem.set(SuperBlockHeader.Snapshot, &superblock.working.snapshots, .{
653
- .created = 0,
654
- .queried = 0,
655
- .timeout = 0,
656
- });
657
-
658
- superblock.working.set_checksum();
659
-
660
- context.* = .{
661
- .superblock = superblock,
662
- .callback = callback,
663
- .caller = .format,
664
- .vsr_state = SuperBlockHeader.VSRState.root(options.cluster),
665
- .vsr_headers = vsr.Headers.ViewChangeArray.root(options.cluster),
666
- };
667
-
668
- // TODO At a higher layer, we must:
669
- // 1. verify that there is no valid superblock, and
670
- // 2. zero the superblock, WAL and client table to ensure storage determinism.
671
-
672
- superblock.acquire(context);
673
- }
674
-
675
- pub fn open(
676
- superblock: *SuperBlock,
677
- callback: fn (context: *Context) void,
678
- context: *Context,
679
- ) void {
680
- assert(!superblock.opened);
681
-
682
- context.* = .{
683
- .superblock = superblock,
684
- .callback = callback,
685
- .caller = .open,
686
- };
687
-
688
- superblock.acquire(context);
689
- }
690
-
691
- const UpdateCheckpoint = struct {
692
- commit_min_checksum: u128,
693
- commit_min: u64,
694
- commit_max: u64,
695
- };
696
-
697
- /// The vsr_state must update the commit_min and commit_min_checksum.
698
- /// The vsr_state must not update view/log_view.
699
- pub fn checkpoint(
700
- superblock: *SuperBlock,
701
- callback: fn (context: *Context) void,
702
- context: *Context,
703
- update: UpdateCheckpoint,
704
- ) void {
705
- assert(superblock.opened);
706
- assert(superblock.staging.vsr_state.commit_min < update.commit_min);
707
- assert(superblock.staging.vsr_state.commit_min_checksum != update.commit_min_checksum);
708
- assert(update.commit_min <= update.commit_max);
709
-
710
- const vsr_state = SuperBlockHeader.VSRState{
711
- .commit_min_checksum = update.commit_min_checksum,
712
- .commit_min = update.commit_min,
713
- .commit_max = update.commit_max,
714
- .log_view = superblock.staging.vsr_state.log_view,
715
- .view = superblock.staging.vsr_state.view,
716
- };
717
- assert(superblock.staging.vsr_state.would_be_updated_by(vsr_state));
718
-
719
- context.* = .{
720
- .superblock = superblock,
721
- .callback = callback,
722
- .caller = .checkpoint,
723
- .vsr_state = vsr_state,
724
- };
725
-
726
- superblock.acquire(context);
727
- }
728
-
729
- const UpdateViewChange = struct {
730
- commit_max: u64,
731
- log_view: u32,
732
- view: u32,
733
- headers: *const vsr.Headers.ViewChangeArray,
734
- };
735
-
736
- /// The replica calls view_change() to persist its view/log_view — it cannot
737
- /// advertise either value until it is certain they will never backtrack.
738
- ///
739
- /// The update must advance view/log_view (monotonically increasing).
740
- pub fn view_change(
741
- superblock: *SuperBlock,
742
- callback: fn (context: *Context) void,
743
- context: *Context,
744
- update: UpdateViewChange,
745
- ) void {
746
- assert(superblock.opened);
747
- assert(superblock.staging.vsr_state.commit_min <= update.headers.array.get(0).op);
748
- assert(superblock.staging.vsr_state.commit_max <= update.commit_max);
749
- assert(superblock.staging.vsr_state.view <= update.view);
750
- assert(superblock.staging.vsr_state.log_view <= update.log_view);
751
- assert(superblock.staging.vsr_state.log_view < update.log_view or
752
- superblock.staging.vsr_state.view < update.view);
753
-
754
- vsr.Headers.ViewChangeSlice.verify(update.headers.array.constSlice());
755
- assert(update.view >= update.log_view);
756
-
757
- const vsr_state = SuperBlockHeader.VSRState{
758
- .commit_min_checksum = superblock.staging.vsr_state.commit_min_checksum,
759
- .commit_min = superblock.staging.vsr_state.commit_min,
760
- .commit_max = update.commit_max,
761
- .log_view = update.log_view,
762
- .view = update.view,
763
- };
764
- assert(vsr_state.internally_consistent());
765
- assert(superblock.staging.vsr_state.would_be_updated_by(vsr_state));
766
- assert(superblock.staging.vsr_state.monotonic(vsr_state));
767
-
768
- log.debug("view_change: commit_max={}..{} log_view={}..{} view={}..{} head={}..{}", .{
769
- superblock.staging.vsr_state.commit_max,
770
- update.commit_max,
771
-
772
- superblock.staging.vsr_state.log_view,
773
- update.log_view,
774
-
775
- superblock.staging.vsr_state.view,
776
- update.view,
777
-
778
- superblock.staging.vsr_headers().slice[0].checksum,
779
- update.headers.array.get(0).checksum,
780
- });
781
-
782
- context.* = .{
783
- .superblock = superblock,
784
- .callback = callback,
785
- .caller = .view_change,
786
- .vsr_state = vsr_state,
787
- .vsr_headers = update.headers.*,
788
- };
789
-
790
- superblock.acquire(context);
791
- }
792
-
793
- pub fn view_change_in_progress(superblock: *const SuperBlock) bool {
794
- assert(superblock.opened);
795
-
796
- if (superblock.queue_head) |head| {
797
- if (head.caller == .view_change) return true;
798
- assert(head.caller == .checkpoint);
799
- }
800
-
801
- if (superblock.queue_tail) |tail| {
802
- assert(tail.caller == .view_change);
803
- return true;
804
- }
805
-
806
- return false;
807
- }
808
-
809
- fn write_staging(superblock: *SuperBlock, context: *Context) void {
810
- assert(context.caller != .open);
811
- assert(context.caller == .format or superblock.opened);
812
- assert(context.copy == null);
813
- assert(context.vsr_state.?.internally_consistent());
814
- assert(superblock.queue_head == context);
815
- assert(superblock.queue_tail == null);
816
- assert(superblock.working.vsr_state.would_be_updated_by(context.vsr_state.?));
817
-
818
- superblock.staging.* = superblock.working.*;
819
- superblock.staging.sequence = superblock.staging.sequence + 1;
820
- superblock.staging.parent = superblock.staging.checksum;
821
- superblock.staging.vsr_state = context.vsr_state.?;
822
-
823
- if (context.vsr_headers) |*headers| {
824
- assert(context.caller == .format or context.caller == .view_change);
825
-
826
- superblock.staging.vsr_headers_count = @intCast(u32, headers.array.len);
827
- stdx.copy_disjoint(
828
- .exact,
829
- vsr.Header,
830
- superblock.staging.vsr_headers_all[0..headers.array.len],
831
- headers.array.constSlice(),
832
- );
833
- std.mem.set(
834
- vsr.Header,
835
- superblock.staging.vsr_headers_all[headers.array.len..],
836
- std.mem.zeroes(vsr.Header),
837
- );
838
- } else {
839
- assert(context.caller == .checkpoint);
840
- }
841
-
842
- if (context.caller != .view_change) {
843
- superblock.write_staging_encode_manifest();
844
- superblock.write_staging_encode_free_set();
845
- superblock.write_staging_encode_client_table();
846
- }
847
- superblock.staging.set_checksum();
848
-
849
- context.copy = 0;
850
- if (context.caller == .view_change) {
851
- superblock.write_header(context);
852
- } else {
853
- superblock.write_manifest(context);
854
- }
855
- }
856
-
857
- fn write_staging_encode_manifest(superblock: *SuperBlock) void {
858
- const staging: *SuperBlockHeader = superblock.staging;
859
- const target = superblock.manifest_buffer;
860
-
861
- staging.manifest_size = @intCast(u32, superblock.manifest.encode(target));
862
- staging.manifest_checksum = vsr.checksum(target[0..staging.manifest_size]);
863
- }
864
-
865
- fn write_staging_encode_free_set(superblock: *SuperBlock) void {
866
- const staging: *SuperBlockHeader = superblock.staging;
867
- const encode_size_max = FreeSet.encode_size_max(superblock.block_count_limit);
868
- const target = superblock.free_set_buffer[0..encode_size_max];
869
-
870
- superblock.free_set.include_staging();
871
- defer superblock.free_set.exclude_staging();
872
-
873
- superblock.verify_manifest_blocks_are_acquired_in_free_set();
874
-
875
- staging.storage_size = data_file_size_min;
876
-
877
- if (superblock.free_set.highest_address_acquired()) |address| {
878
- staging.storage_size += address * constants.block_size;
879
- }
880
- assert(staging.storage_size >= data_file_size_min);
881
- assert(staging.storage_size <= staging.storage_size_max);
882
- assert(staging.storage_size <= superblock.storage_size_limit);
883
-
884
- if (superblock.free_set.count_acquired() == 0) {
885
- // EWAH encodes a zero-length bitset to an empty slice anyway, but handle this
886
- // condition separately so that during formatting it doesn't depend on the choice
887
- // of storage_size_limit.
888
- staging.free_set_size = 0;
889
- } else {
890
- staging.free_set_size = @intCast(u32, superblock.free_set.encode(target));
891
- }
892
- staging.free_set_checksum = vsr.checksum(target[0..staging.free_set_size]);
893
- }
894
-
895
- fn write_staging_encode_client_table(superblock: *SuperBlock) void {
896
- const staging: *SuperBlockHeader = superblock.staging;
897
- const target = superblock.client_table_buffer;
898
-
899
- staging.client_table_size = @intCast(u32, superblock.client_table.encode(target));
900
- staging.client_table_checksum = vsr.checksum(target[0..staging.client_table_size]);
901
- }
902
-
903
- fn write_manifest(superblock: *SuperBlock, context: *Context) void {
904
- assert(superblock.queue_head == context);
905
-
906
- const size = vsr.sector_ceil(superblock.staging.manifest_size);
907
- assert(size <= superblock_trailer_manifest_size_max);
908
-
909
- const buffer = superblock.manifest_buffer[0..size];
910
- const offset = areas.manifest.offset(context.copy.?);
911
-
912
- mem.set(u8, buffer[superblock.staging.manifest_size..], 0); // Zero sector padding.
913
-
914
- assert(superblock.staging.manifest_checksum == vsr.checksum(
915
- superblock.manifest_buffer[0..superblock.staging.manifest_size],
916
- ));
917
-
918
- log.debug("{s}: write_manifest: checksum={x} size={} offset={}", .{
919
- @tagName(context.caller),
920
- superblock.staging.manifest_checksum,
921
- superblock.staging.manifest_size,
922
- offset,
923
- });
924
-
925
- superblock.assert_bounds(offset, buffer.len);
926
-
927
- if (buffer.len == 0) {
928
- write_manifest_callback(&context.write);
929
- return;
930
- }
931
-
932
- superblock.storage.write_sectors(
933
- write_manifest_callback,
934
- &context.write,
935
- buffer,
936
- .superblock,
937
- offset,
938
- );
939
- }
940
-
941
- fn write_manifest_callback(write: *Storage.Write) void {
942
- const context = @fieldParentPtr(Context, "write", write);
943
- context.superblock.write_free_set(context);
944
- }
945
-
946
- fn write_free_set(superblock: *SuperBlock, context: *Context) void {
947
- assert(superblock.queue_head == context);
948
-
949
- const size = vsr.sector_ceil(superblock.staging.free_set_size);
950
- assert(size <= superblock_trailer_free_set_size_max);
951
-
952
- const buffer = superblock.free_set_buffer[0..size];
953
- const offset = areas.free_set.offset(context.copy.?);
954
-
955
- mem.set(u8, buffer[superblock.staging.free_set_size..], 0); // Zero sector padding.
956
-
957
- assert(superblock.staging.free_set_checksum == vsr.checksum(
958
- superblock.free_set_buffer[0..superblock.staging.free_set_size],
959
- ));
960
-
961
- log.debug("{s}: write_free_set: checksum={x} size={} offset={}", .{
962
- @tagName(context.caller),
963
- superblock.staging.free_set_checksum,
964
- superblock.staging.free_set_size,
965
- offset,
966
- });
967
-
968
- superblock.assert_bounds(offset, buffer.len);
969
-
970
- if (buffer.len == 0) {
971
- write_free_set_callback(&context.write);
972
- return;
973
- }
974
-
975
- superblock.storage.write_sectors(
976
- write_free_set_callback,
977
- &context.write,
978
- buffer,
979
- .superblock,
980
- offset,
981
- );
982
- }
983
-
984
- fn write_free_set_callback(write: *Storage.Write) void {
985
- const context = @fieldParentPtr(Context, "write", write);
986
- context.superblock.write_client_table(context);
987
- }
988
-
989
- fn write_client_table(superblock: *SuperBlock, context: *Context) void {
990
- assert(superblock.queue_head == context);
991
-
992
- const size = vsr.sector_ceil(superblock.staging.client_table_size);
993
- assert(size <= superblock_trailer_client_table_size_max);
994
-
995
- const buffer = superblock.client_table_buffer[0..size];
996
- const offset = areas.client_table.offset(context.copy.?);
997
-
998
- mem.set(u8, buffer[superblock.staging.client_table_size..], 0); // Zero sector padding.
999
-
1000
- assert(superblock.staging.client_table_checksum == vsr.checksum(
1001
- superblock.client_table_buffer[0..superblock.staging.client_table_size],
1002
- ));
1003
-
1004
- log.debug("{s}: write_client_table: checksum={x} size={} offset={}", .{
1005
- @tagName(context.caller),
1006
- superblock.staging.client_table_checksum,
1007
- superblock.staging.client_table_size,
1008
- offset,
1009
- });
1010
-
1011
- superblock.assert_bounds(offset, buffer.len);
1012
-
1013
- if (buffer.len == 0) {
1014
- write_client_table_callback(&context.write);
1015
- return;
1016
- }
1017
-
1018
- superblock.storage.write_sectors(
1019
- write_client_table_callback,
1020
- &context.write,
1021
- buffer,
1022
- .superblock,
1023
- offset,
1024
- );
1025
- }
1026
-
1027
- fn write_client_table_callback(write: *Storage.Write) void {
1028
- const context = @fieldParentPtr(Context, "write", write);
1029
- context.superblock.write_header(context);
1030
- }
1031
-
1032
- fn write_header(superblock: *SuperBlock, context: *Context) void {
1033
- assert(superblock.queue_head == context);
1034
-
1035
- // We update the working superblock for a checkpoint/format/view_change:
1036
- // open() does not update the working superblock, since it only writes to repair.
1037
- if (context.caller == .open) {
1038
- assert(superblock.staging.sequence == superblock.working.sequence);
1039
- } else {
1040
- assert(superblock.staging.sequence == superblock.working.sequence + 1);
1041
- assert(superblock.staging.parent == superblock.working.checksum);
1042
- }
1043
-
1044
- // The superblock cluster and replica should never change once formatted:
1045
- assert(superblock.staging.cluster == superblock.working.cluster);
1046
- assert(superblock.staging.replica == superblock.working.replica);
1047
-
1048
- assert(superblock.staging.storage_size >= data_file_size_min);
1049
- assert(superblock.staging.storage_size <= superblock.staging.storage_size_max);
1050
-
1051
- assert(context.copy.? < constants.superblock_copies);
1052
- superblock.staging.copy = context.copy.?;
1053
-
1054
- // Updating the copy number should not affect the checksum, which was previously set:
1055
- assert(superblock.staging.valid_checksum());
1056
-
1057
- const buffer = mem.asBytes(superblock.staging);
1058
- const offset = areas.header.offset(context.copy.?);
1059
-
1060
- log.debug("{}: {s}: write_header: checksum={x} sequence={} copy={} size={} offset={}", .{
1061
- superblock.staging.replica,
1062
- @tagName(context.caller),
1063
- superblock.staging.checksum,
1064
- superblock.staging.sequence,
1065
- context.copy.?,
1066
- buffer.len,
1067
- offset,
1068
- });
1069
-
1070
- superblock.assert_bounds(offset, buffer.len);
1071
-
1072
- superblock.storage.write_sectors(
1073
- write_header_callback,
1074
- &context.write,
1075
- buffer,
1076
- .superblock,
1077
- offset,
1078
- );
1079
- }
1080
-
1081
- fn write_header_callback(write: *Storage.Write) void {
1082
- const context = @fieldParentPtr(Context, "write", write);
1083
- const superblock = context.superblock;
1084
- const copy = context.copy.?;
1085
-
1086
- assert(superblock.queue_head == context);
1087
-
1088
- assert(copy < constants.superblock_copies);
1089
- assert(copy == superblock.staging.copy);
1090
-
1091
- if (context.caller == .open) {
1092
- context.copy = null;
1093
- superblock.repair(context);
1094
- return;
1095
- }
1096
-
1097
- if (copy + 1 == constants.superblock_copies) {
1098
- context.copy = null;
1099
- superblock.read_working(context, .verify);
1100
- } else {
1101
- context.copy = copy + 1;
1102
-
1103
- switch (context.caller) {
1104
- .open => unreachable,
1105
- .format, .checkpoint => superblock.write_manifest(context),
1106
- .view_change => superblock.write_header(context),
1107
- }
1108
- }
1109
- }
1110
-
1111
- fn read_working(
1112
- superblock: *SuperBlock,
1113
- context: *Context,
1114
- threshold: Quorums.Threshold,
1115
- ) void {
1116
- assert(superblock.queue_head == context);
1117
- assert(context.copy == null);
1118
- assert(context.read_threshold == null);
1119
-
1120
- // We do not submit reads in parallel, as while this would shave off 1ms, it would also
1121
- // increase the risk that a single fault applies to more reads due to temporal locality.
1122
- // This would make verification reads more flaky when we do experience a read fault.
1123
- // See "An Analysis of Data Corruption in the Storage Stack".
1124
-
1125
- context.copy = 0;
1126
- context.read_threshold = threshold;
1127
- for (superblock.reading) |*copy| copy.* = undefined;
1128
- superblock.read_header(context);
1129
- }
1130
-
1131
- fn read_header(superblock: *SuperBlock, context: *Context) void {
1132
- assert(superblock.queue_head == context);
1133
- assert(context.copy.? < constants.superblock_copies);
1134
- assert(context.read_threshold != null);
1135
-
1136
- const buffer = mem.asBytes(&superblock.reading[context.copy.?]);
1137
- const offset = areas.header.offset(context.copy.?);
1138
-
1139
- log.debug("{s}: read_header: copy={} size={} offset={}", .{
1140
- @tagName(context.caller),
1141
- context.copy.?,
1142
- buffer.len,
1143
- offset,
1144
- });
1145
-
1146
- superblock.assert_bounds(offset, buffer.len);
1147
-
1148
- superblock.storage.read_sectors(
1149
- read_header_callback,
1150
- &context.read,
1151
- buffer,
1152
- .superblock,
1153
- offset,
1154
- );
1155
- }
1156
-
1157
- fn read_header_callback(read: *Storage.Read) void {
1158
- const context = @fieldParentPtr(Context, "read", read);
1159
- const superblock = context.superblock;
1160
- const threshold = context.read_threshold.?;
1161
-
1162
- assert(superblock.queue_head == context);
1163
-
1164
- assert(context.copy.? < constants.superblock_copies);
1165
- if (context.copy.? + 1 != constants.superblock_copies) {
1166
- context.copy = context.copy.? + 1;
1167
- superblock.read_header(context);
1168
- return;
1169
- }
1170
-
1171
- context.read_threshold = null;
1172
- context.copy = null;
1173
-
1174
- if (superblock.quorums.working(superblock.reading, threshold)) |quorum| {
1175
- assert(quorum.valid);
1176
- assert(quorum.copies.count() >= threshold.count());
1177
- assert(quorum.header.storage_size_max == constants.storage_size_max);
1178
-
1179
- const working = quorum.header;
1180
- if (threshold == .verify) {
1181
- if (working.checksum != superblock.staging.checksum) {
1182
- @panic("superblock failed verification after writing");
1183
- }
1184
- assert(working.equal(superblock.staging));
1185
- }
1186
-
1187
- if (context.caller == .format) {
1188
- assert(working.sequence == 1);
1189
- assert(working.storage_size == data_file_size_min);
1190
- assert(working.manifest_size == 0);
1191
- assert(working.free_set_size == 0);
1192
- assert(working.client_table_size == 4);
1193
- assert(working.vsr_state.commit_min_checksum ==
1194
- vsr.Header.root_prepare(working.cluster).checksum);
1195
- assert(working.vsr_state.commit_min == 0);
1196
- assert(working.vsr_state.commit_max == 0);
1197
- assert(working.vsr_state.log_view == 0);
1198
- assert(working.vsr_state.view == 0);
1199
- assert(working.vsr_headers_count == 1);
1200
- } else if (context.caller == .checkpoint) {
1201
- superblock.free_set.checkpoint();
1202
- }
1203
-
1204
- superblock.working.* = working.*;
1205
- superblock.staging.* = working.*;
1206
- log.debug(
1207
- "{s}: installed working superblock: checksum={x} sequence={} cluster={} " ++
1208
- "replica={} size={} " ++
1209
- "commit_min_checksum={} commit_min={} commit_max={} " ++
1210
- "log_view={} view={}",
1211
- .{
1212
- @tagName(context.caller),
1213
- superblock.working.checksum,
1214
- superblock.working.sequence,
1215
- superblock.working.cluster,
1216
- superblock.working.replica,
1217
- superblock.working.storage_size,
1218
- superblock.working.vsr_state.commit_min_checksum,
1219
- superblock.working.vsr_state.commit_min,
1220
- superblock.working.vsr_state.commit_max,
1221
- superblock.working.vsr_state.log_view,
1222
- superblock.working.vsr_state.view,
1223
- },
1224
- );
1225
- for (superblock.working.vsr_headers().slice) |*header| {
1226
- log.debug("{s}: vsr_header: op={} checksum={}", .{
1227
- @tagName(context.caller),
1228
- header.op,
1229
- header.checksum,
1230
- });
1231
- }
1232
-
1233
- if (context.caller == .open) {
1234
- if (context.repairs) |_| {
1235
- // We just verified that the repair completed.
1236
- assert(threshold == .verify);
1237
- superblock.release(context);
1238
- } else {
1239
- assert(threshold == .open);
1240
- context.copy = 0;
1241
- context.repairs = quorum.repairs();
1242
- superblock.read_manifest(context);
1243
- }
1244
- } else {
1245
- // TODO Consider calling TRIM() on Grid's free suffix after checkpointing.
1246
- superblock.release(context);
1247
- }
1248
- } else |err| switch (err) {
1249
- error.Fork => @panic("superblock forked"),
1250
- error.NotFound => @panic("superblock not found"),
1251
- error.QuorumLost => @panic("superblock quorum lost"),
1252
- error.ParentNotConnected => @panic("superblock parent not connected"),
1253
- error.ParentSkipped => @panic("superblock parent superseded"),
1254
- error.VSRStateNotMonotonic => @panic("superblock vsr state not monotonic"),
1255
- }
1256
- }
1257
-
1258
- fn read_manifest(superblock: *SuperBlock, context: *Context) void {
1259
- assert(context.caller == .open);
1260
- assert(superblock.queue_head == context);
1261
- assert(context.copy.? < constants.superblock_copies);
1262
-
1263
- const size = vsr.sector_ceil(superblock.working.manifest_size);
1264
- assert(size <= superblock_trailer_manifest_size_max);
1265
-
1266
- const buffer = superblock.manifest_buffer[0..size];
1267
- const offset = areas.manifest.offset(context.copy.?);
1268
-
1269
- log.debug("{s}: read_manifest: copy={} size={} offset={}", .{
1270
- @tagName(context.caller),
1271
- context.copy.?,
1272
- buffer.len,
1273
- offset,
1274
- });
1275
-
1276
- superblock.assert_bounds(offset, buffer.len);
1277
-
1278
- if (buffer.len == 0) {
1279
- read_manifest_callback(&context.read);
1280
- return;
1281
- }
1282
-
1283
- superblock.storage.read_sectors(
1284
- read_manifest_callback,
1285
- &context.read,
1286
- buffer,
1287
- .superblock,
1288
- offset,
1289
- );
1290
- }
1291
-
1292
- fn read_manifest_callback(read: *Storage.Read) void {
1293
- const context = @fieldParentPtr(Context, "read", read);
1294
- const superblock = context.superblock;
1295
- const copy = context.copy.?;
1296
-
1297
- assert(context.caller == .open);
1298
- assert(superblock.queue_head == context);
1299
- assert(!superblock.opened);
1300
- assert(superblock.manifest.count == 0);
1301
-
1302
- const slice = superblock.manifest_buffer[0..superblock.working.manifest_size];
1303
- if (vsr.checksum(slice) == superblock.working.manifest_checksum) {
1304
- superblock.manifest.decode(slice);
1305
-
1306
- log.debug("open: read_manifest: manifest blocks: {}/{}", .{
1307
- superblock.manifest.count,
1308
- superblock.manifest.count_max,
1309
- });
1310
-
1311
- // TODO Repair any impaired copies before we continue.
1312
- // At present, we repair at the next checkpoint.
1313
- // We do not repair padding.
1314
- context.copy = 0;
1315
- superblock.read_free_set(context);
1316
- } else {
1317
- log.debug("open: read_manifest: corrupt copy={}", .{copy});
1318
- if (copy + 1 == constants.superblock_copies) {
1319
- @panic("superblock manifest lost");
1320
- } else {
1321
- context.copy = copy + 1;
1322
- superblock.read_manifest(context);
1323
- }
1324
- }
1325
- }
1326
-
1327
- fn read_free_set(superblock: *SuperBlock, context: *Context) void {
1328
- assert(context.caller == .open);
1329
- assert(superblock.queue_head == context);
1330
- assert(context.copy.? < constants.superblock_copies);
1331
-
1332
- const size = vsr.sector_ceil(superblock.working.free_set_size);
1333
- assert(size <= superblock_trailer_free_set_size_max);
1334
-
1335
- const buffer = superblock.free_set_buffer[0..size];
1336
- const offset = areas.free_set.offset(context.copy.?);
1337
-
1338
- log.debug("{s}: read_free_set: copy={} size={} offset={}", .{
1339
- @tagName(context.caller),
1340
- context.copy.?,
1341
- buffer.len,
1342
- offset,
1343
- });
1344
-
1345
- superblock.assert_bounds(offset, buffer.len);
1346
-
1347
- if (buffer.len == 0) {
1348
- read_free_set_callback(&context.read);
1349
- return;
1350
- }
1351
-
1352
- superblock.storage.read_sectors(
1353
- read_free_set_callback,
1354
- &context.read,
1355
- buffer,
1356
- .superblock,
1357
- offset,
1358
- );
1359
- }
1360
-
1361
- fn read_free_set_callback(read: *Storage.Read) void {
1362
- const context = @fieldParentPtr(Context, "read", read);
1363
- const superblock = context.superblock;
1364
- const copy = context.copy.?;
1365
-
1366
- assert(context.caller == .open);
1367
- assert(superblock.queue_head == context);
1368
- assert(!superblock.opened);
1369
- assert(superblock.free_set.count_acquired() == 0);
1370
-
1371
- const slice = superblock.free_set_buffer[0..superblock.working.free_set_size];
1372
- if (vsr.checksum(slice) == superblock.working.free_set_checksum) {
1373
- superblock.free_set.decode(slice);
1374
-
1375
- log.debug("open: read_free_set: acquired blocks: {}/{}/{}", .{
1376
- superblock.free_set.count_acquired(),
1377
- superblock.block_count_limit,
1378
- block_count_max,
1379
- });
1380
-
1381
- superblock.verify_manifest_blocks_are_acquired_in_free_set();
1382
-
1383
- // TODO Repair any impaired copies before we continue.
1384
- superblock.read_client_table(context);
1385
- } else if (copy + 1 == constants.superblock_copies) {
1386
- @panic("superblock free set lost");
1387
- } else {
1388
- log.debug("open: read_free_set: corrupt copy={}", .{copy});
1389
- context.copy = copy + 1;
1390
- superblock.read_free_set(context);
1391
- }
1392
- }
1393
-
1394
- fn verify_manifest_blocks_are_acquired_in_free_set(superblock: *SuperBlock) void {
1395
- assert(superblock.manifest.count <= superblock.free_set.count_acquired());
1396
- for (superblock.manifest.addresses[0..superblock.manifest.count]) |address| {
1397
- assert(!superblock.free_set.is_free(address));
1398
- }
1399
- }
1400
-
1401
- fn read_client_table(superblock: *SuperBlock, context: *Context) void {
1402
- assert(context.caller == .open);
1403
- assert(superblock.queue_head == context);
1404
- assert(context.copy.? < constants.superblock_copies);
1405
-
1406
- const size = vsr.sector_ceil(superblock.working.client_table_size);
1407
- assert(size <= superblock_trailer_client_table_size_max);
1408
-
1409
- const buffer = superblock.client_table_buffer[0..size];
1410
- const offset = areas.client_table.offset(context.copy.?);
1411
-
1412
- log.debug("{s}: read_client_table: copy={} size={} offset={}", .{
1413
- @tagName(context.caller),
1414
- context.copy.?,
1415
- buffer.len,
1416
- offset,
1417
- });
1418
-
1419
- superblock.assert_bounds(offset, buffer.len);
1420
-
1421
- if (buffer.len == 0) {
1422
- read_client_table_callback(&context.read);
1423
- return;
1424
- }
1425
-
1426
- superblock.storage.read_sectors(
1427
- read_client_table_callback,
1428
- &context.read,
1429
- buffer,
1430
- .superblock,
1431
- offset,
1432
- );
1433
- }
1434
-
1435
- fn read_client_table_callback(read: *Storage.Read) void {
1436
- const context = @fieldParentPtr(Context, "read", read);
1437
- const superblock = context.superblock;
1438
- const copy = context.copy.?;
1439
-
1440
- assert(context.caller == .open);
1441
- assert(superblock.queue_head == context);
1442
- assert(!superblock.opened);
1443
- assert(superblock.client_table.count() == 0);
1444
-
1445
- const slice = superblock.client_table_buffer[0..superblock.working.client_table_size];
1446
- if (vsr.checksum(slice) == superblock.working.client_table_checksum) {
1447
- superblock.client_table.decode(slice);
1448
-
1449
- log.debug("open: read_client_table: client requests: {}/{}", .{
1450
- superblock.client_table.count(),
1451
- constants.clients_max,
1452
- });
1453
-
1454
- context.copy = null;
1455
- superblock.repair(context);
1456
- } else if (copy + 1 == constants.superblock_copies) {
1457
- @panic("superblock client table lost");
1458
- } else {
1459
- log.debug("open: read_client_table: corrupt copy={}", .{copy});
1460
- context.copy = copy + 1;
1461
- superblock.read_client_table(context);
1462
- }
1463
- }
1464
-
1465
- fn repair(superblock: *SuperBlock, context: *Context) void {
1466
- assert(context.caller == .open);
1467
- assert(context.copy == null);
1468
- assert(superblock.queue_head == context);
1469
-
1470
- if (context.repairs.?.next()) |repair_copy| {
1471
- context.copy = repair_copy;
1472
- log.warn("repair: copy={}", .{repair_copy});
1473
-
1474
- superblock.staging.* = superblock.working.*;
1475
- superblock.write_manifest(context);
1476
- } else {
1477
- superblock.release(context);
1478
- }
1479
- }
1480
-
1481
- fn acquire(superblock: *SuperBlock, context: *Context) void {
1482
- if (superblock.queue_head) |head| {
1483
- // There should be nothing else happening when we format() or open():
1484
- assert(context.caller != .format and context.caller != .open);
1485
- assert(head.caller != .format and head.caller != .open);
1486
-
1487
- // There may only be one checkpoint() and one view_change() submitted at a time:
1488
- assert(head.caller != context.caller);
1489
- assert(superblock.queue_tail == null);
1490
-
1491
- log.debug("{s}: enqueued after {s}", .{
1492
- @tagName(context.caller),
1493
- @tagName(head.caller),
1494
- });
1495
-
1496
- superblock.queue_tail = context;
1497
- } else {
1498
- assert(superblock.queue_tail == null);
1499
-
1500
- superblock.queue_head = context;
1501
- log.debug("{s}: started", .{@tagName(context.caller)});
1502
-
1503
- if (context.caller == .open) {
1504
- superblock.read_working(context, .open);
1505
- } else {
1506
- superblock.write_staging(context);
1507
- }
1508
- }
1509
- }
1510
-
1511
- fn release(superblock: *SuperBlock, context: *Context) void {
1512
- assert(superblock.queue_head == context);
1513
-
1514
- log.debug("{s}: complete", .{@tagName(context.caller)});
1515
-
1516
- switch (context.caller) {
1517
- .format => {},
1518
- .open => {
1519
- assert(!superblock.opened);
1520
- superblock.opened = true;
1521
-
1522
- if (superblock.working.manifest_size > 0) {
1523
- assert(superblock.manifest.count > 0);
1524
- }
1525
- // TODO Make the FreeSet encoding format not dependant on the word size.
1526
- if (superblock.working.free_set_size > @sizeOf(usize)) {
1527
- assert(superblock.free_set.count_acquired() > 0);
1528
- }
1529
- },
1530
- .checkpoint, .view_change => {
1531
- assert(meta.eql(superblock.staging.vsr_state, context.vsr_state.?));
1532
- assert(meta.eql(superblock.working.vsr_state, context.vsr_state.?));
1533
- },
1534
- }
1535
-
1536
- const queue_tail = superblock.queue_tail;
1537
- superblock.queue_head = null;
1538
- superblock.queue_tail = null;
1539
- if (queue_tail) |tail| superblock.acquire(tail);
1540
-
1541
- context.callback(context);
1542
- }
1543
-
1544
- fn assert_bounds(superblock: *SuperBlock, offset: u64, size: u64) void {
1545
- assert(offset >= superblock.storage_offset);
1546
- assert(offset + size <= superblock.storage_offset + superblock.storage_size);
1547
- }
1548
-
1549
- /// We use flexible quorums for even quorums with write quorum > read quorum, for example:
1550
- /// * When writing, we must verify that at least 3/4 copies were written.
1551
- /// * At startup, we must verify that at least 2/4 copies were read.
1552
- ///
1553
- /// This ensures that our read and write quorums will intersect.
1554
- /// Using flexible quorums in this way increases resiliency of the superblock.
1555
- fn threshold_for_caller(caller: Context.Caller) u8 {
1556
- // Working these threshold out by formula is easy to get wrong, so enumerate them:
1557
- // The rule is that the write quorum plus the read quorum must be exactly copies + 1.
1558
-
1559
- return switch (caller) {
1560
- .format, .checkpoint, .view_change => switch (constants.superblock_copies) {
1561
- 4 => 3,
1562
- 6 => 4,
1563
- 8 => 5,
1564
- else => unreachable,
1565
- },
1566
- // The open quorum must allow for at least two copy faults, because our view change
1567
- // updates an existing set of copies in place, temporarily impairing one copy.
1568
- .open => switch (constants.superblock_copies) {
1569
- 4 => 2,
1570
- 6 => 3,
1571
- 8 => 4,
1572
- else => unreachable,
1573
- },
1574
- };
1575
- }
1576
- };
1577
- }
1578
-
1579
/// The four regions that make up a single superblock copy.
/// (Field order is significant: it matches the on-disk layout order.)
pub const Area = enum {
    header,
    manifest,
    free_set,
    client_table,
};
1585
-
1586
/// Byte layout of the areas within one superblock copy. Each area's base is
/// chained from the end of the previous area, and `offset(copy)` translates an
/// area's base into an absolute storage offset for a given copy index.
pub const areas = struct {
    pub const header = AreaRange{
        .base = 0,
        .size_max = @sizeOf(SuperBlockHeader),
    };

    pub const manifest = AreaRange{
        .base = header.base + header.size_max,
        .size_max = superblock_trailer_manifest_size_max, // TODO inline these constants?
    };

    pub const free_set = AreaRange{
        .base = manifest.base + manifest.size_max,
        .size_max = superblock_trailer_free_set_size_max,
    };

    pub const client_table = AreaRange{
        .base = free_set.base + free_set.size_max,
        .size_max = superblock_trailer_client_table_size_max,
    };

    /// A contiguous range within a single copy: base offset plus maximum size.
    const AreaRange = struct {
        base: u64,
        size_max: u64,

        /// The absolute storage offset of this area within the given copy.
        pub fn offset(area: AreaRange, copy: u8) u64 {
            return @as(u64, copy) * superblock_copy_size + area.base;
        }
    };
};
1616
-
1617
- test "SuperBlockHeader" {
1618
- const expect = std.testing.expect;
1619
-
1620
- var a = std.mem.zeroes(SuperBlockHeader);
1621
- a.set_checksum();
1622
-
1623
- assert(a.copy == 0);
1624
- try expect(a.valid_checksum());
1625
-
1626
- a.copy += 1;
1627
- try expect(a.valid_checksum());
1628
-
1629
- a.replica += 1;
1630
- try expect(!a.valid_checksum());
1631
- }