tigerbeetle-node 0.9.0 → 0.9.143

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/README.md +580 -179
  2. package/dist/benchmark.js +44 -36
  3. package/dist/benchmark.js.map +1 -1
  4. package/dist/bin/aarch64-linux-gnu/client.node +0 -0
  5. package/dist/bin/aarch64-linux-musl/client.node +0 -0
  6. package/dist/bin/aarch64-macos/client.node +0 -0
  7. package/dist/bin/x86_64-linux-gnu/client.node +0 -0
  8. package/dist/bin/x86_64-linux-musl/client.node +0 -0
  9. package/dist/bin/x86_64-macos/client.node +0 -0
  10. package/dist/bin/x86_64-windows/client.node +0 -0
  11. package/dist/bindings.d.ts +141 -0
  12. package/dist/bindings.js +112 -0
  13. package/dist/bindings.js.map +1 -0
  14. package/dist/index.d.ts +2 -125
  15. package/dist/index.js +51 -101
  16. package/dist/index.js.map +1 -1
  17. package/dist/test.js +68 -54
  18. package/dist/test.js.map +1 -1
  19. package/package-lock.json +26 -0
  20. package/package.json +13 -22
  21. package/src/benchmark.ts +58 -49
  22. package/src/bindings.ts +631 -0
  23. package/src/index.ts +71 -163
  24. package/src/node.zig +169 -148
  25. package/src/test.ts +71 -57
  26. package/src/translate.zig +19 -36
  27. package/scripts/download_node_headers.sh +0 -25
  28. package/src/tigerbeetle/scripts/benchmark.bat +0 -46
  29. package/src/tigerbeetle/scripts/benchmark.sh +0 -55
  30. package/src/tigerbeetle/scripts/install.sh +0 -6
  31. package/src/tigerbeetle/scripts/install_zig.bat +0 -109
  32. package/src/tigerbeetle/scripts/install_zig.sh +0 -84
  33. package/src/tigerbeetle/scripts/lint.zig +0 -199
  34. package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +0 -39
  35. package/src/tigerbeetle/scripts/vopr.bat +0 -48
  36. package/src/tigerbeetle/scripts/vopr.sh +0 -33
  37. package/src/tigerbeetle/scripts/vr_state_enumerate +0 -46
  38. package/src/tigerbeetle/src/benchmark.zig +0 -290
  39. package/src/tigerbeetle/src/cli.zig +0 -244
  40. package/src/tigerbeetle/src/config.zig +0 -239
  41. package/src/tigerbeetle/src/demo.zig +0 -125
  42. package/src/tigerbeetle/src/demo_01_create_accounts.zig +0 -35
  43. package/src/tigerbeetle/src/demo_02_lookup_accounts.zig +0 -7
  44. package/src/tigerbeetle/src/demo_03_create_transfers.zig +0 -24
  45. package/src/tigerbeetle/src/demo_04_create_pending_transfers.zig +0 -61
  46. package/src/tigerbeetle/src/demo_05_post_pending_transfers.zig +0 -37
  47. package/src/tigerbeetle/src/demo_06_void_pending_transfers.zig +0 -24
  48. package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +0 -7
  49. package/src/tigerbeetle/src/fifo.zig +0 -104
  50. package/src/tigerbeetle/src/io/benchmark.zig +0 -213
  51. package/src/tigerbeetle/src/io/darwin.zig +0 -793
  52. package/src/tigerbeetle/src/io/linux.zig +0 -1038
  53. package/src/tigerbeetle/src/io/test.zig +0 -643
  54. package/src/tigerbeetle/src/io/windows.zig +0 -1161
  55. package/src/tigerbeetle/src/io.zig +0 -34
  56. package/src/tigerbeetle/src/main.zig +0 -144
  57. package/src/tigerbeetle/src/message_bus.zig +0 -1000
  58. package/src/tigerbeetle/src/message_pool.zig +0 -142
  59. package/src/tigerbeetle/src/ring_buffer.zig +0 -289
  60. package/src/tigerbeetle/src/simulator.zig +0 -417
  61. package/src/tigerbeetle/src/state_machine.zig +0 -2470
  62. package/src/tigerbeetle/src/storage.zig +0 -308
  63. package/src/tigerbeetle/src/test/cluster.zig +0 -351
  64. package/src/tigerbeetle/src/test/message_bus.zig +0 -93
  65. package/src/tigerbeetle/src/test/network.zig +0 -179
  66. package/src/tigerbeetle/src/test/packet_simulator.zig +0 -387
  67. package/src/tigerbeetle/src/test/state_checker.zig +0 -145
  68. package/src/tigerbeetle/src/test/state_machine.zig +0 -76
  69. package/src/tigerbeetle/src/test/storage.zig +0 -438
  70. package/src/tigerbeetle/src/test/time.zig +0 -84
  71. package/src/tigerbeetle/src/tigerbeetle.zig +0 -222
  72. package/src/tigerbeetle/src/time.zig +0 -113
  73. package/src/tigerbeetle/src/unit_tests.zig +0 -14
  74. package/src/tigerbeetle/src/vsr/client.zig +0 -505
  75. package/src/tigerbeetle/src/vsr/clock.zig +0 -812
  76. package/src/tigerbeetle/src/vsr/journal.zig +0 -2293
  77. package/src/tigerbeetle/src/vsr/marzullo.zig +0 -309
  78. package/src/tigerbeetle/src/vsr/replica.zig +0 -5015
  79. package/src/tigerbeetle/src/vsr.zig +0 -1017
@@ -1,145 +0,0 @@
1
- const std = @import("std");
2
- const assert = std.debug.assert;
3
- const mem = std.mem;
4
-
5
- const config = @import("../config.zig");
6
-
7
- const Cluster = @import("cluster.zig").Cluster;
8
- const Network = @import("network.zig").Network;
9
- const StateMachine = @import("state_machine.zig").StateMachine;
10
-
11
- const message_pool = @import("../message_pool.zig");
12
- const MessagePool = message_pool.MessagePool;
13
- const Message = MessagePool.Message;
14
-
15
- const RingBuffer = @import("../ring_buffer.zig").RingBuffer;
16
-
17
- const RequestQueue = RingBuffer(u128, config.client_request_queue_max);
18
- const StateTransitions = std.AutoHashMap(u128, u64);
19
-
20
- const log = std.log.scoped(.state_checker);
21
-
22
- pub const StateChecker = struct {
23
- /// Indexed by client index as used by Cluster.
24
- client_requests: [config.clients_max]RequestQueue =
25
- [_]RequestQueue{.{}} ** config.clients_max,
26
-
27
- /// Indexed by replica index.
28
- state_machine_states: [config.replicas_max]u128,
29
-
30
- history: StateTransitions,
31
-
32
 - /// The highest canonical state reached by the cluster.
33
- state: u128,
34
-
35
 - /// The number of times the canonical state has been advanced.
36
- transitions: u64 = 0,
37
-
38
- pub fn init(allocator: mem.Allocator, cluster: *Cluster) !StateChecker {
39
- const state = cluster.state_machines[0].state;
40
-
41
- var state_machine_states: [config.replicas_max]u128 = undefined;
42
- for (cluster.state_machines) |state_machine, i| {
43
- assert(state_machine.state == state);
44
- state_machine_states[i] = state_machine.state;
45
- }
46
-
47
- var history = StateTransitions.init(allocator);
48
- errdefer history.deinit();
49
-
50
- var state_checker = StateChecker{
51
- .state_machine_states = state_machine_states,
52
- .history = history,
53
- .state = state,
54
- };
55
-
56
- try state_checker.history.putNoClobber(state, state_checker.transitions);
57
-
58
- return state_checker;
59
- }
60
-
61
- pub fn deinit(state_checker: *StateChecker) void {
62
- state_checker.history.deinit();
63
- }
64
-
65
- pub fn check_state(state_checker: *StateChecker, replica: u8) void {
66
- const cluster = @fieldParentPtr(Cluster, "state_checker", state_checker);
67
-
68
- const a = state_checker.state_machine_states[replica];
69
- const b = cluster.state_machines[replica].state;
70
-
71
- if (b == a) return;
72
- state_checker.state_machine_states[replica] = b;
73
-
74
- // If some other replica has already reached this state, then it will be in the history:
75
- if (state_checker.history.get(b)) |transition| {
76
- // A replica may transition more than once to the same state, for example, when
77
- // restarting after a crash and replaying the log. The more important invariant is that
78
- // the cluster as a whole may not transition to the same state more than once, and once
79
- // transitioned may not regress.
80
- log.info(
81
- "{d:0>4}/{d:0>4} {x:0>32} > {x:0>32} {}",
82
- .{ transition, state_checker.transitions, a, b, replica },
83
- );
84
- return;
85
- }
86
-
87
- // The replica has transitioned to state `b` that is not yet in the history.
88
- // Check if this is a valid new state based on all currently inflight client requests.
89
- for (state_checker.client_requests) |*queue| {
90
- if (queue.head_ptr()) |input| {
91
- if (b == StateMachine.hash(state_checker.state, std.mem.asBytes(input))) {
92
- const transitions_executed = state_checker.history.get(a).?;
93
- if (transitions_executed < state_checker.transitions) {
94
- @panic("replica skipped interim transitions");
95
- } else {
96
- assert(transitions_executed == state_checker.transitions);
97
- }
98
-
99
- state_checker.state = b;
100
- state_checker.transitions += 1;
101
-
102
- log.info(" {d:0>4} {x:0>32} > {x:0>32} {}", .{
103
- state_checker.transitions,
104
- a,
105
- b,
106
- replica,
107
- });
108
-
109
- state_checker.history.putNoClobber(b, state_checker.transitions) catch {
110
- @panic("state checker unable to allocate memory for history.put()");
111
- };
112
-
113
- // As soon as we reach a valid state we must pop the inflight request.
114
- // We cannot wait until the client receives the reply because that would allow
115
- // the inflight request to be used to reach other states in the interim.
116
- // We must therefore use our own queue rather than the clients' queues.
117
- _ = queue.pop();
118
- return;
119
- }
120
- }
121
- }
122
-
123
- @panic("replica transitioned to an invalid state");
124
- }
125
-
126
- pub fn convergence(state_checker: *StateChecker) bool {
127
- const cluster = @fieldParentPtr(Cluster, "state_checker", state_checker);
128
-
129
- const a = state_checker.state_machine_states[0];
130
- for (state_checker.state_machine_states[1..cluster.options.replica_count]) |b| {
131
- if (b != a) return false;
132
- }
133
-
134
- const transitions_executed = state_checker.history.get(a).?;
135
- if (transitions_executed < state_checker.transitions) {
136
- // Cluster reached convergence but on a regressed state.
137
- // A replica reached the transition limit, crashed, then repaired.
138
- return false;
139
- } else {
140
- assert(transitions_executed == state_checker.transitions);
141
- }
142
-
143
- return true;
144
- }
145
- };
@@ -1,76 +0,0 @@
1
- const std = @import("std");
2
- const assert = std.debug.assert;
3
-
4
- const log = std.log.scoped(.state_machine);
5
-
6
- pub const StateMachine = struct {
7
- pub const Operation = enum(u8) {
8
- /// Operations reserved by VR protocol (for all state machines):
9
- reserved,
10
- root,
11
- register,
12
-
13
- hash,
14
- };
15
-
16
- state: u128,
17
- prepare_timestamp: u64 = 0,
18
- commit_timestamp: u64 = 0,
19
-
20
- pub fn init(seed: u64) StateMachine {
21
- return .{ .state = hash(0, std.mem.asBytes(&seed)) };
22
- }
23
-
24
- pub fn prepare(
25
- state_machine: *StateMachine,
26
- operation: Operation,
27
- input: []u8,
28
- ) u64 {
29
- _ = operation;
30
- _ = input;
31
-
32
- return state_machine.prepare_timestamp;
33
- }
34
-
35
- pub fn commit(
36
- state_machine: *StateMachine,
37
- client: u128,
38
- operation: Operation,
39
- input: []const u8,
40
- output: []u8,
41
- ) usize {
42
- switch (operation) {
43
- .reserved, .root => unreachable,
44
- .register => return 0,
45
-
46
- // TODO: instead of always using the first 32 bytes of the output
47
- // buffer, get tricky and use a random but deterministic slice
48
- // of it, filling the rest with 0s.
49
- .hash => {
50
- // Fold the input into our current state, creating a hash chain.
51
- // Hash the input with the client ID since small inputs may collide across clients.
52
- const client_input = hash(client, input);
53
- const new_state = hash(state_machine.state, std.mem.asBytes(&client_input));
54
-
55
- log.debug("state={x} input={x} input.len={} new state={x}", .{
56
- state_machine.state,
57
- client_input,
58
- input.len,
59
- new_state,
60
- });
61
-
62
- state_machine.state = new_state;
63
- std.mem.copy(u8, output, std.mem.asBytes(&state_machine.state));
64
- return @sizeOf(@TypeOf(state_machine.state));
65
- },
66
- }
67
- }
68
-
69
- pub fn hash(state: u128, input: []const u8) u128 {
70
- var key: [32]u8 = [_]u8{0} ** 32;
71
- std.mem.copy(u8, key[0..16], std.mem.asBytes(&state));
72
- var target: [32]u8 = undefined;
73
- std.crypto.hash.Blake3.hash(input, &target, .{ .key = key });
74
- return @bitCast(u128, target[0..16].*);
75
- }
76
- };
@@ -1,438 +0,0 @@
1
- const std = @import("std");
2
- const assert = std.debug.assert;
3
- const math = std.math;
4
- const mem = std.mem;
5
-
6
- const config = @import("../config.zig");
7
- const vsr = @import("../vsr.zig");
8
-
9
- const log = std.log.scoped(.storage);
10
-
11
- // TODOs:
12
- // less than a majority of replicas may have corruption
13
- // have an option to enable/disable the following corruption types:
14
- // bitrot
15
- // misdirected read/write
16
- // corrupt sector
17
- // latent sector error
18
- // - emulate by zeroing sector, as this is how we handle this in the real Storage implementation
19
- // - likely that surrounding sectors also corrupt
20
- // - likely that stuff written at the same time is also corrupt even if written to a far away sector
21
- pub const Storage = struct {
22
- /// Options for fault injection during fuzz testing
23
- pub const Options = struct {
24
- /// Seed for the storage PRNG
25
- seed: u64,
26
-
27
- /// Minimum number of ticks it may take to read data.
28
- read_latency_min: u64,
29
- /// Average number of ticks it may take to read data. Must be >= read_latency_min.
30
- read_latency_mean: u64,
31
- /// Minimum number of ticks it may take to write data.
32
- write_latency_min: u64,
33
- /// Average number of ticks it may take to write data. Must be >= write_latency_min.
34
- write_latency_mean: u64,
35
-
36
- /// Chance out of 100 that a read will return incorrect data, if the target memory is within
37
- /// the faulty area of this replica.
38
- read_fault_probability: u8,
39
 - /// Chance out of 100 that a write will return incorrect data, if the target memory is within
40
- /// the faulty area of this replica.
41
- write_fault_probability: u8,
42
- };
43
-
44
- /// See usage in Journal.write_sectors() for details.
45
- /// TODO: allow testing in both modes.
46
- pub const synchronicity: enum {
47
- always_synchronous,
48
- always_asynchronous,
49
- } = .always_asynchronous;
50
-
51
- pub const Read = struct {
52
- callback: fn (read: *Storage.Read) void,
53
- buffer: []u8,
54
- offset: u64,
55
- /// Tick at which this read is considered "completed" and the callback should be called.
56
- done_at_tick: u64,
57
-
58
- fn less_than(context: void, a: *Read, b: *Read) math.Order {
59
- _ = context;
60
-
61
- return math.order(a.done_at_tick, b.done_at_tick);
62
- }
63
- };
64
-
65
- pub const Write = struct {
66
- callback: fn (write: *Storage.Write) void,
67
- buffer: []const u8,
68
- offset: u64,
69
- /// Tick at which this write is considered "completed" and the callback should be called.
70
- done_at_tick: u64,
71
-
72
- fn less_than(context: void, a: *Write, b: *Write) math.Order {
73
- _ = context;
74
-
75
- return math.order(a.done_at_tick, b.done_at_tick);
76
- }
77
- };
78
-
79
- /// Faulty areas are always sized to message_size_max
80
- /// If the faulty areas of all replicas are superimposed, the padding between them is always message_size_max.
81
- /// For a single replica, the padding between faulty areas depends on the number of other replicas.
82
- pub const FaultyAreas = struct {
83
- first_offset: u64,
84
- period: u64,
85
- };
86
-
87
- memory: []align(config.sector_size) u8,
88
- size: u64,
89
 - /// Set bits correspond to faulty sectors. The underlying sectors of `memory` are left clean.
90
- faults: std.DynamicBitSetUnmanaged,
91
-
92
- options: Options,
93
- replica_index: u8,
94
- prng: std.rand.DefaultPrng,
95
-
96
- // We can't allow storage faults for the same message in a majority of
97
- // the replicas as that would make recovery impossible. Instead, we only
98
- // allow faults in certain areas which differ between replicas.
99
- faulty_areas: FaultyAreas,
100
- /// Whether to enable faults (when false, this supersedes `faulty_areas`).
101
- /// This is used to disable faults during the replica's first startup.
102
- faulty: bool = true,
103
-
104
- reads: std.PriorityQueue(*Storage.Read, void, Storage.Read.less_than),
105
- writes: std.PriorityQueue(*Storage.Write, void, Storage.Write.less_than),
106
-
107
- ticks: u64 = 0,
108
-
109
- pub fn init(
110
- allocator: mem.Allocator,
111
- size: u64,
112
- options: Storage.Options,
113
- replica_index: u8,
114
- faulty_areas: FaultyAreas,
115
- ) !Storage {
116
- assert(options.write_latency_mean >= options.write_latency_min);
117
- assert(options.read_latency_mean >= options.read_latency_min);
118
-
119
- const memory = try allocator.allocAdvanced(u8, config.sector_size, size, .exact);
120
- errdefer allocator.free(memory);
121
- // TODO: random data
122
- mem.set(u8, memory, 0);
123
-
124
- var faults = try std.DynamicBitSetUnmanaged.initEmpty(
125
- allocator,
126
- @divExact(size, config.sector_size),
127
- );
128
- errdefer faults.deinit(allocator);
129
-
130
- var reads = std.PriorityQueue(*Storage.Read, void, Storage.Read.less_than).init(allocator, {});
131
- errdefer reads.deinit();
132
- try reads.ensureTotalCapacity(config.io_depth_read);
133
-
134
- var writes = std.PriorityQueue(*Storage.Write, void, Storage.Write.less_than).init(allocator, {});
135
- errdefer writes.deinit();
136
- try writes.ensureTotalCapacity(config.io_depth_write);
137
-
138
- return Storage{
139
- .memory = memory,
140
- .size = size,
141
- .faults = faults,
142
- .options = options,
143
- .replica_index = replica_index,
144
- .prng = std.rand.DefaultPrng.init(options.seed),
145
- .faulty_areas = faulty_areas,
146
- .reads = reads,
147
- .writes = writes,
148
- };
149
- }
150
-
151
- /// Cancel any currently in progress reads/writes but leave the stored data untouched.
152
- pub fn reset(storage: *Storage) void {
153
- while (storage.writes.peek()) |write| {
154
- _ = storage.writes.remove();
155
- storage.fault_sectors(write.offset, write.buffer.len);
156
- }
157
-
158
- storage.reads.len = 0;
159
- assert(storage.writes.len == 0);
160
- }
161
-
162
- pub fn deinit(storage: *Storage, allocator: mem.Allocator) void {
163
- allocator.free(storage.memory);
164
- storage.faults.deinit(allocator);
165
- storage.reads.deinit();
166
- storage.writes.deinit();
167
- }
168
-
169
- pub fn tick(storage: *Storage) void {
170
- storage.ticks += 1;
171
-
172
- while (storage.reads.peek()) |read| {
173
- if (read.done_at_tick > storage.ticks) break;
174
- _ = storage.reads.remove();
175
- storage.read_sectors_finish(read);
176
- }
177
-
178
- while (storage.writes.peek()) |write| {
179
- if (write.done_at_tick > storage.ticks) break;
180
- _ = storage.writes.remove();
181
- storage.write_sectors_finish(write);
182
- }
183
- }
184
-
185
- pub fn read_sectors(
186
- storage: *Storage,
187
- callback: fn (read: *Storage.Read) void,
188
- read: *Storage.Read,
189
- buffer: []u8,
190
- offset: u64,
191
- ) void {
192
- storage.assert_bounds_and_alignment(buffer, offset);
193
-
194
- read.* = .{
195
- .callback = callback,
196
- .buffer = buffer,
197
- .offset = offset,
198
- .done_at_tick = storage.ticks + storage.read_latency(),
199
- };
200
-
201
- // We ensure the capacity is sufficient for config.io_depth_read in init()
202
- storage.reads.add(read) catch unreachable;
203
- }
204
-
205
- fn read_sectors_finish(storage: *Storage, read: *Storage.Read) void {
206
- mem.copy(u8, read.buffer, storage.memory[read.offset..][0..read.buffer.len]);
207
-
208
- if (storage.x_in_100(storage.options.read_fault_probability)) {
209
- storage.fault_sectors(read.offset, read.buffer.len);
210
- }
211
-
212
- if (storage.faulty) {
213
- // Corrupt faulty sectors.
214
- const sector_min = @divExact(read.offset, config.sector_size);
215
- var sector: usize = 0;
216
- while (sector < @divExact(read.buffer.len, config.sector_size)) : (sector += 1) {
217
- if (storage.faults.isSet(sector_min + sector)) {
218
- const faulty_sector_offset = sector * config.sector_size;
219
- const faulty_sector_bytes = read.buffer[faulty_sector_offset..][0..config.sector_size];
220
- storage.prng.random().bytes(faulty_sector_bytes);
221
- }
222
- }
223
- }
224
-
225
- read.callback(read);
226
- }
227
-
228
- pub fn write_sectors(
229
- storage: *Storage,
230
- callback: fn (write: *Storage.Write) void,
231
- write: *Storage.Write,
232
- buffer: []const u8,
233
- offset: u64,
234
- ) void {
235
- storage.assert_bounds_and_alignment(buffer, offset);
236
-
237
- // Verify that there are no concurrent overlapping writes.
238
- var iterator = storage.writes.iterator();
239
- while (iterator.next()) |other| {
240
- assert(offset + buffer.len <= other.offset or
241
- other.offset + other.buffer.len <= offset);
242
- }
243
-
244
- write.* = .{
245
- .callback = callback,
246
- .buffer = buffer,
247
- .offset = offset,
248
- .done_at_tick = storage.ticks + storage.write_latency(),
249
- };
250
-
251
- // We ensure the capacity is sufficient for config.io_depth_write in init()
252
- storage.writes.add(write) catch unreachable;
253
- }
254
-
255
- fn write_sectors_finish(storage: *Storage, write: *Storage.Write) void {
256
- mem.copy(u8, storage.memory[write.offset..][0..write.buffer.len], write.buffer);
257
-
258
- {
259
- const sector_min = @divExact(write.offset, config.sector_size);
260
- const sector_max = @divExact(write.offset + write.buffer.len, config.sector_size);
261
- var sector: usize = sector_min;
262
- while (sector < sector_max) : (sector += 1) storage.faults.unset(sector);
263
- }
264
-
265
- if (storage.x_in_100(storage.options.write_fault_probability)) {
266
- storage.fault_sectors(write.offset, write.buffer.len);
267
- }
268
- write.callback(write);
269
- }
270
-
271
- fn assert_bounds_and_alignment(storage: *const Storage, buffer: []const u8, offset: u64) void {
272
- assert(buffer.len > 0);
273
- assert(offset + buffer.len <= storage.size);
274
-
275
- // Ensure that the read or write is aligned correctly for Direct I/O:
276
- // If this is not the case, the underlying syscall will return EINVAL.
277
- assert(@mod(@ptrToInt(buffer.ptr), config.sector_size) == 0);
278
- assert(@mod(buffer.len, config.sector_size) == 0);
279
- assert(@mod(offset, config.sector_size) == 0);
280
- }
281
-
282
- fn read_latency(storage: *Storage) u64 {
283
- return storage.latency(storage.options.read_latency_min, storage.options.read_latency_mean);
284
- }
285
-
286
- fn write_latency(storage: *Storage) u64 {
287
- return storage.latency(storage.options.write_latency_min, storage.options.write_latency_mean);
288
- }
289
-
290
- fn latency(storage: *Storage, min: u64, mean: u64) u64 {
291
- return min + @floatToInt(u64, @intToFloat(f64, mean - min) * storage.prng.random().floatExp(f64));
292
- }
293
-
294
- /// Return true with probability x/100.
295
- fn x_in_100(storage: *Storage, x: u8) bool {
296
- assert(x <= 100);
297
- return x > storage.prng.random().uintLessThan(u8, 100);
298
- }
299
-
300
- fn random_uint_between(storage: *Storage, comptime T: type, min: T, max: T) T {
301
- return min + storage.prng.random().uintLessThan(T, max - min);
302
- }
303
-
304
- /// The return value is a slice into the provided out array.
305
- pub fn generate_faulty_areas(
306
- prng: std.rand.Random,
307
- size: u64,
308
- replica_count: u8,
309
- out: *[config.replicas_max]FaultyAreas,
310
- ) []FaultyAreas {
311
- comptime assert(config.message_size_max % config.sector_size == 0);
312
- const message_size_max = config.message_size_max;
313
-
314
- // We need to ensure there is message_size_max fault-free padding
315
- // between faulty areas of memory so that a single message
316
- // cannot straddle the corruptable areas of a majority of replicas.
317
- comptime assert(config.replicas_max == 6);
318
- switch (replica_count) {
319
- 1 => {
320
- // If there is only one replica in the cluster, storage faults are not recoverable.
321
- out[0] = .{ .first_offset = size, .period = 1 };
322
- },
323
- 2 => {
324
- // 0123456789
325
- // 0X X X
326
- // 1 X X X
327
- out[0] = .{ .first_offset = 0 * message_size_max, .period = 4 * message_size_max };
328
- out[1] = .{ .first_offset = 2 * message_size_max, .period = 4 * message_size_max };
329
- },
330
- 3 => {
331
- // 0123456789
332
- // 0X X
333
- // 1 X X
334
- // 2 X X
335
- out[0] = .{ .first_offset = 0 * message_size_max, .period = 6 * message_size_max };
336
- out[1] = .{ .first_offset = 2 * message_size_max, .period = 6 * message_size_max };
337
- out[2] = .{ .first_offset = 4 * message_size_max, .period = 6 * message_size_max };
338
- },
339
- 4 => {
340
- // 0123456789
341
- // 0X X X
342
- // 1X X X
343
- // 2 X X X
344
- // 3 X X X
345
- out[0] = .{ .first_offset = 0 * message_size_max, .period = 4 * message_size_max };
346
- out[1] = .{ .first_offset = 0 * message_size_max, .period = 4 * message_size_max };
347
- out[2] = .{ .first_offset = 2 * message_size_max, .period = 4 * message_size_max };
348
- out[3] = .{ .first_offset = 2 * message_size_max, .period = 4 * message_size_max };
349
- },
350
- 5 => {
351
- // 0123456789
352
- // 0X X
353
- // 1X X
354
- // 2 X X
355
- // 3 X X
356
- // 4 X X
357
- out[0] = .{ .first_offset = 0 * message_size_max, .period = 6 * message_size_max };
358
- out[1] = .{ .first_offset = 0 * message_size_max, .period = 6 * message_size_max };
359
- out[2] = .{ .first_offset = 2 * message_size_max, .period = 6 * message_size_max };
360
- out[3] = .{ .first_offset = 2 * message_size_max, .period = 6 * message_size_max };
361
- out[4] = .{ .first_offset = 4 * message_size_max, .period = 6 * message_size_max };
362
- },
363
- 6 => {
364
- // 0123456789
365
- // 0X X
366
- // 1X X
367
- // 2 X X
368
- // 3 X X
369
- // 4 X X
370
- // 5 X X
371
- out[0] = .{ .first_offset = 0 * message_size_max, .period = 6 * message_size_max };
372
- out[1] = .{ .first_offset = 0 * message_size_max, .period = 6 * message_size_max };
373
- out[2] = .{ .first_offset = 2 * message_size_max, .period = 6 * message_size_max };
374
- out[3] = .{ .first_offset = 2 * message_size_max, .period = 6 * message_size_max };
375
- out[4] = .{ .first_offset = 4 * message_size_max, .period = 6 * message_size_max };
376
- out[5] = .{ .first_offset = 4 * message_size_max, .period = 6 * message_size_max };
377
- },
378
- else => unreachable,
379
- }
380
-
381
- {
382
- // Allow at most `f` faulty replicas to ensure the view change can succeed.
383
- // TODO Allow more than `f` faulty replicas when the fault is to the right of the
384
- // highest known replica.op (and to the left of the last checkpointed op).
385
- const majority = @divFloor(replica_count, 2) + 1;
386
- const quorum_replication = std.math.min(config.quorum_replication_max, majority);
387
- const quorum_view_change = std.math.max(
388
- replica_count - quorum_replication + 1,
389
- majority,
390
- );
391
- var i: usize = quorum_view_change;
392
- while (i < replica_count) : (i += 1) {
393
- out[i].first_offset = size;
394
- }
395
- }
396
-
397
- prng.shuffle(FaultyAreas, out[0..replica_count]);
398
- return out[0..replica_count];
399
- }
400
-
401
- const SectorRange = struct {
402
- min: usize, // inclusive sector index
403
- max: usize, // exclusive sector index
404
- };
405
-
406
- /// Given an offset and size of a read/write, returns the range of any faulty sectors touched
407
- /// by the read/write.
408
- fn faulty_sectors(storage: *const Storage, offset: u64, size: u64) ?SectorRange {
409
- assert(size <= config.message_size_max);
410
- const message_size_max = config.message_size_max;
411
- const period = storage.faulty_areas.period;
412
-
413
- const faulty_offset = storage.faulty_areas.first_offset + (offset / period) * period;
414
-
415
- const start = std.math.max(offset, faulty_offset);
416
- const end = std.math.min(offset + size, faulty_offset + message_size_max);
417
-
418
- // The read/write does not touch any faulty sectors.
419
- if (start >= end) return null;
420
-
421
- return SectorRange{
422
- .min = @divExact(start, config.sector_size),
423
- .max = @divExact(end, config.sector_size),
424
- };
425
- }
426
-
427
- fn fault_sectors(storage: *Storage, offset: u64, size: u64) void {
428
- const faulty = storage.faulty_sectors(offset, size) orelse return;
429
- // Randomly corrupt one of the faulty sectors the operation targeted.
430
- // TODO: inject more realistic and varied storage faults as described above.
431
- const faulty_sector = storage.random_uint_between(usize, faulty.min, faulty.max);
432
- log.info("corrupting sector {} by replica {}", .{
433
- faulty_sector,
434
- storage.replica_index,
435
- });
436
- storage.faults.set(faulty_sector);
437
- }
438
- };