tigerbeetle-node 0.3.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -7
- package/dist/benchmark.js +1 -1
- package/dist/benchmark.js.map +1 -1
- package/dist/index.d.ts +22 -20
- package/dist/index.js +40 -18
- package/dist/index.js.map +1 -1
- package/dist/test.js +13 -1
- package/dist/test.js.map +1 -1
- package/package.json +12 -12
- package/scripts/postinstall.sh +2 -2
- package/src/benchmark.ts +4 -4
- package/src/index.ts +35 -9
- package/src/node.zig +139 -28
- package/src/test.ts +19 -5
- package/src/tigerbeetle/scripts/benchmark.sh +10 -3
- package/src/tigerbeetle/scripts/install.sh +2 -2
- package/src/tigerbeetle/scripts/install_zig.bat +109 -0
- package/src/tigerbeetle/scripts/install_zig.sh +21 -4
- package/src/tigerbeetle/scripts/vopr.bat +48 -0
- package/src/tigerbeetle/scripts/vopr.sh +33 -0
- package/src/tigerbeetle/src/benchmark.zig +74 -42
- package/src/tigerbeetle/src/cli.zig +136 -83
- package/src/tigerbeetle/src/config.zig +80 -26
- package/src/tigerbeetle/src/demo.zig +101 -78
- package/src/tigerbeetle/src/demo_01_create_accounts.zig +2 -7
- package/src/tigerbeetle/src/demo_02_lookup_accounts.zig +2 -7
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +2 -7
- package/src/tigerbeetle/src/demo_04_create_transfers_two_phase_commit.zig +2 -5
- package/src/tigerbeetle/src/demo_05_accept_transfers.zig +2 -7
- package/src/tigerbeetle/src/demo_06_reject_transfers.zig +2 -7
- package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +8 -0
- package/src/tigerbeetle/src/fifo.zig +20 -11
- package/src/tigerbeetle/src/io.zig +35 -22
- package/src/tigerbeetle/src/io_darwin.zig +701 -0
- package/src/tigerbeetle/src/main.zig +72 -25
- package/src/tigerbeetle/src/message_bus.zig +379 -456
- package/src/tigerbeetle/src/message_pool.zig +3 -3
- package/src/tigerbeetle/src/ring_buffer.zig +192 -37
- package/src/tigerbeetle/src/simulator.zig +317 -0
- package/src/tigerbeetle/src/state_machine.zig +846 -38
- package/src/tigerbeetle/src/storage.zig +488 -90
- package/src/tigerbeetle/src/test/cluster.zig +221 -0
- package/src/tigerbeetle/src/test/message_bus.zig +92 -0
- package/src/tigerbeetle/src/test/network.zig +182 -0
- package/src/tigerbeetle/src/test/packet_simulator.zig +371 -0
- package/src/tigerbeetle/src/test/state_checker.zig +142 -0
- package/src/tigerbeetle/src/test/state_machine.zig +71 -0
- package/src/tigerbeetle/src/test/storage.zig +375 -0
- package/src/tigerbeetle/src/test/time.zig +84 -0
- package/src/tigerbeetle/src/tigerbeetle.zig +6 -3
- package/src/tigerbeetle/src/time.zig +65 -0
- package/src/tigerbeetle/src/unit_tests.zig +14 -0
- package/src/tigerbeetle/src/vsr/client.zig +519 -0
- package/src/tigerbeetle/src/vsr/clock.zig +829 -0
- package/src/tigerbeetle/src/vsr/journal.zig +1368 -0
- package/src/tigerbeetle/src/vsr/marzullo.zig +306 -0
- package/src/tigerbeetle/src/vsr/replica.zig +4248 -0
- package/src/tigerbeetle/src/vsr.zig +601 -0
- package/src/tigerbeetle/LICENSE +0 -177
- package/src/tigerbeetle/README.md +0 -116
- package/src/tigerbeetle/src/client.zig +0 -319
- package/src/tigerbeetle/src/concurrent_ranges.zig +0 -162
- package/src/tigerbeetle/src/fixed_array_list.zig +0 -53
- package/src/tigerbeetle/src/io_async.zig +0 -600
- package/src/tigerbeetle/src/journal.zig +0 -567
- package/src/tigerbeetle/src/test_client.zig +0 -41
- package/src/tigerbeetle/src/test_main.zig +0 -118
- package/src/tigerbeetle/src/test_message_bus.zig +0 -132
- package/src/tigerbeetle/src/vr/journal.zig +0 -672
- package/src/tigerbeetle/src/vr/replica.zig +0 -3061
- package/src/tigerbeetle/src/vr.zig +0 -374
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const assert = std.debug.assert;
|
|
3
|
+
const math = std.math;
|
|
4
|
+
|
|
5
|
+
const log = std.log.scoped(.packet_simulator);
|
|
6
|
+
|
|
7
|
+
/// Configuration for `PacketSimulator`. All probabilities are percentages
/// compared against a draw of `uintAtMost(u8, 100)`.
pub const PacketSimulatorOptions = struct {
    /// Mean for the exponential distribution used to calculate forward delay.
    one_way_delay_mean: u64,
    /// Lower bound (in ticks) on any packet's one-way delay.
    one_way_delay_min: u64,

    /// Percentage chance (0-100) that a packet due for delivery is dropped instead.
    packet_loss_probability: u8,
    /// Percentage chance (0-100) that a delivered packet is also replayed.
    packet_replay_probability: u8,
    /// Seed for the simulator's deterministic PRNG.
    seed: u64,

    /// Number of replicas; only replicas participate in partitioning.
    replica_count: u8,
    /// Number of clients. (Not referenced by the simulator itself — presumably used by callers.)
    client_count: u8,
    /// Total number of nodes; the path matrix is node_count * node_count entries.
    node_count: u8,

    /// How the partitions should be generated
    partition_mode: PartitionMode,

    /// Probability per tick that a partition will occur
    partition_probability: u8,

    /// Probability per tick that a partition will resolve
    unpartition_probability: u8,

    /// Minimum time a partition lasts
    partition_stability: u32,

    /// Minimum time the cluster is fully connected until it is partitioned again
    unpartition_stability: u32,

    /// The maximum number of in-flight packets a path can have before packets are randomly dropped.
    path_maximum_capacity: u8,

    /// Mean for the exponential distribution used to calculate how long a path is clogged for.
    path_clog_duration_mean: u64,
    /// Percentage chance (0-100), checked once per path per tick, that the reverse
    /// path becomes clogged.
    path_clog_probability: u8,
};
|
|
42
|
+
|
|
43
|
+
/// Identifies one directed (one-way) connection between two nodes.
pub const Path = struct {
    /// Index of the sending node (must be < node_count).
    source: u8,
    /// Index of the receiving node (must be < node_count).
    target: u8,
};
|
|
47
|
+
|
|
48
|
+
/// Determines how the partitions are created. Partitions
/// are two-way, i.e. if i cannot communicate with j, then
/// j cannot communicate with i.
///
/// Only replicas are partitioned. There will always be exactly two partitions.
pub const PartitionMode = enum {
    /// Draws the size of the partition uniformly at random from (1, n-1).
    /// Replicas are randomly assigned a partition.
    uniform_size,

    /// Assigns each node to a partition uniformly at random. This biases towards
    /// equal-size partitions.
    uniform_partition,

    /// Isolates exactly one replica.
    isolate_single,

    /// User-defined partitioning algorithm.
    /// NOTE: `partition_network` hits `unreachable` for this mode unless custom
    /// logic is added there first.
    custom,
};
|
|
68
|
+
|
|
69
|
+
// NOTE(review): this comment block describes the simulator as a whole (see
// `PacketSimulator` below) rather than this enum — consider moving it.
// A fully connected network of nodes used for testing. Simulates the fault model:
// Packets may be dropped.
// Packets may be delayed.
// Packets may be replayed.
/// Categories of simulated network faults, used to index `PacketSimulator.stats`
/// via `@enumToInt`.
pub const PacketStatistics = enum(u8) {
    /// Packet dropped because source and target replicas were in different partitions.
    dropped_due_to_partition,
    /// Intended for capacity drops in `submit_packet`.
    /// NOTE(review): never incremented in this file — capacity drops are only logged.
    dropped_due_to_congestion,
    /// Packet dropped at random (see `packet_loss_probability`).
    dropped,
    /// Packet delivered more than once (see `packet_replay_probability`).
    replay,
};
|
|
79
|
+
/// A fully connected network of nodes used for testing. Simulates the fault model:
/// Packets may be dropped.
/// Packets may be delayed.
/// Packets may be replayed.
pub fn PacketSimulator(comptime Packet: type) type {
    return struct {
        const Self = @This();

        /// An in-flight packet, ordered by `expiry`: the tick at which it is delivered.
        const Data = struct {
            expiry: u64,
            callback: fn (packet: Packet, path: Path) void,
            packet: Packet,
        };

        /// A send and receive path between each node in the network. We use the `path_index`
        /// function to index it.
        paths: []std.PriorityQueue(Data),

        /// We can arbitrarily clog a path until a tick (see `clog_for`).
        path_clogged_till: []u64,
        ticks: u64 = 0,
        options: PacketSimulatorOptions,
        prng: std.rand.DefaultPrng,
        stats: [@typeInfo(PacketStatistics).Enum.fields.len]u32 = [_]u32{0} **
            @typeInfo(PacketStatistics).Enum.fields.len,

        is_partitioned: bool,
        /// partition[r] is the side of the current partition that replica r belongs to.
        partition: []bool,
        /// Replica indices 0..replica_count, shuffled when generating a partition.
        replicas: []u8,
        /// Ticks remaining before the partition state may change again.
        stability: u32,

        pub fn init(allocator: *std.mem.Allocator, options: PacketSimulatorOptions) !Self {
            assert(options.one_way_delay_mean >= options.one_way_delay_min);

            const path_count = @as(usize, options.node_count) * options.node_count;

            // Allocate with errdefer so that a failure part-way through does not leak
            // the allocations already made (the original leaked here on error).
            const paths = try allocator.alloc(std.PriorityQueue(Data), path_count);
            errdefer allocator.free(paths);

            const path_clogged_till = try allocator.alloc(u64, path_count);
            errdefer allocator.free(path_clogged_till);

            const partition = try allocator.alloc(bool, @as(usize, options.replica_count));
            errdefer allocator.free(partition);

            const replicas = try allocator.alloc(u8, @as(usize, options.replica_count));
            errdefer allocator.free(replicas);

            var self = Self{
                .paths = paths,
                .path_clogged_till = path_clogged_till,
                .options = options,
                .prng = std.rand.DefaultPrng.init(options.seed),

                .is_partitioned = false,
                .stability = options.unpartition_stability,
                .partition = partition,
                .replicas = replicas,
            };

            for (self.replicas) |_, i| {
                self.replicas[i] = @intCast(u8, i);
            }

            for (self.paths) |*queue| {
                queue.* = std.PriorityQueue(Data).init(allocator, Self.order_packets);
                try queue.ensureCapacity(options.path_maximum_capacity);
            }

            for (self.path_clogged_till) |*clogged_till| {
                clogged_till.* = 0;
            }

            return self;
        }

        pub fn deinit(self: *Self, allocator: *std.mem.Allocator) void {
            for (self.paths) |*queue| {
                // NOTE(review): packets elsewhere are freed as `packet.deinit(path)`;
                // confirm which signature `Packet.deinit` actually has.
                while (queue.popOrNull()) |*data| data.packet.deinit();
                queue.deinit();
            }
            allocator.free(self.paths);
            // Fix: these three were allocated in init() but previously never freed (leak).
            allocator.free(self.path_clogged_till);
            allocator.free(self.partition);
            allocator.free(self.replicas);
        }

        /// Orders in-flight packets by delivery tick (earliest first).
        fn order_packets(a: Data, b: Data) math.Order {
            return math.order(a.expiry, b.expiry);
        }

        /// Whether a packet due for delivery should instead be dropped.
        fn should_drop(self: *Self) bool {
            // NOTE(review): uintAtMost(u8, 100) draws from [0, 100] (101 values), so the
            // effective probability is p/101 rather than p/100. Kept as-is to preserve
            // seed-deterministic behavior. (The same applies to the other should_* helpers.)
            return self.prng.random.uintAtMost(u8, 100) < self.options.packet_loss_probability;
        }

        /// Maps a (source, target) pair to an index into `paths`/`path_clogged_till`.
        fn path_index(self: *Self, path: Path) usize {
            assert(path.source < self.options.node_count and path.target < self.options.node_count);

            return @as(usize, path.source) * self.options.node_count + path.target;
        }

        /// Returns the priority queue of in-flight packets for the given path.
        pub fn path_queue(self: *Self, path: Path) *std.PriorityQueue(Data) {
            // Fix: previously computed path_index() into an unused local and then
            // recomputed the same index inline.
            return &self.paths[self.path_index(path)];
        }

        /// Whether the path is currently clogged (nothing is delivered until the clog expires).
        fn is_clogged(self: *Self, path: Path) bool {
            return self.path_clogged_till[self.path_index(path)] > self.ticks;
        }

        fn should_clog(self: *Self, path: Path) bool {
            return self.prng.random.uintAtMost(u8, 100) < self.options.path_clog_probability;
        }

        /// Clogs `path` so that nothing is delivered on it for the next `ticks` ticks.
        fn clog_for(self: *Self, path: Path, ticks: u64) void {
            const clog_expiry = &self.path_clogged_till[self.path_index(path)];
            clog_expiry.* = self.ticks + ticks;
            log.debug("Path path.source={} path.target={} clogged for ticks={}", .{
                path.source,
                path.target,
                ticks,
            });
        }

        fn should_replay(self: *Self) bool {
            return self.prng.random.uintAtMost(u8, 100) < self.options.packet_replay_probability;
        }

        fn should_partition(self: *Self) bool {
            return self.prng.random.uintAtMost(u8, 100) < self.options.partition_probability;
        }

        fn should_unpartition(self: *Self) bool {
            return self.prng.random.uintAtMost(u8, 100) < self.options.unpartition_probability;
        }

        /// Return a value produced using an exponential distribution with
        /// the minimum and mean specified in self.options
        fn one_way_delay(self: *Self) u64 {
            const min = self.options.one_way_delay_min;
            const mean = self.options.one_way_delay_mean;
            return min + @floatToInt(u64, @intToFloat(f64, mean - min) * self.prng.random.floatExp(f64));
        }

        /// Partitions the network. Guaranteed to isolate at least one replica.
        fn partition_network(
            self: *Self,
        ) void {
            assert(self.options.replica_count > 1);

            self.is_partitioned = true;
            self.stability = self.options.partition_stability;

            switch (self.options.partition_mode) {
                .uniform_size => {
                    // Exclude cases sz == 0 and sz == replica_count
                    const sz =
                        1 + self.prng.random.uintAtMost(u8, self.options.replica_count - 2);
                    self.prng.random.shuffle(u8, self.replicas);
                    for (self.replicas) |r, i| {
                        self.partition[r] = i < sz;
                    }
                },
                .uniform_partition => {
                    // Flip a coin per replica; if every coin landed on the same side,
                    // force one replica to the other side so the partition is real.
                    var only_same = true;
                    self.partition[0] =
                        self.prng.random.uintLessThan(u8, 2) == 1;

                    var i: usize = 1;
                    while (i < self.options.replica_count) : (i += 1) {
                        self.partition[i] =
                            self.prng.random.uintLessThan(u8, 2) == 1;
                        only_same =
                            only_same and (self.partition[i - 1] == self.partition[i]);
                    }

                    if (only_same) {
                        const n = self.prng.random.uintLessThan(u8, self.options.replica_count);
                        self.partition[n] = true;
                    }
                },
                .isolate_single => {
                    for (self.replicas) |_, i| {
                        self.partition[i] = false;
                    }
                    const n = self.prng.random.uintLessThan(u8, self.options.replica_count);
                    self.partition[n] = true;
                },
                // Put your own partitioning logic here.
                .custom => unreachable,
            }
        }

        /// Heals the network: all replicas are placed back on the same side.
        fn unpartition_network(
            self: *Self,
        ) void {
            self.is_partitioned = false;
            self.stability = self.options.unpartition_stability;

            for (self.replicas) |_, i| {
                self.partition[i] = false;
            }
        }

        /// Whether `from` and `to` are replicas on opposite sides of the current partition.
        /// Clients (indices >= replica_count) are never partitioned.
        fn replicas_are_in_different_partitions(self: *Self, from: u8, to: u8) bool {
            return from < self.options.replica_count and
                to < self.options.replica_count and
                self.partition[from] != self.partition[to];
        }

        /// Advances simulated time by one tick: possibly flips the partition state,
        /// then delivers (or drops/replays) every packet whose delay has expired.
        pub fn tick(self: *Self) void {
            self.ticks += 1;

            // The partition state is held stable for a minimum number of ticks
            // before it is allowed to flip again.
            if (self.stability > 0) {
                self.stability -= 1;
            } else {
                if (self.is_partitioned) {
                    if (self.should_unpartition()) {
                        self.unpartition_network();
                        log.alert("unpartitioned network: partition={d}", .{self.partition});
                    }
                } else {
                    if (self.options.replica_count > 1 and self.should_partition()) {
                        self.partition_network();
                        log.alert("partitioned network: partition={d}", .{self.partition});
                    }
                }
            }

            var from: u8 = 0;
            while (from < self.options.node_count) : (from += 1) {
                var to: u8 = 0;
                while (to < self.options.node_count) : (to += 1) {
                    const path: Path = .{ .source = from, .target = to };
                    if (self.is_clogged(path)) continue;

                    const queue = self.path_queue(path);
                    while (queue.peek()) |next| {
                        // Packets are ordered by expiry, so stop at the first still in flight.
                        if (next.expiry > self.ticks) break;
                        const data = queue.remove();

                        if (self.is_partitioned and
                            self.replicas_are_in_different_partitions(from, to))
                        {
                            self.stats[@enumToInt(PacketStatistics.dropped_due_to_partition)] += 1;
                            log.alert("dropped packet (different partitions): from={} to={}", .{ from, to });
                            data.packet.deinit(path);
                            continue;
                        }

                        if (self.should_drop()) {
                            self.stats[@enumToInt(PacketStatistics.dropped)] += 1;
                            log.alert("dropped packet from={} to={}.", .{ from, to });
                            data.packet.deinit(path);
                            continue;
                        }

                        if (self.should_replay()) {
                            // Re-submit the packet so it is delivered again later, then
                            // deliver it now. It is not deinitialized here because it is
                            // back in a queue.
                            self.submit_packet(data.packet, data.callback, path);

                            log.debug("replayed packet from={} to={}", .{ from, to });
                            self.stats[@enumToInt(PacketStatistics.replay)] += 1;

                            data.callback(data.packet, path);
                        } else {
                            log.debug("delivering packet from={} to={}", .{ from, to });
                            data.callback(data.packet, path);
                            data.packet.deinit(path);
                        }
                    }

                    const reverse_path: Path = .{ .source = to, .target = from };

                    if (self.should_clog(reverse_path)) {
                        log.debug("reverse path clogged", .{});
                        const mean = @intToFloat(f64, self.options.path_clog_duration_mean);
                        const ticks = @floatToInt(u64, mean * self.prng.random.floatExp(f64));
                        self.clog_for(reverse_path, ticks);
                    }
                }
            }
        }

        /// Queues `packet` for delivery on `path` after a random one-way delay.
        /// If the path is at capacity, a random queued packet is dropped to make room.
        pub fn submit_packet(
            self: *Self,
            packet: Packet,
            callback: fn (packet: Packet, path: Path) void,
            path: Path,
        ) void {
            const queue = self.path_queue(path);
            var queue_length = queue.count();
            if (queue_length + 1 > queue.capacity()) {
                // NOTE(review): this drop is only logged, not counted in stats
                // (PacketStatistics.dropped_due_to_congestion is never incremented).
                const index = self.prng.random.uintLessThanBiased(u64, queue_length);
                const data = queue.removeIndex(index);
                data.packet.deinit(path);
                log.alert("submit_packet: {} reached capacity, dropped packet={}", .{
                    path,
                    index,
                });
            }

            // Capacity was ensured in init() (and a slot was freed above if full),
            // so this add cannot fail.
            queue.add(.{
                .expiry = self.ticks + self.one_way_delay(),
                .packet = packet,
                .callback = callback,
            }) catch unreachable;
        }
    };
}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const assert = std.debug.assert;
|
|
3
|
+
const mem = std.mem;
|
|
4
|
+
|
|
5
|
+
const config = @import("../config.zig");
|
|
6
|
+
|
|
7
|
+
const Cluster = @import("cluster.zig").Cluster;
|
|
8
|
+
const Network = @import("network.zig").Network;
|
|
9
|
+
const StateMachine = @import("state_machine.zig").StateMachine;
|
|
10
|
+
|
|
11
|
+
const MessagePool = @import("../message_pool.zig").MessagePool;
|
|
12
|
+
const Message = MessagePool.Message;
|
|
13
|
+
|
|
14
|
+
const RingBuffer = @import("../ring_buffer.zig").RingBuffer;
|
|
15
|
+
|
|
16
|
+
const RequestQueue = RingBuffer(u128, config.message_bus_messages_max - 1);
|
|
17
|
+
const StateTransitions = std.AutoHashMap(u128, u64);
|
|
18
|
+
|
|
19
|
+
const log = std.log.scoped(.state_checker);
|
|
20
|
+
|
|
21
|
+
/// Verifies that the cluster's replicas advance along a single canonical chain of
/// state transitions: each transition must be explained by an inflight client
/// request, may happen cluster-wide only once, and may never regress.
pub const StateChecker = struct {
    /// Indexed by client index as used by Cluster.
    client_requests: [config.clients_max]RequestQueue =
        [_]RequestQueue{.{}} ** config.clients_max,

    /// Indexed by replica index.
    state_machine_states: [config.replicas_max]u128,

    /// Maps each state ever reached to the transition count at which it was reached.
    history: StateTransitions,

    /// The highest canonical state reached by the cluster.
    state: u128,

    /// The number of times the canonical state has been advanced.
    transitions: u64 = 0,

    /// Records the initial state of every replica's state machine (asserting they
    /// all agree) and seeds the history with that state at transition 0.
    pub fn init(allocator: *mem.Allocator, cluster: *Cluster) !StateChecker {
        const state = cluster.state_machines[0].state;

        var state_machine_states: [config.replicas_max]u128 = undefined;
        for (cluster.state_machines) |state_machine, i| {
            assert(state_machine.state == state);
            state_machine_states[i] = state_machine.state;
        }

        var history = StateTransitions.init(allocator);
        errdefer history.deinit();

        var state_checker = StateChecker{
            .state_machine_states = state_machine_states,
            .history = history,
            .state = state,
        };

        try state_checker.history.putNoClobber(state, state_checker.transitions);

        return state_checker;
    }

    pub fn deinit(state_checker: *StateChecker) void {
        state_checker.history.deinit();
    }

    /// Called whenever `replica` may have advanced: validates the replica's new
    /// state against the history and the inflight client requests, panicking on
    /// any invalid transition.
    pub fn check_state(state_checker: *StateChecker, replica: u8) void {
        // The checker is embedded in Cluster; recover the owning cluster to read
        // the live state machines.
        const cluster = @fieldParentPtr(Cluster, "state_checker", state_checker);

        // a: the replica's last recorded state; b: its current state machine state.
        const a = state_checker.state_machine_states[replica];
        const b = cluster.state_machines[replica].state;

        if (b == a) return;
        state_checker.state_machine_states[replica] = b;

        // If some other replica has already reached this state, then it will be in the history:
        if (state_checker.history.get(b)) |transition| {
            // A replica may transition more than once to the same state, for example, when
            // restarting after a crash and replaying the log. The more important invariant is that
            // the cluster as a whole may not transition to the same state more than once, and once
            // transitioned may not regress.
            log.info(
                "{d:0>4}/{d:0>4} {x:0>32} > {x:0>32} {}",
                .{ transition, state_checker.transitions, a, b, replica },
            );
            return;
        }

        // The replica has transitioned to state `b` that is not yet in the history.
        // Check if this is a valid new state based on all currently inflight client requests.
        for (state_checker.client_requests) |*queue| {
            if (queue.head_ptr()) |input| {
                // Valid iff hashing this inflight input onto the canonical state yields `b`.
                if (b == StateMachine.hash(state_checker.state, std.mem.asBytes(input))) {
                    // The previous state `a` must be the current head of the chain:
                    // otherwise the replica skipped transitions to get here.
                    const transitions_executed = state_checker.history.get(a).?;
                    if (transitions_executed < state_checker.transitions) {
                        @panic("replica skipped interim transitions");
                    } else {
                        assert(transitions_executed == state_checker.transitions);
                    }

                    state_checker.state = b;
                    state_checker.transitions += 1;

                    log.info(" {d:0>4} {x:0>32} > {x:0>32} {}", .{
                        state_checker.transitions,
                        a,
                        b,
                        replica,
                    });

                    state_checker.history.putNoClobber(b, state_checker.transitions) catch {
                        @panic("state checker unable to allocate memory for history.put()");
                    };

                    // As soon as we reach a valid state we must pop the inflight request.
                    // We cannot wait until the client receives the reply because that would allow
                    // the inflight request to be used to reach other states in the interim.
                    // We must therefore use our own queue rather than the clients' queues.
                    _ = queue.pop();
                    return;
                }
            }
        }

        @panic("replica transitioned to an invalid state");
    }

    /// Returns true iff every active replica's state machine has reached the same
    /// state, panicking if that common state is a regressed (non-head) state.
    pub fn convergence(state_checker: *StateChecker) bool {
        const cluster = @fieldParentPtr(Cluster, "state_checker", state_checker);

        const a = state_checker.state_machine_states[0];
        for (state_checker.state_machine_states[1..cluster.options.replica_count]) |b| {
            if (b != a) return false;
        }

        const transitions_executed = state_checker.history.get(a).?;
        if (transitions_executed < state_checker.transitions) {
            @panic("cluster reached convergence but on a regressed state");
        } else {
            assert(transitions_executed == state_checker.transitions);
        }

        return true;
    }
};
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
|
|
3
|
+
const log = std.log.scoped(.state_machine);
|
|
4
|
+
|
|
5
|
+
/// A test state machine whose state is a 128-bit hash chain: each committed
/// `.hash` request folds the client id and request input into the current state.
pub const StateMachine = struct {
    pub const Operation = enum(u8) {
        /// Operations reserved by VR protocol (for all state machines):
        reserved,
        init,
        register,

        hash,
    };

    state: u128,

    /// Derives the initial state deterministically from the given seed.
    pub fn init(seed: u64) StateMachine {
        return .{ .state = hash(0, std.mem.asBytes(&seed)) };
    }

    pub fn prepare(
        self: *StateMachine,
        realtime: i64,
        operation: Operation,
        input: []u8,
    ) void {
        // TODO: use realtime in some way to test the system
    }

    /// Applies `operation` to the state, writes the result into `output`, and
    /// returns the number of output bytes written.
    pub fn commit(
        self: *StateMachine,
        client: u128,
        operation: Operation,
        input: []const u8,
        output: []u8,
    ) usize {
        switch (operation) {
            .reserved, .init => unreachable,
            .register => return 0,

            // TODO: instead of always using the first 32 bytes of the output
            // buffer, get tricky and use a random but deterministic slice
            // of it, filling the rest with 0s.
            .hash => {
                // Fold the input into our current state, creating a hash chain.
                // Hash the input with the client ID since small inputs may collide across clients.
                const input_hash = hash(client, input);
                const chained = hash(self.state, std.mem.asBytes(&input_hash));

                log.debug("state={x} input={x} input.len={} new state={x}", .{
                    self.state,
                    input_hash,
                    input.len,
                    chained,
                });

                self.state = chained;
                std.mem.copy(u8, output, std.mem.asBytes(&self.state));
                return @sizeOf(u128);
            },
        }
    }

    /// Keyed Blake3 of `input`, keyed by `state` (zero-padded to 32 bytes) and
    /// truncated to 128 bits.
    pub fn hash(state: u128, input: []const u8) u128 {
        var key = [_]u8{0} ** 32;
        std.mem.copy(u8, key[0..16], std.mem.asBytes(&state));
        var digest: [32]u8 = undefined;
        std.crypto.hash.Blake3.hash(input, &digest, .{ .key = key });
        return @bitCast(u128, digest[0..16].*);
    }
};
|