tigerbeetle-node 0.11.7 → 0.11.9
This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
- package/dist/.client.node.sha256 +1 -1
- package/package.json +4 -3
- package/scripts/build_lib.sh +29 -0
- package/src/node.zig +1 -1
- package/src/tigerbeetle/scripts/validate_docs.sh +7 -1
- package/src/tigerbeetle/src/benchmark.zig +3 -3
- package/src/tigerbeetle/src/config.zig +29 -16
- package/src/tigerbeetle/src/constants.zig +30 -9
- package/src/tigerbeetle/src/ewah.zig +5 -5
- package/src/tigerbeetle/src/ewah_fuzz.zig +1 -1
- package/src/tigerbeetle/src/lsm/binary_search.zig +1 -1
- package/src/tigerbeetle/src/lsm/bloom_filter.zig +1 -1
- package/src/tigerbeetle/src/lsm/compaction.zig +34 -21
- package/src/tigerbeetle/src/lsm/forest_fuzz.zig +85 -103
- package/src/tigerbeetle/src/lsm/grid.zig +19 -13
- package/src/tigerbeetle/src/lsm/manifest_log.zig +8 -10
- package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +12 -8
- package/src/tigerbeetle/src/lsm/merge_iterator.zig +1 -1
- package/src/tigerbeetle/src/lsm/segmented_array.zig +17 -17
- package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +1 -1
- package/src/tigerbeetle/src/lsm/set_associative_cache.zig +1 -1
- package/src/tigerbeetle/src/lsm/table.zig +8 -20
- package/src/tigerbeetle/src/lsm/table_immutable.zig +1 -1
- package/src/tigerbeetle/src/lsm/table_iterator.zig +3 -3
- package/src/tigerbeetle/src/lsm/table_mutable.zig +14 -2
- package/src/tigerbeetle/src/lsm/tree.zig +31 -5
- package/src/tigerbeetle/src/lsm/tree_fuzz.zig +86 -114
- package/src/tigerbeetle/src/message_bus.zig +4 -4
- package/src/tigerbeetle/src/message_pool.zig +7 -10
- package/src/tigerbeetle/src/ring_buffer.zig +22 -12
- package/src/tigerbeetle/src/simulator.zig +360 -214
- package/src/tigerbeetle/src/state_machine/auditor.zig +5 -5
- package/src/tigerbeetle/src/state_machine/workload.zig +3 -3
- package/src/tigerbeetle/src/state_machine.zig +190 -178
- package/src/tigerbeetle/src/{util.zig → stdx.zig} +2 -0
- package/src/tigerbeetle/src/storage.zig +13 -6
- package/src/tigerbeetle/src/{test → testing/cluster}/message_bus.zig +3 -3
- package/src/tigerbeetle/src/{test → testing/cluster}/network.zig +46 -22
- package/src/tigerbeetle/src/testing/cluster/state_checker.zig +169 -0
- package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +202 -0
- package/src/tigerbeetle/src/testing/cluster.zig +537 -0
- package/src/tigerbeetle/src/{test → testing}/fuzz.zig +0 -0
- package/src/tigerbeetle/src/testing/hash_log.zig +66 -0
- package/src/tigerbeetle/src/{test → testing}/id.zig +0 -0
- package/src/tigerbeetle/src/testing/packet_simulator.zig +365 -0
- package/src/tigerbeetle/src/{test → testing}/priority_queue.zig +1 -1
- package/src/tigerbeetle/src/testing/reply_sequence.zig +139 -0
- package/src/tigerbeetle/src/{test → testing}/state_machine.zig +3 -1
- package/src/tigerbeetle/src/testing/storage.zig +754 -0
- package/src/tigerbeetle/src/{test → testing}/table.zig +21 -0
- package/src/tigerbeetle/src/{test → testing}/time.zig +0 -0
- package/src/tigerbeetle/src/tigerbeetle.zig +2 -0
- package/src/tigerbeetle/src/tracer.zig +3 -3
- package/src/tigerbeetle/src/unit_tests.zig +4 -4
- package/src/tigerbeetle/src/vopr.zig +2 -2
- package/src/tigerbeetle/src/vsr/client.zig +16 -9
- package/src/tigerbeetle/src/vsr/clock.zig +93 -53
- package/src/tigerbeetle/src/vsr/journal.zig +29 -14
- package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +2 -2
- package/src/tigerbeetle/src/vsr/replica.zig +1383 -774
- package/src/tigerbeetle/src/vsr/replica_format.zig +2 -2
- package/src/tigerbeetle/src/vsr/superblock.zig +59 -43
- package/src/tigerbeetle/src/vsr/superblock_client_table.zig +7 -7
- package/src/tigerbeetle/src/vsr/superblock_free_set.zig +1 -1
- package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +1 -1
- package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +15 -7
- package/src/tigerbeetle/src/vsr/superblock_manifest.zig +38 -19
- package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +1 -1
- package/src/tigerbeetle/src/vsr.zig +6 -4
- package/src/tigerbeetle/src/demo.zig +0 -132
- package/src/tigerbeetle/src/demo_01_create_accounts.zig +0 -35
- package/src/tigerbeetle/src/demo_02_lookup_accounts.zig +0 -7
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +0 -37
- package/src/tigerbeetle/src/demo_04_create_pending_transfers.zig +0 -61
- package/src/tigerbeetle/src/demo_05_post_pending_transfers.zig +0 -37
- package/src/tigerbeetle/src/demo_06_void_pending_transfers.zig +0 -24
- package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +0 -7
- package/src/tigerbeetle/src/test/cluster.zig +0 -352
- package/src/tigerbeetle/src/test/conductor.zig +0 -366
- package/src/tigerbeetle/src/test/packet_simulator.zig +0 -398
- package/src/tigerbeetle/src/test/state_checker.zig +0 -169
- package/src/tigerbeetle/src/test/storage.zig +0 -864
- package/src/tigerbeetle/src/test/storage_checker.zig +0 -204
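
The large removals at the bottom of this list are a reorganization rather than lost test coverage: the test/ harness moved under testing/ (test/packet_simulator.zig was rewritten as testing/packet_simulator.zig, test/cluster.zig as testing/cluster.zig, and test/state_checker.zig and test/storage_checker.zig now live under testing/cluster/), while the demo*.zig examples were dropped outright. The contents of two of the deleted files, test/packet_simulator.zig and test/state_checker.zig, follow in full.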
package/src/tigerbeetle/src/test/packet_simulator.zig
@@ -1,398 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const math = std.math;
-
-const log = std.log.scoped(.packet_simulator);
-const ReplicaHealth = @import("./cluster.zig").ReplicaHealth;
-
-pub const PacketSimulatorOptions = struct {
-    /// Mean for the exponential distribution used to calculate forward delay.
-    one_way_delay_mean: u64,
-    one_way_delay_min: u64,
-
-    packet_loss_probability: u8,
-    packet_replay_probability: u8,
-    seed: u64,
-
-    replica_count: u8,
-    client_count: u8,
-    node_count: u8,
-
-    /// How the partitions should be generated
-    partition_mode: PartitionMode,
-
-    /// Probability per tick that a partition will occur
-    partition_probability: u8,
-
-    /// Probability per tick that a partition will resolve
-    unpartition_probability: u8,
-
-    /// Minimum time a partition lasts
-    partition_stability: u32,
-
-    /// Minimum time the cluster is fully connected until it is partitioned again
-    unpartition_stability: u32,
-
-    /// The maximum number of in-flight packets a path can have before packets are randomly dropped.
-    path_maximum_capacity: u8,
-
-    /// Mean for the exponential distribution used to calculate how long a path is clogged for.
-    path_clog_duration_mean: u64,
-    path_clog_probability: u8,
-};
-
-pub const Path = struct {
-    source: u8,
-    target: u8,
-};
-
-/// Determines how the partitions are created. Partitions
-/// are two-way, i.e. if i cannot communicate with j, then
-/// j cannot communicate with i.
-///
-/// Only replicas are partitioned. There will always be exactly two partitions.
-pub const PartitionMode = enum {
-    /// Draws the size of the partition uniformly at random from (1, n-1).
-    /// Replicas are randomly assigned a partition.
-    uniform_size,
-
-    /// Assigns each node to a partition uniformly at random. This biases towards
-    /// equal-size partitions.
-    uniform_partition,
-
-    /// Isolates exactly one replica.
-    isolate_single,
-
-    /// User-defined partitioning algorithm.
-    custom,
-};
-
-/// A fully connected network of nodes used for testing. Simulates the fault model:
-/// Packets may be dropped.
-/// Packets may be delayed.
-/// Packets may be replayed.
-pub const PacketStatistics = enum(u8) {
-    dropped_due_to_partition,
-    dropped_due_to_congestion,
-    dropped_due_to_crash,
-    dropped,
-    replay,
-};
-
-pub fn PacketSimulator(comptime Packet: type) type {
-    return struct {
-        const Self = @This();
-
-        const Data = struct {
-            expiry: u64,
-            callback: fn (packet: Packet, path: Path) void,
-            packet: Packet,
-        };
-
-        /// A send and receive path between each node in the network. We use the `path_index`
-        /// function to index it.
-        paths: []std.PriorityQueue(Data, void, Self.order_packets),
-
-        /// We can arbitrarily clog a path until a given tick.
-        path_clogged_till: []u64,
-        ticks: u64 = 0,
-        options: PacketSimulatorOptions,
-        prng: std.rand.DefaultPrng,
-        stats: [@typeInfo(PacketStatistics).Enum.fields.len]u32 = [_]u32{0} **
-            @typeInfo(PacketStatistics).Enum.fields.len,
-
-        is_partitioned: bool,
-        partition: []bool,
-        replicas: []u8,
-        stability: u32,
-
-        pub fn init(allocator: std.mem.Allocator, options: PacketSimulatorOptions) !Self {
-            assert(options.one_way_delay_mean >= options.one_way_delay_min);
-
-            const paths = try allocator.alloc(
-                std.PriorityQueue(Data, void, Self.order_packets),
-                @as(usize, options.node_count) * options.node_count,
-            );
-            errdefer allocator.free(paths);
-
-            const path_clogged_till = try allocator.alloc(
-                u64,
-                @as(usize, options.node_count) * options.node_count,
-            );
-            errdefer allocator.free(path_clogged_till);
-            std.mem.set(u64, path_clogged_till, 0);
-
-            const partition = try allocator.alloc(bool, @as(usize, options.replica_count));
-            errdefer allocator.free(partition);
-
-            const replicas = try allocator.alloc(u8, @as(usize, options.replica_count));
-            errdefer allocator.free(replicas);
-            for (replicas) |*replica, i| replica.* = @intCast(u8, i);
-
-            var self = Self{
-                .paths = paths,
-                .path_clogged_till = path_clogged_till,
-                .options = options,
-                .prng = std.rand.DefaultPrng.init(options.seed),
-
-                .is_partitioned = false,
-                .stability = options.unpartition_stability,
-                .partition = partition,
-                .replicas = replicas,
-            };
-
-            for (self.paths) |*queue, i| {
-                errdefer for (self.paths[0..i]) |path| path.deinit();
-
-                queue.* = std.PriorityQueue(Data, void, Self.order_packets).init(allocator, {});
-                try queue.ensureTotalCapacity(options.path_maximum_capacity);
-            }
-            errdefer for (self.paths) |path| path.deinit();
-
-            return self;
-        }
-
-        pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
-            for (self.paths) |*queue| {
-                while (queue.peek()) |_| queue.remove().packet.deinit();
-            }
-
-            allocator.free(self.paths);
-            allocator.free(self.path_clogged_till);
-            allocator.free(self.partition);
-            allocator.free(self.replicas);
-        }
-
-        fn order_packets(context: void, a: Data, b: Data) math.Order {
-            _ = context;
-
-            return math.order(a.expiry, b.expiry);
-        }
-
-        fn should_drop(self: *Self) bool {
-            return self.prng.random().uintAtMost(u8, 100) < self.options.packet_loss_probability;
-        }
-
-        fn path_index(self: *Self, path: Path) usize {
-            assert(path.source < self.options.node_count);
-            assert(path.target < self.options.node_count);
-
-            return @as(usize, path.source) * self.options.node_count + path.target;
-        }
-
-        pub fn path_queue(self: *Self, path: Path) *std.PriorityQueue(Data, void, Self.order_packets) {
-            return &self.paths[self.path_index(path)];
-        }
-
-        fn is_clogged(self: *Self, path: Path) bool {
-            return self.path_clogged_till[self.path_index(path)] > self.ticks;
-        }
-
-        fn should_clog(self: *Self, path: Path) bool {
-            _ = path;
-
-            return self.prng.random().uintAtMost(u8, 100) < self.options.path_clog_probability;
-        }
-
-        fn clog_for(self: *Self, path: Path, ticks: u64) void {
-            const clog_expiry = &self.path_clogged_till[self.path_index(path)];
-            clog_expiry.* = self.ticks + ticks;
-            log.debug("Path path.source={} path.target={} clogged for ticks={}", .{
-                path.source,
-                path.target,
-                ticks,
-            });
-        }
-
-        fn should_replay(self: *Self) bool {
-            return self.prng.random().uintAtMost(u8, 100) < self.options.packet_replay_probability;
-        }
-
-        fn should_partition(self: *Self) bool {
-            return self.prng.random().uintAtMost(u8, 100) < self.options.partition_probability;
-        }
-
-        fn should_unpartition(self: *Self) bool {
-            return self.prng.random().uintAtMost(u8, 100) < self.options.unpartition_probability;
-        }
-
-        /// Return a value produced using an exponential distribution with
-        /// the minimum and mean specified in self.options
-        fn one_way_delay(self: *Self) u64 {
-            const min = self.options.one_way_delay_min;
-            const mean = self.options.one_way_delay_mean;
-            return min + @floatToInt(u64, @intToFloat(f64, mean - min) * self.prng.random().floatExp(f64));
-        }
-
-        /// Partitions the network. Guaranteed to isolate at least one replica.
-        fn partition_network(
-            self: *Self,
-        ) void {
-            assert(self.options.replica_count > 1);
-
-            self.is_partitioned = true;
-            self.stability = self.options.partition_stability;
-
-            switch (self.options.partition_mode) {
-                .uniform_size => {
-                    // Exclude cases sz == 0 and sz == replica_count
-                    const sz =
-                        1 + self.prng.random().uintAtMost(u8, self.options.replica_count - 2);
-                    self.prng.random().shuffle(u8, self.replicas);
-                    for (self.replicas) |r, i| {
-                        self.partition[r] = i < sz;
-                    }
-                },
-                .uniform_partition => {
-                    var only_same = true;
-                    self.partition[0] =
-                        self.prng.random().uintLessThan(u8, 2) == 1;
-
-                    var i: usize = 1;
-                    while (i < self.options.replica_count) : (i += 1) {
-                        self.partition[i] =
-                            self.prng.random().uintLessThan(u8, 2) == 1;
-                        only_same =
-                            only_same and (self.partition[i - 1] == self.partition[i]);
-                    }
-
-                    if (only_same) {
-                        const n = self.prng.random().uintLessThan(u8, self.options.replica_count);
-                        self.partition[n] = true;
-                    }
-                },
-                .isolate_single => {
-                    for (self.replicas) |_, i| {
-                        self.partition[i] = false;
-                    }
-                    const n = self.prng.random().uintLessThan(u8, self.options.replica_count);
-                    self.partition[n] = true;
-                },
-                // Put your own partitioning logic here.
-                .custom => unreachable,
-            }
-        }
-
-        fn unpartition_network(
-            self: *Self,
-        ) void {
-            self.is_partitioned = false;
-            self.stability = self.options.unpartition_stability;
-
-            for (self.replicas) |_, i| {
-                self.partition[i] = false;
-            }
-        }
-
-        fn replicas_are_in_different_partitions(self: *Self, from: u8, to: u8) bool {
-            return from < self.options.replica_count and
-                to < self.options.replica_count and
-                self.partition[from] != self.partition[to];
-        }
-
-        pub fn tick(self: *Self, cluster_health: []const ReplicaHealth) void {
-            self.ticks += 1;
-
-            if (self.stability > 0) {
-                self.stability -= 1;
-            } else {
-                if (self.is_partitioned) {
-                    if (self.should_unpartition()) {
-                        self.unpartition_network();
-                        log.err("unpartitioned network: partition={d}", .{self.partition});
-                    }
-                } else {
-                    if (self.options.replica_count > 1 and self.should_partition()) {
-                        self.partition_network();
-                        log.err("partitioned network: partition={d}", .{self.partition});
-                    }
-                }
-            }
-
-            var from: u8 = 0;
-            while (from < self.options.node_count) : (from += 1) {
-                var to: u8 = 0;
-                while (to < self.options.node_count) : (to += 1) {
-                    const path = .{ .source = from, .target = to };
-                    if (self.is_clogged(path)) continue;
-
-                    const queue = self.path_queue(path);
-                    while (queue.peek()) |*data| {
-                        if (data.expiry > self.ticks) break;
-                        _ = queue.remove();
-
-                        if (self.is_partitioned and
-                            self.replicas_are_in_different_partitions(from, to))
-                        {
-                            self.stats[@enumToInt(PacketStatistics.dropped_due_to_partition)] += 1;
-                            log.err("dropped packet (different partitions): from={} to={}", .{ from, to });
-                            data.packet.deinit();
-                            continue;
-                        }
-
-                        if (self.should_drop()) {
-                            self.stats[@enumToInt(PacketStatistics.dropped)] += 1;
-                            log.err("dropped packet from={} to={}.", .{ from, to });
-                            data.packet.deinit();
-                            continue;
-                        }
-
-                        if (to < self.options.replica_count and cluster_health[to] == .down) {
-                            self.stats[@enumToInt(PacketStatistics.dropped_due_to_crash)] += 1;
-                            log.err("dropped packet (destination is crashed): from={} to={}", .{ from, to });
-                            data.packet.deinit();
-                            continue;
-                        }
-
-                        if (self.should_replay()) {
-                            self.submit_packet(data.packet, data.callback, path);
-
-                            log.debug("replayed packet from={} to={}", .{ from, to });
-                            self.stats[@enumToInt(PacketStatistics.replay)] += 1;
-
-                            data.callback(data.packet, path);
-                        } else {
-                            log.debug("delivering packet from={} to={}", .{ from, to });
-                            data.callback(data.packet, path);
-                            data.packet.deinit();
-                        }
-                    }
-
-                    const reverse_path: Path = .{ .source = to, .target = from };
-
-                    if (self.should_clog(reverse_path)) {
-                        log.debug("reverse path clogged", .{});
-                        const mean = @intToFloat(f64, self.options.path_clog_duration_mean);
-                        const ticks = @floatToInt(u64, mean * self.prng.random().floatExp(f64));
-                        self.clog_for(reverse_path, ticks);
-                    }
-                }
-            }
-        }
-
-        pub fn submit_packet(
-            self: *Self,
-            packet: Packet,
-            callback: fn (packet: Packet, path: Path) void,
-            path: Path,
-        ) void {
-            const queue = self.path_queue(path);
-            var queue_length = queue.count();
-            if (queue_length + 1 > self.options.path_maximum_capacity) {
-                const index = self.prng.random().uintLessThanBiased(u64, queue_length);
-                const data = queue.removeIndex(index);
-                data.packet.deinit();
-                log.err("submit_packet: {} reached capacity, dropped packet={}", .{
-                    path,
-                    index,
-                });
-            }
-
-            queue.add(.{
-                .expiry = self.ticks + self.one_way_delay(),
-                .packet = packet,
-                .callback = callback,
-            }) catch unreachable;
-        }
-    };
-}
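
For orientation, here is a minimal sketch of how this simulator is driven. Only the PacketSimulator API comes from the file above; the packet type, callback, option values, and the assumption that ReplicaHealth has an `up` variant are illustrative:

const std = @import("std");
const packet_simulator = @import("packet_simulator.zig");
const PacketSimulator = packet_simulator.PacketSimulator;
const Path = packet_simulator.Path;
const ReplicaHealth = @import("cluster.zig").ReplicaHealth;

// Hypothetical packet type: the simulator only requires a `deinit` method.
const TestPacket = struct {
    sequence: u64,
    pub fn deinit(packet: TestPacket) void {
        _ = packet;
    }
};

fn deliver(packet: TestPacket, path: Path) void {
    std.debug.print("delivered packet {} on path {}->{}\n", .{
        packet.sequence,
        path.source,
        path.target,
    });
}

pub fn main() !void {
    var simulator = try PacketSimulator(TestPacket).init(std.heap.page_allocator, .{
        .one_way_delay_mean = 5, // Ticks; must be >= one_way_delay_min.
        .one_way_delay_min = 1,
        .packet_loss_probability = 10, // Percent: compared against uintAtMost(u8, 100).
        .packet_replay_probability = 5,
        .seed = 42,
        .replica_count = 3,
        .client_count = 2,
        .node_count = 5, // replica_count + client_count.
        .partition_mode = .isolate_single,
        .partition_probability = 1,
        .unpartition_probability = 10,
        .partition_stability = 100,
        .unpartition_stability = 20,
        .path_maximum_capacity = 8,
        .path_clog_duration_mean = 50,
        .path_clog_probability = 2,
    });
    defer simulator.deinit(std.heap.page_allocator);

    // Queue a packet from node 3 to node 0:
    simulator.submit_packet(.{ .sequence = 1 }, deliver, .{ .source = 3, .target = 0 });

    // One health entry per replica; `.up` is assumed to be a ReplicaHealth variant.
    const health = [_]ReplicaHealth{.up} ** 3;
    var i: usize = 0;
    while (i < 100) : (i += 1) simulator.tick(&health);
}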
package/src/tigerbeetle/src/test/state_checker.zig
@@ -1,169 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const mem = std.mem;
-
-const constants = @import("../constants.zig");
-const vsr = @import("../vsr.zig");
-
-const Cluster = @import("cluster.zig").Cluster;
-const Network = @import("network.zig").Network;
-const Storage = @import("storage.zig").Storage;
-const Client = @import("cluster.zig").Client;
-const Replica = @import("cluster.zig").Replica;
-
-const message_pool = @import("../message_pool.zig");
-const MessagePool = message_pool.MessagePool;
-const Message = MessagePool.Message;
-
-const RingBuffer = @import("../ring_buffer.zig").RingBuffer;
-
-const StateTransitions = std.AutoHashMap(u128, u64);
-
-const log = std.log.scoped(.state_checker);
-
-pub const StateChecker = struct {
-    /// Indexed by replica index.
-    replica_states: [constants.replicas_max]u128 = [_]u128{0} ** constants.replicas_max,
-
-    /// Keyed by committed `message.header.checksum`.
-    ///
-    /// The first state is always `root_prepare.checksum`, since the root prepare doesn't
-    /// commit normally.
-    history: StateTransitions,
-
-    root: u128,
-
-    // TODO When StateChecker is owned by the Simulation, use @fieldParentPtr to get these.
-    replicas: []const Replica,
-    clients: []const Client,
-
-    /// The highest canonical state reached by the cluster.
-    state: u128 = 0,
-
-    /// The number of times the canonical state has been advanced.
-    requests_committed: u64 = 0,
-
-    pub fn init(
-        allocator: mem.Allocator,
-        cluster: u32,
-        replicas: []const Replica,
-        clients: []const Client,
-    ) !StateChecker {
-        var history = StateTransitions.init(allocator);
-        errdefer history.deinit();
-
-        const root_checksum = vsr.Header.root_prepare(cluster).checksum;
-
-        var state_checker = StateChecker{
-            .history = history,
-            .root = root_checksum,
-            .replicas = replicas,
-            .clients = clients,
-        };
-        try state_checker.history.putNoClobber(root_checksum, state_checker.requests_committed);
-
-        return state_checker;
-    }
-
-    pub fn deinit(state_checker: *StateChecker) void {
-        state_checker.history.deinit();
-    }
-
-    pub fn check_state(state_checker: *StateChecker, replica_index: u8) !void {
-        const replica = &state_checker.replicas[replica_index];
-        const commit_header = header: {
-            if (replica.journal.status == .recovered) {
-                const commit_header = replica.journal.header_with_op(replica.commit_min);
-                assert(commit_header != null or replica.commit_min == replica.op_checkpoint);
-                break :header replica.journal.header_with_op(replica.commit_min);
-            } else {
-                // Still recovering.
-                break :header null;
-            }
-        };
-
-        const a = state_checker.replica_states[replica_index];
-        const b = if (commit_header) |h| h.checksum else state_checker.root;
-
-        if (b == a) return;
-        state_checker.replica_states[replica_index] = b;
-
-        // If some other replica has already reached this state, then it will be in the history:
-        if (state_checker.history.get(b)) |transition| {
-            // A replica may transition more than once to the same state, for example, when
-            // restarting after a crash and replaying the log. The more important invariant is that
-            // the cluster as a whole may not transition to the same state more than once, and once
-            // transitioned may not regress.
-            log.info(
-                "{d:0>4}/{d:0>4} {x:0>32} > {x:0>32} {}",
-                .{ transition, state_checker.requests_committed, a, b, replica_index },
-            );
-            return;
-        }
-
-        if (commit_header == null) return;
-        assert(commit_header.?.parent == a);
-        assert(commit_header.?.op > 0);
-        assert(commit_header.?.command == .prepare);
-        assert(commit_header.?.operation != .reserved);
-
-        // The replica has transitioned to state `b` that is not yet in the history.
-        // Check if this is a valid new state based on the originating client's inflight request.
-        const client = for (state_checker.clients) |*client| {
-            if (client.id == commit_header.?.client) break client;
-        } else unreachable;
-
-        if (client.request_queue.empty()) {
-            return error.ReplicaTransitionedToInvalidState;
-        }
-
-        const request = client.request_queue.head_ptr_const().?;
-        assert(request.message.header.client == commit_header.?.client);
-        assert(request.message.header.request == commit_header.?.request);
-        assert(request.message.header.command == .request);
-        assert(request.message.header.operation == commit_header.?.operation);
-        assert(request.message.header.size == commit_header.?.size);
-        // `checksum_body` will not match; the leader's StateMachine updated the timestamps in the
-        // prepare body's accounts/transfers.
-
-        const transitions_executed = state_checker.history.get(a).?;
-        if (transitions_executed < state_checker.requests_committed) {
-            return error.ReplicaSkippedInterimTransitions;
-        } else {
-            assert(transitions_executed == state_checker.requests_committed);
-        }
-
-        state_checker.state = b;
-        state_checker.requests_committed += 1;
-        assert(state_checker.requests_committed == commit_header.?.op);
-
-        log.info(" {d:0>4} {x:0>32} > {x:0>32} {}", .{
-            state_checker.requests_committed,
-            a,
-            b,
-            replica_index,
-        });
-
-        state_checker.history.putNoClobber(b, state_checker.requests_committed) catch {
-            @panic("state checker unable to allocate memory for history.put()");
-        };
-    }
-
-    pub fn convergence(state_checker: *StateChecker) bool {
-        const a = state_checker.replica_states[0];
-        for (state_checker.replica_states[1..state_checker.replicas[0].replica_count]) |b| {
-            if (b != a) return false;
-        }
-
-        const transitions_executed = state_checker.history.get(a).?;
-        if (transitions_executed < state_checker.requests_committed) {
-            // Cluster reached convergence but on a regressed state.
-            // A replica reached the transition limit, crashed, then repaired.
-            return false;
-        } else {
-            assert(transitions_executed == state_checker.requests_committed);
-        }
-
-        return true;
-    }
-};
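
Its replacement, testing/cluster/state_checker.zig, has the same line count (+169). A schematic sketch of how a test harness drives this checker; the cluster construction and per-tick cluster stepping are elided, and the loop bound and error name are illustrative:

const std = @import("std");
const StateChecker = @import("state_checker.zig").StateChecker;
const Replica = @import("cluster.zig").Replica;
const Client = @import("cluster.zig").Client;

/// After every simulated tick, verify that each replica's commits extend the
/// canonical history, and stop once all replicas have converged on one state.
fn check_until_convergence(
    allocator: std.mem.Allocator,
    replicas: []const Replica,
    clients: []const Client,
) !void {
    var state_checker = try StateChecker.init(allocator, 0, replicas, clients);
    defer state_checker.deinit();

    var tick: u64 = 0;
    while (tick < 1_000_000) : (tick += 1) {
        // ... tick the cluster, network, and clients here ...
        for (replicas) |_, i| {
            // Fails fast on an invalid or skipped state transition:
            try state_checker.check_state(@intCast(u8, i));
        }
        if (state_checker.convergence()) return;
    }
    return error.ClusterDidNotConverge;
}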