tigerbeetle-node 0.4.1 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -5
- package/dist/benchmark.js.map +1 -1
- package/dist/index.d.ts +18 -16
- package/dist/index.js +35 -13
- package/dist/index.js.map +1 -1
- package/dist/test.js +13 -1
- package/dist/test.js.map +1 -1
- package/package.json +12 -12
- package/scripts/postinstall.sh +2 -2
- package/src/benchmark.ts +2 -2
- package/src/index.ts +29 -4
- package/src/node.zig +124 -21
- package/src/test.ts +18 -4
- package/src/tigerbeetle/scripts/install.sh +2 -2
- package/src/tigerbeetle/scripts/install_zig.bat +109 -0
- package/src/tigerbeetle/scripts/install_zig.sh +22 -3
- package/src/tigerbeetle/scripts/lint.zig +8 -2
- package/src/tigerbeetle/scripts/vopr.bat +48 -0
- package/src/tigerbeetle/scripts/vopr.sh +24 -4
- package/src/tigerbeetle/src/benchmark.zig +18 -14
- package/src/tigerbeetle/src/cli.zig +8 -6
- package/src/tigerbeetle/src/config.zig +10 -18
- package/src/tigerbeetle/src/demo.zig +122 -92
- package/src/tigerbeetle/src/demo_01_create_accounts.zig +5 -3
- package/src/tigerbeetle/src/demo_02_lookup_accounts.zig +2 -3
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +5 -3
- package/src/tigerbeetle/src/demo_04_create_transfers_two_phase_commit.zig +5 -3
- package/src/tigerbeetle/src/demo_05_accept_transfers.zig +5 -3
- package/src/tigerbeetle/src/demo_06_reject_transfers.zig +5 -3
- package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +7 -0
- package/src/tigerbeetle/src/fifo.zig +14 -14
- package/src/tigerbeetle/src/io/benchmark.zig +238 -0
- package/src/tigerbeetle/src/{io_darwin.zig → io/darwin.zig} +88 -121
- package/src/tigerbeetle/src/io/linux.zig +933 -0
- package/src/tigerbeetle/src/io/test.zig +621 -0
- package/src/tigerbeetle/src/io.zig +7 -1322
- package/src/tigerbeetle/src/main.zig +22 -13
- package/src/tigerbeetle/src/message_bus.zig +50 -61
- package/src/tigerbeetle/src/message_pool.zig +6 -5
- package/src/tigerbeetle/src/ring_buffer.zig +135 -68
- package/src/tigerbeetle/src/simulator.zig +120 -47
- package/src/tigerbeetle/src/state_machine.zig +853 -27
- package/src/tigerbeetle/src/storage.zig +51 -48
- package/src/tigerbeetle/src/test/cluster.zig +90 -14
- package/src/tigerbeetle/src/test/message_bus.zig +7 -10
- package/src/tigerbeetle/src/test/network.zig +5 -5
- package/src/tigerbeetle/src/test/packet_simulator.zig +188 -32
- package/src/tigerbeetle/src/test/state_checker.zig +3 -3
- package/src/tigerbeetle/src/test/state_machine.zig +6 -4
- package/src/tigerbeetle/src/test/storage.zig +322 -26
- package/src/tigerbeetle/src/test/time.zig +2 -2
- package/src/tigerbeetle/src/tigerbeetle.zig +6 -129
- package/src/tigerbeetle/src/time.zig +6 -5
- package/src/tigerbeetle/src/unit_tests.zig +14 -0
- package/src/tigerbeetle/src/{vr → vsr}/client.zig +21 -21
- package/src/tigerbeetle/src/{vr → vsr}/clock.zig +34 -48
- package/src/tigerbeetle/src/{vr → vsr}/journal.zig +259 -61
- package/src/tigerbeetle/src/{marzullo.zig → vsr/marzullo.zig} +6 -3
- package/src/tigerbeetle/src/{vr → vsr}/replica.zig +711 -349
- package/src/tigerbeetle/src/{vr.zig → vsr.zig} +32 -25
- package/src/translate.zig +55 -55
- package/src/tigerbeetle/src/fixed_array_list.zig +0 -53
- package/src/tigerbeetle/src/io_async.zig +0 -600
- package/src/tigerbeetle/src/test_client.zig +0 -41
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
const std = @import("std");
|
|
2
|
+
const builtin = @import("builtin");
|
|
2
3
|
const os = std.os;
|
|
3
4
|
const Allocator = std.mem.Allocator;
|
|
4
5
|
const assert = std.debug.assert;
|
|
5
6
|
const log = std.log.scoped(.storage);
|
|
6
7
|
|
|
7
8
|
const IO = @import("io.zig").IO;
|
|
8
|
-
const is_darwin =
|
|
9
|
+
const is_darwin = builtin.target.isDarwin();
|
|
9
10
|
|
|
10
11
|
const config = @import("config.zig");
|
|
11
|
-
const
|
|
12
|
+
const vsr = @import("vsr.zig");
|
|
12
13
|
|
|
13
14
|
pub const Storage = struct {
|
|
14
15
|
/// See usage in Journal.write_sectors() for details.
|
|
@@ -90,7 +91,7 @@ pub const Storage = struct {
|
|
|
90
91
|
buffer: []u8,
|
|
91
92
|
offset: u64,
|
|
92
93
|
) void {
|
|
93
|
-
|
|
94
|
+
assert_alignment(buffer, offset);
|
|
94
95
|
|
|
95
96
|
read.* = .{
|
|
96
97
|
.completion = undefined,
|
|
@@ -169,7 +170,7 @@ pub const Storage = struct {
|
|
|
169
170
|
assert(target.len > 0);
|
|
170
171
|
std.mem.set(u8, target, 0);
|
|
171
172
|
|
|
172
|
-
// We could set `read.target_max` to `
|
|
173
|
+
// We could set `read.target_max` to `vsr.sector_ceil(read.buffer.len)` here
|
|
173
174
|
// in order to restart our pseudo-binary search on the rest of the sectors to be
|
|
174
175
|
// read, optimistically assuming that this is the last failing sector.
|
|
175
176
|
// However, data corruption that causes EIO errors often has spatial locality.
|
|
@@ -189,7 +190,7 @@ pub const Storage = struct {
|
|
|
189
190
|
error.Unseekable,
|
|
190
191
|
error.Unexpected,
|
|
191
192
|
=> {
|
|
192
|
-
log.
|
|
193
|
+
log.err(
|
|
193
194
|
"impossible read: offset={} buffer.len={} error={s}",
|
|
194
195
|
.{ read.offset, read.buffer.len, @errorName(err) },
|
|
195
196
|
);
|
|
@@ -201,7 +202,7 @@ pub const Storage = struct {
|
|
|
201
202
|
// We tried to read more than there really is available to read.
|
|
202
203
|
// In other words, we thought we could read beyond the end of the file descriptor.
|
|
203
204
|
// This can happen if the data file inode `size` was truncated or corrupted.
|
|
204
|
-
log.
|
|
205
|
+
log.err(
|
|
205
206
|
"short read: buffer.len={} offset={} bytes_read={}",
|
|
206
207
|
.{ read.offset, read.buffer.len, bytes_read },
|
|
207
208
|
);
|
|
@@ -227,7 +228,7 @@ pub const Storage = struct {
|
|
|
227
228
|
buffer: []const u8,
|
|
228
229
|
offset: u64,
|
|
229
230
|
) void {
|
|
230
|
-
|
|
231
|
+
assert_alignment(buffer, offset);
|
|
231
232
|
|
|
232
233
|
write.* = .{
|
|
233
234
|
.completion = undefined,
|
|
@@ -262,7 +263,7 @@ pub const Storage = struct {
|
|
|
262
263
|
// TODO: It seems like it might be possible for some filesystems to return ETIMEDOUT
|
|
263
264
|
// here. Consider handling this without panicking.
|
|
264
265
|
else => {
|
|
265
|
-
log.
|
|
266
|
+
log.err(
|
|
266
267
|
"impossible write: offset={} buffer.len={} error={s}",
|
|
267
268
|
.{ write.offset, write.buffer.len, @errorName(err) },
|
|
268
269
|
);
|
|
@@ -295,7 +296,7 @@ pub const Storage = struct {
|
|
|
295
296
|
/// If this is not the case, then the underlying syscall will return EINVAL.
|
|
296
297
|
/// We check this only at the start of a read or write because the physical sector size may be
|
|
297
298
|
/// less than our logical sector size so that partial IOs then leave us no longer aligned.
|
|
298
|
-
fn assert_alignment(
|
|
299
|
+
fn assert_alignment(buffer: []const u8, offset: u64) void {
|
|
299
300
|
assert(@ptrToInt(buffer.ptr) % config.sector_size == 0);
|
|
300
301
|
assert(buffer.len % config.sector_size == 0);
|
|
301
302
|
assert(offset % config.sector_size == 0);
|
|
@@ -330,17 +331,17 @@ pub const Storage = struct {
|
|
|
330
331
|
// TODO Use O_EXCL when opening as a block device to obtain a mandatory exclusive lock.
|
|
331
332
|
// This is much stronger than an advisory exclusive lock, and is required on some platforms.
|
|
332
333
|
|
|
333
|
-
var flags: u32 = os.
|
|
334
|
+
var flags: u32 = os.O.CLOEXEC | os.O.RDWR | os.O.DSYNC;
|
|
334
335
|
var mode: os.mode_t = 0;
|
|
335
336
|
|
|
336
337
|
// TODO Document this and investigate whether this is in fact correct to set here.
|
|
337
|
-
if (@hasDecl(os, "O_LARGEFILE")) flags |= os.
|
|
338
|
+
if (@hasDecl(os, "O_LARGEFILE")) flags |= os.O.LARGEFILE;
|
|
338
339
|
|
|
339
340
|
var direct_io_supported = false;
|
|
340
341
|
if (config.direct_io) {
|
|
341
342
|
direct_io_supported = try Storage.fs_supports_direct_io(dir_fd);
|
|
342
343
|
if (direct_io_supported) {
|
|
343
|
-
if (!is_darwin) flags |= os.
|
|
344
|
+
if (!is_darwin) flags |= os.O.DIRECT;
|
|
344
345
|
} else if (config.deployment_environment == .development) {
|
|
345
346
|
log.warn("file system does not support Direct I/O", .{});
|
|
346
347
|
} else {
|
|
@@ -352,15 +353,15 @@ pub const Storage = struct {
|
|
|
352
353
|
|
|
353
354
|
if (must_create) {
|
|
354
355
|
log.info("creating \"{s}\"...", .{relative_path});
|
|
355
|
-
flags |= os.
|
|
356
|
-
flags |= os.
|
|
356
|
+
flags |= os.O.CREAT;
|
|
357
|
+
flags |= os.O.EXCL;
|
|
357
358
|
mode = 0o666;
|
|
358
359
|
} else {
|
|
359
360
|
log.info("opening \"{s}\"...", .{relative_path});
|
|
360
361
|
}
|
|
361
362
|
|
|
362
363
|
// This is critical as we rely on O_DSYNC for fsync() whenever we write to the file:
|
|
363
|
-
assert((flags & os.
|
|
364
|
+
assert((flags & os.O.DSYNC) > 0);
|
|
364
365
|
|
|
365
366
|
// Be careful with openat(2): "If pathname is absolute, then dirfd is ignored." (man page)
|
|
366
367
|
assert(!std.fs.path.isAbsolute(relative_path));
|
|
@@ -372,12 +373,12 @@ pub const Storage = struct {
|
|
|
372
373
|
|
|
373
374
|
// On darwin, use F_NOCACHE on direct_io to disable the page cache as O_DIRECT doesn't exist.
|
|
374
375
|
if (is_darwin and config.direct_io and direct_io_supported) {
|
|
375
|
-
_ = try os.fcntl(fd, os.
|
|
376
|
+
_ = try os.fcntl(fd, os.F.NOCACHE, 1);
|
|
376
377
|
}
|
|
377
378
|
|
|
378
379
|
// Obtain an advisory exclusive lock that works only if all processes actually use flock().
|
|
379
380
|
// LOCK_NB means that we want to fail the lock without waiting if another process has it.
|
|
380
|
-
os.flock(fd, os.
|
|
381
|
+
os.flock(fd, os.LOCK.EX | os.LOCK.NB) catch |err| switch (err) {
|
|
381
382
|
error.WouldBlock => @panic("another process holds the data file lock"),
|
|
382
383
|
else => return err,
|
|
383
384
|
};
|
|
@@ -411,7 +412,7 @@ pub const Storage = struct {
|
|
|
411
412
|
Storage.fallocate(fd, 0, 0, @intCast(i64, size)) catch |err| switch (err) {
|
|
412
413
|
error.OperationNotSupported => {
|
|
413
414
|
log.warn("file system does not support fallocate(), an ENOSPC will panic", .{});
|
|
414
|
-
log.
|
|
415
|
+
log.info("allocating by writing to the last sector of the file instead...", .{});
|
|
415
416
|
|
|
416
417
|
const sector_size = config.sector_size;
|
|
417
418
|
const sector: [sector_size]u8 align(sector_size) = [_]u8{0} ** sector_size;
|
|
@@ -436,6 +437,8 @@ pub const Storage = struct {
|
|
|
436
437
|
const F_ALLOCATEALL = 0x4; // allocate all or nothing
|
|
437
438
|
const F_PEOFPOSMODE = 3; // use relative offset from the seek pos mode
|
|
438
439
|
const F_VOLPOSMODE = 4; // use the specified volume offset
|
|
440
|
+
_ = F_VOLPOSMODE;
|
|
441
|
+
|
|
439
442
|
const fstore_t = extern struct {
|
|
440
443
|
fst_flags: c_uint,
|
|
441
444
|
fst_posmode: c_int,
|
|
@@ -453,24 +456,24 @@ pub const Storage = struct {
|
|
|
453
456
|
};
|
|
454
457
|
|
|
455
458
|
// Try to pre-allocate contiguous space and fall back to default non-contiguous.
|
|
456
|
-
var res = os.system.fcntl(fd, os.
|
|
457
|
-
if (os.errno(res) !=
|
|
459
|
+
var res = os.system.fcntl(fd, os.F.PREALLOCATE, @ptrToInt(&store));
|
|
460
|
+
if (os.errno(res) != .SUCCESS) {
|
|
458
461
|
store.fst_flags = F_ALLOCATEALL;
|
|
459
|
-
res = os.system.fcntl(fd, os.
|
|
462
|
+
res = os.system.fcntl(fd, os.F.PREALLOCATE, @ptrToInt(&store));
|
|
460
463
|
}
|
|
461
464
|
|
|
462
465
|
switch (os.errno(res)) {
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
466
|
+
.SUCCESS => {},
|
|
467
|
+
.ACCES => unreachable, // F_SETLK or F_SETSIZE of F_WRITEBOOTSTRAP
|
|
468
|
+
.BADF => return error.FileDescriptorInvalid,
|
|
469
|
+
.DEADLK => unreachable, // F_SETLKW
|
|
470
|
+
.INTR => unreachable, // F_SETLKW
|
|
471
|
+
.INVAL => return error.ArgumentsInvalid, // for F_PREALLOCATE (offset invalid)
|
|
472
|
+
.MFILE => unreachable, // F_DUPFD or F_DUPED
|
|
473
|
+
.NOLCK => unreachable, // F_SETLK or F_SETLKW
|
|
474
|
+
.OVERFLOW => return error.FileTooBig,
|
|
475
|
+
.SRCH => unreachable, // F_SETOWN
|
|
476
|
+
.OPNOTSUPP => return error.OperationNotSupported, // not reported but need same error union
|
|
474
477
|
else => |errno| return os.unexpectedErrno(errno),
|
|
475
478
|
}
|
|
476
479
|
|
|
@@ -484,19 +487,19 @@ pub const Storage = struct {
|
|
|
484
487
|
while (true) {
|
|
485
488
|
const rc = os.linux.fallocate(fd, mode, offset, length);
|
|
486
489
|
switch (os.linux.getErrno(rc)) {
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
490
|
+
.SUCCESS => return,
|
|
491
|
+
.BADF => return error.FileDescriptorInvalid,
|
|
492
|
+
.FBIG => return error.FileTooBig,
|
|
493
|
+
.INTR => continue,
|
|
494
|
+
.INVAL => return error.ArgumentsInvalid,
|
|
495
|
+
.IO => return error.InputOutput,
|
|
496
|
+
.NODEV => return error.NoDevice,
|
|
497
|
+
.NOSPC => return error.NoSpaceLeft,
|
|
498
|
+
.NOSYS => return error.SystemOutdated,
|
|
499
|
+
.OPNOTSUPP => return error.OperationNotSupported,
|
|
500
|
+
.PERM => return error.PermissionDenied,
|
|
501
|
+
.SPIPE => return error.Unseekable,
|
|
502
|
+
.TXTBSY => return error.FileBusy,
|
|
500
503
|
else => |errno| return os.unexpectedErrno(errno),
|
|
501
504
|
}
|
|
502
505
|
}
|
|
@@ -509,18 +512,18 @@ pub const Storage = struct {
|
|
|
509
512
|
|
|
510
513
|
const path = "fs_supports_direct_io";
|
|
511
514
|
const dir = std.fs.Dir{ .fd = dir_fd };
|
|
512
|
-
const fd = try os.openatZ(dir_fd, path, os.
|
|
515
|
+
const fd = try os.openatZ(dir_fd, path, os.O.CLOEXEC | os.O.CREAT | os.O.TRUNC, 0o666);
|
|
513
516
|
defer os.close(fd);
|
|
514
517
|
defer dir.deleteFile(path) catch {};
|
|
515
518
|
|
|
516
519
|
// F_NOCACHE on darwin is the most similar option to O_DIRECT on linux.
|
|
517
520
|
if (is_darwin) {
|
|
518
|
-
_ = os.fcntl(fd, os.
|
|
521
|
+
_ = os.fcntl(fd, os.F.NOCACHE, 1) catch return false;
|
|
519
522
|
return true;
|
|
520
523
|
}
|
|
521
524
|
|
|
522
525
|
while (true) {
|
|
523
|
-
const res = os.system.openat(dir_fd, path, os.
|
|
526
|
+
const res = os.system.openat(dir_fd, path, os.O.CLOEXEC | os.O.RDONLY | os.O.DIRECT, 0);
|
|
524
527
|
switch (os.linux.getErrno(res)) {
|
|
525
528
|
0 => {
|
|
526
529
|
os.close(@intCast(os.fd_t, res));
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
const std = @import("std");
|
|
2
|
+
const assert = std.debug.assert;
|
|
2
3
|
const mem = std.mem;
|
|
3
4
|
|
|
4
5
|
const config = @import("../config.zig");
|
|
5
6
|
|
|
6
7
|
const StateChecker = @import("state_checker.zig").StateChecker;
|
|
7
8
|
|
|
9
|
+
const MessagePool = @import("../message_pool.zig").MessagePool;
|
|
10
|
+
const Message = MessagePool.Message;
|
|
11
|
+
|
|
8
12
|
const Network = @import("network.zig").Network;
|
|
9
13
|
const NetworkOptions = @import("network.zig").NetworkOptions;
|
|
10
14
|
|
|
@@ -13,9 +17,9 @@ const MessageBus = @import("message_bus.zig").MessageBus;
|
|
|
13
17
|
const Storage = @import("storage.zig").Storage;
|
|
14
18
|
const Time = @import("time.zig").Time;
|
|
15
19
|
|
|
16
|
-
const
|
|
17
|
-
pub const Replica =
|
|
18
|
-
pub const Client =
|
|
20
|
+
const vsr = @import("../vsr.zig");
|
|
21
|
+
pub const Replica = vsr.Replica(StateMachine, MessageBus, Storage, Time);
|
|
22
|
+
pub const Client = vsr.Client(StateMachine, MessageBus);
|
|
19
23
|
|
|
20
24
|
pub const ClusterOptions = struct {
|
|
21
25
|
cluster: u32,
|
|
@@ -26,22 +30,26 @@ pub const ClusterOptions = struct {
|
|
|
26
30
|
seed: u64,
|
|
27
31
|
|
|
28
32
|
network_options: NetworkOptions,
|
|
33
|
+
storage_options: Storage.Options,
|
|
29
34
|
};
|
|
30
35
|
|
|
31
36
|
pub const Cluster = struct {
|
|
32
|
-
allocator:
|
|
37
|
+
allocator: mem.Allocator,
|
|
33
38
|
options: ClusterOptions,
|
|
34
39
|
|
|
35
40
|
state_machines: []StateMachine,
|
|
36
41
|
storages: []Storage,
|
|
42
|
+
times: []Time,
|
|
37
43
|
replicas: []Replica,
|
|
38
44
|
|
|
39
45
|
clients: []Client,
|
|
40
46
|
network: Network,
|
|
41
47
|
|
|
48
|
+
// TODO: Initializing these fields in main() is a bit ugly
|
|
42
49
|
state_checker: StateChecker = undefined,
|
|
50
|
+
on_change_state: fn (replica: *Replica) void = undefined,
|
|
43
51
|
|
|
44
|
-
pub fn create(allocator:
|
|
52
|
+
pub fn create(allocator: mem.Allocator, prng: std.rand.Random, options: ClusterOptions) !*Cluster {
|
|
45
53
|
const cluster = try allocator.create(Cluster);
|
|
46
54
|
errdefer allocator.destroy(cluster);
|
|
47
55
|
|
|
@@ -52,6 +60,9 @@ pub const Cluster = struct {
|
|
|
52
60
|
const storages = try allocator.alloc(Storage, options.replica_count);
|
|
53
61
|
errdefer allocator.free(storages);
|
|
54
62
|
|
|
63
|
+
const times = try allocator.alloc(Time, options.replica_count);
|
|
64
|
+
errdefer allocator.free(times);
|
|
65
|
+
|
|
55
66
|
const replicas = try allocator.alloc(Replica, options.replica_count);
|
|
56
67
|
errdefer allocator.free(replicas);
|
|
57
68
|
|
|
@@ -71,21 +82,31 @@ pub const Cluster = struct {
|
|
|
71
82
|
.options = options,
|
|
72
83
|
.state_machines = state_machines,
|
|
73
84
|
.storages = storages,
|
|
85
|
+
.times = times,
|
|
74
86
|
.replicas = replicas,
|
|
75
87
|
.clients = clients,
|
|
76
88
|
.network = network,
|
|
77
89
|
};
|
|
78
90
|
}
|
|
79
91
|
|
|
92
|
+
var buffer: [config.replicas_max]Storage.FaultyAreas = undefined;
|
|
93
|
+
const faulty_areas = Storage.generate_faulty_areas(prng, config.journal_size_max, options.replica_count, &buffer);
|
|
94
|
+
|
|
80
95
|
for (cluster.replicas) |*replica, replica_index| {
|
|
81
|
-
|
|
96
|
+
cluster.times[replica_index] = .{
|
|
82
97
|
.resolution = config.tick_ms * std.time.ns_per_ms,
|
|
83
98
|
.offset_type = .linear,
|
|
84
99
|
.offset_coefficient_A = 0,
|
|
85
100
|
.offset_coefficient_B = 0,
|
|
86
101
|
};
|
|
87
102
|
cluster.state_machines[replica_index] = StateMachine.init(options.seed);
|
|
88
|
-
cluster.storages[replica_index] = try Storage.init(
|
|
103
|
+
cluster.storages[replica_index] = try Storage.init(
|
|
104
|
+
allocator,
|
|
105
|
+
config.journal_size_max,
|
|
106
|
+
options.storage_options,
|
|
107
|
+
@intCast(u8, replica_index),
|
|
108
|
+
faulty_areas[replica_index],
|
|
109
|
+
);
|
|
89
110
|
const message_bus = try cluster.network.init_message_bus(
|
|
90
111
|
options.cluster,
|
|
91
112
|
.{ .replica = @intCast(u8, replica_index) },
|
|
@@ -96,7 +117,7 @@ pub const Cluster = struct {
|
|
|
96
117
|
options.cluster,
|
|
97
118
|
options.replica_count,
|
|
98
119
|
@intCast(u8, replica_index),
|
|
99
|
-
|
|
120
|
+
&cluster.times[replica_index],
|
|
100
121
|
&cluster.storages[replica_index],
|
|
101
122
|
message_bus,
|
|
102
123
|
&cluster.state_machines[replica_index],
|
|
@@ -127,13 +148,10 @@ pub const Cluster = struct {
|
|
|
127
148
|
for (cluster.clients) |*client| client.deinit();
|
|
128
149
|
cluster.allocator.free(cluster.clients);
|
|
129
150
|
|
|
130
|
-
for (cluster.replicas) |*replica| replica.deinit();
|
|
151
|
+
for (cluster.replicas) |*replica| replica.deinit(cluster.allocator);
|
|
131
152
|
cluster.allocator.free(cluster.replicas);
|
|
132
153
|
|
|
133
|
-
for (cluster.
|
|
134
|
-
cluster.allocator.free(cluster.state_machines);
|
|
135
|
-
|
|
136
|
-
for (cluster.storages) |*storage| storage.deinit();
|
|
154
|
+
for (cluster.storages) |*storage| storage.deinit(cluster.allocator);
|
|
137
155
|
cluster.allocator.free(cluster.storages);
|
|
138
156
|
|
|
139
157
|
cluster.network.deinit();
|
|
@@ -141,5 +159,63 @@ pub const Cluster = struct {
|
|
|
141
159
|
cluster.allocator.destroy(cluster);
|
|
142
160
|
}
|
|
143
161
|
|
|
144
|
-
|
|
162
|
+
/// Reset a replica to its initial state, simulating a random crash/panic.
|
|
163
|
+
/// Leave the persistent storage untouched, and leave any currently
|
|
164
|
+
/// inflight messages to/from the replica in the network.
|
|
165
|
+
pub fn simulate_replica_crash(cluster: *Cluster, replica_index: u8) !void {
|
|
166
|
+
const replica = &cluster.replicas[replica_index];
|
|
167
|
+
replica.deinit(cluster.allocator);
|
|
168
|
+
|
|
169
|
+
cluster.storages[replica_index].reset();
|
|
170
|
+
cluster.state_machines[replica_index] = StateMachine.init(cluster.options.seed);
|
|
171
|
+
|
|
172
|
+
// The message bus and network should be left alone, as messages
|
|
173
|
+
// may still be inflight to/from this replica. However, we should
|
|
174
|
+
// do a check to ensure that we aren't leaking any messages when
|
|
175
|
+
// deinitializing the replica above.
|
|
176
|
+
const packet_simulator = &cluster.network.packet_simulator;
|
|
177
|
+
// The same message may be used for multiple network packets, so simply counting how
|
|
178
|
+
// many packets are inflight from the replica is insufficient, we need to dedup them.
|
|
179
|
+
var messages_in_network_set = std.AutoHashMap(*Message, void).init(cluster.allocator);
|
|
180
|
+
defer messages_in_network_set.deinit();
|
|
181
|
+
|
|
182
|
+
var target: u8 = 0;
|
|
183
|
+
while (target < packet_simulator.options.node_count) : (target += 1) {
|
|
184
|
+
const path = .{ .source = replica_index, .target = target };
|
|
185
|
+
const queue = packet_simulator.path_queue(path);
|
|
186
|
+
var it = queue.iterator();
|
|
187
|
+
while (it.next()) |data| {
|
|
188
|
+
try messages_in_network_set.put(data.packet.message, {});
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
const messages_in_network = messages_in_network_set.count();
|
|
193
|
+
|
|
194
|
+
var messages_in_pool: usize = 0;
|
|
195
|
+
const message_bus = cluster.network.get_message_bus(.{ .replica = replica_index });
|
|
196
|
+
{
|
|
197
|
+
var it = message_bus.pool.free_list;
|
|
198
|
+
while (it) |message| : (it = message.next) messages_in_pool += 1;
|
|
199
|
+
}
|
|
200
|
+
{
|
|
201
|
+
var it = message_bus.pool.header_only_free_list;
|
|
202
|
+
while (it) |message| : (it = message.next) messages_in_pool += 1;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
const total_messages = config.message_bus_messages_max + config.message_bus_headers_max;
|
|
206
|
+
assert(messages_in_network + messages_in_pool == total_messages);
|
|
207
|
+
|
|
208
|
+
replica.* = try Replica.init(
|
|
209
|
+
cluster.allocator,
|
|
210
|
+
cluster.options.cluster,
|
|
211
|
+
cluster.options.replica_count,
|
|
212
|
+
@intCast(u8, replica_index),
|
|
213
|
+
&cluster.times[replica_index],
|
|
214
|
+
&cluster.storages[replica_index],
|
|
215
|
+
message_bus,
|
|
216
|
+
&cluster.state_machines[replica_index],
|
|
217
|
+
);
|
|
218
|
+
message_bus.set_on_message(*Replica, replica, Replica.on_message);
|
|
219
|
+
replica.on_change_state = cluster.on_change_state;
|
|
220
|
+
}
|
|
145
221
|
};
|
|
@@ -5,7 +5,7 @@ const config = @import("../config.zig");
|
|
|
5
5
|
|
|
6
6
|
const MessagePool = @import("../message_pool.zig").MessagePool;
|
|
7
7
|
const Message = MessagePool.Message;
|
|
8
|
-
const Header = @import("../
|
|
8
|
+
const Header = @import("../vsr.zig").Header;
|
|
9
9
|
|
|
10
10
|
const Network = @import("network.zig").Network;
|
|
11
11
|
|
|
@@ -25,11 +25,11 @@ pub const MessageBus = struct {
|
|
|
25
25
|
|
|
26
26
|
/// The callback to be called when a message is received. Use set_on_message() to set
|
|
27
27
|
/// with type safety for the context pointer.
|
|
28
|
-
on_message_callback: ?fn (context: ?*
|
|
29
|
-
on_message_context: ?*
|
|
28
|
+
on_message_callback: ?fn (context: ?*anyopaque, message: *Message) void = null,
|
|
29
|
+
on_message_context: ?*anyopaque = null,
|
|
30
30
|
|
|
31
31
|
pub fn init(
|
|
32
|
-
allocator:
|
|
32
|
+
allocator: std.mem.Allocator,
|
|
33
33
|
cluster: u32,
|
|
34
34
|
process: Process,
|
|
35
35
|
network: *Network,
|
|
@@ -43,7 +43,7 @@ pub const MessageBus = struct {
|
|
|
43
43
|
}
|
|
44
44
|
|
|
45
45
|
/// TODO
|
|
46
|
-
pub fn deinit(
|
|
46
|
+
pub fn deinit(_: *MessageBus) void {}
|
|
47
47
|
|
|
48
48
|
pub fn set_on_message(
|
|
49
49
|
bus: *MessageBus,
|
|
@@ -51,18 +51,15 @@ pub const MessageBus = struct {
|
|
|
51
51
|
context: Context,
|
|
52
52
|
comptime on_message: fn (context: Context, message: *Message) void,
|
|
53
53
|
) void {
|
|
54
|
-
assert(bus.on_message_callback == null);
|
|
55
|
-
assert(bus.on_message_context == null);
|
|
56
|
-
|
|
57
54
|
bus.on_message_callback = struct {
|
|
58
|
-
fn wrapper(_context: ?*
|
|
55
|
+
fn wrapper(_context: ?*anyopaque, message: *Message) void {
|
|
59
56
|
on_message(@intToPtr(Context, @ptrToInt(_context)), message);
|
|
60
57
|
}
|
|
61
58
|
}.wrapper;
|
|
62
59
|
bus.on_message_context = context;
|
|
63
60
|
}
|
|
64
61
|
|
|
65
|
-
pub fn tick(
|
|
62
|
+
pub fn tick(_: *MessageBus) void {}
|
|
66
63
|
|
|
67
64
|
pub fn get_message(bus: *MessageBus) ?*Message {
|
|
68
65
|
return bus.pool.get_message();
|
|
@@ -4,7 +4,7 @@ const mem = std.mem;
|
|
|
4
4
|
const assert = std.debug.assert;
|
|
5
5
|
|
|
6
6
|
const config = @import("../config.zig");
|
|
7
|
-
const
|
|
7
|
+
const vsr = @import("../vsr.zig");
|
|
8
8
|
|
|
9
9
|
const MessagePool = @import("../message_pool.zig").MessagePool;
|
|
10
10
|
const Message = MessagePool.Message;
|
|
@@ -38,7 +38,7 @@ pub const Network = struct {
|
|
|
38
38
|
target: Process,
|
|
39
39
|
};
|
|
40
40
|
|
|
41
|
-
allocator:
|
|
41
|
+
allocator: std.mem.Allocator,
|
|
42
42
|
|
|
43
43
|
options: NetworkOptions,
|
|
44
44
|
packet_simulator: PacketSimulator(Packet),
|
|
@@ -47,7 +47,7 @@ pub const Network = struct {
|
|
|
47
47
|
processes: std.ArrayListUnmanaged(u128),
|
|
48
48
|
|
|
49
49
|
pub fn init(
|
|
50
|
-
allocator:
|
|
50
|
+
allocator: std.mem.Allocator,
|
|
51
51
|
replica_count: u8,
|
|
52
52
|
client_count: u8,
|
|
53
53
|
options: NetworkOptions,
|
|
@@ -136,7 +136,7 @@ pub const Network = struct {
|
|
|
136
136
|
|
|
137
137
|
fn deliver_message(packet: Packet, path: PacketSimulatorPath) void {
|
|
138
138
|
const network = packet.network;
|
|
139
|
-
|
|
139
|
+
|
|
140
140
|
const target_bus = &network.busses.items[path.target];
|
|
141
141
|
|
|
142
142
|
const message = target_bus.get_message() orelse {
|
|
@@ -159,7 +159,7 @@ pub const Network = struct {
|
|
|
159
159
|
});
|
|
160
160
|
|
|
161
161
|
if (message.header.command == .request or message.header.command == .prepare) {
|
|
162
|
-
const sector_ceil =
|
|
162
|
+
const sector_ceil = vsr.sector_ceil(message.header.size);
|
|
163
163
|
if (message.header.size != sector_ceil) {
|
|
164
164
|
assert(message.header.size < sector_ceil);
|
|
165
165
|
assert(message.buffer.len == config.message_size_max + config.sector_size);
|