tigerbeetle-node 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -4
- package/package.json +1 -1
- package/src/node.zig +2 -12
- package/src/tigerbeetle/scripts/benchmark.bat +46 -0
- package/src/tigerbeetle/scripts/install_zig.bat +2 -2
- package/src/tigerbeetle/scripts/install_zig.sh +1 -1
- package/src/tigerbeetle/scripts/vopr.sh +2 -2
- package/src/tigerbeetle/src/benchmark.zig +2 -6
- package/src/tigerbeetle/src/cli.zig +39 -18
- package/src/tigerbeetle/src/config.zig +24 -9
- package/src/tigerbeetle/src/demo.zig +1 -1
- package/src/tigerbeetle/src/io/benchmark.zig +24 -49
- package/src/tigerbeetle/src/io/darwin.zig +175 -44
- package/src/tigerbeetle/src/io/linux.zig +177 -72
- package/src/tigerbeetle/src/io/test.zig +61 -39
- package/src/tigerbeetle/src/io/windows.zig +1161 -0
- package/src/tigerbeetle/src/io.zig +2 -0
- package/src/tigerbeetle/src/main.zig +13 -8
- package/src/tigerbeetle/src/message_bus.zig +49 -61
- package/src/tigerbeetle/src/message_pool.zig +63 -57
- package/src/tigerbeetle/src/ring_buffer.zig +7 -0
- package/src/tigerbeetle/src/simulator.zig +4 -4
- package/src/tigerbeetle/src/storage.zig +0 -230
- package/src/tigerbeetle/src/test/cluster.zig +3 -6
- package/src/tigerbeetle/src/test/message_bus.zig +4 -3
- package/src/tigerbeetle/src/test/network.zig +13 -16
- package/src/tigerbeetle/src/test/state_checker.zig +3 -2
- package/src/tigerbeetle/src/tigerbeetle.zig +5 -3
- package/src/tigerbeetle/src/time.zig +58 -11
- package/src/tigerbeetle/src/vsr/client.zig +18 -32
- package/src/tigerbeetle/src/vsr/clock.zig +1 -1
- package/src/tigerbeetle/src/vsr/journal.zig +2 -6
- package/src/tigerbeetle/src/vsr/replica.zig +146 -169
- package/src/tigerbeetle/src/vsr.zig +263 -5
|
@@ -6,9 +6,11 @@ const os = std.os;
|
|
|
6
6
|
const FIFO = @import("fifo.zig").FIFO;
|
|
7
7
|
const IO_Linux = @import("io/linux.zig").IO;
|
|
8
8
|
const IO_Darwin = @import("io/darwin.zig").IO;
|
|
9
|
+
const IO_Windows = @import("io/windows.zig").IO;
|
|
9
10
|
|
|
10
11
|
pub const IO = switch (builtin.target.os.tag) {
|
|
11
12
|
.linux => IO_Linux,
|
|
13
|
+
.windows => IO_Windows,
|
|
12
14
|
.macos, .tvos, .watchos, .ios => IO_Darwin,
|
|
13
15
|
else => @compileError("IO is not supported for platform"),
|
|
14
16
|
};
|
|
@@ -20,14 +20,18 @@ const vsr = @import("vsr.zig");
|
|
|
20
20
|
const Replica = vsr.Replica(StateMachine, MessageBus, Storage, Time);
|
|
21
21
|
|
|
22
22
|
pub fn main() !void {
|
|
23
|
+
var io = try IO.init(128, 0);
|
|
24
|
+
defer io.deinit();
|
|
25
|
+
|
|
23
26
|
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
|
|
24
27
|
defer arena.deinit();
|
|
25
28
|
|
|
26
29
|
const allocator = arena.allocator();
|
|
27
30
|
|
|
28
|
-
switch (cli.parse_args(allocator)) {
|
|
29
|
-
.init => |args| try init(args.cluster, args.replica, args.dir_fd),
|
|
31
|
+
switch (try cli.parse_args(allocator)) {
|
|
32
|
+
.init => |args| try init(&io, args.cluster, args.replica, args.dir_fd),
|
|
30
33
|
.start => |args| try start(
|
|
34
|
+
&io,
|
|
31
35
|
allocator,
|
|
32
36
|
args.cluster,
|
|
33
37
|
args.replica,
|
|
@@ -42,14 +46,14 @@ const filename_fmt = "cluster_{d:0>10}_replica_{d:0>3}.tigerbeetle";
|
|
|
42
46
|
const filename_len = fmt.count(filename_fmt, .{ 0, 0 });
|
|
43
47
|
|
|
44
48
|
/// Create a .tigerbeetle data file for the given args and exit
|
|
45
|
-
fn init(cluster: u32, replica: u8, dir_fd: os.fd_t) !void {
|
|
49
|
+
fn init(io: *IO, cluster: u32, replica: u8, dir_fd: os.fd_t) !void {
|
|
46
50
|
// Add 1 for the terminating null byte
|
|
47
51
|
var buffer: [filename_len + 1]u8 = undefined;
|
|
48
52
|
const filename = fmt.bufPrintZ(&buffer, filename_fmt, .{ cluster, replica }) catch unreachable;
|
|
49
53
|
assert(filename.len == filename_len);
|
|
50
54
|
|
|
51
55
|
// TODO Expose data file size on the CLI.
|
|
52
|
-
_ = try
|
|
56
|
+
_ = try io.open_file(
|
|
53
57
|
dir_fd,
|
|
54
58
|
filename,
|
|
55
59
|
config.journal_size_max, // TODO Double-check that we have space for redundant headers.
|
|
@@ -61,6 +65,7 @@ fn init(cluster: u32, replica: u8, dir_fd: os.fd_t) !void {
|
|
|
61
65
|
|
|
62
66
|
/// Run as a replica server defined by the given args
|
|
63
67
|
fn start(
|
|
68
|
+
io: *IO,
|
|
64
69
|
allocator: mem.Allocator,
|
|
65
70
|
cluster: u32,
|
|
66
71
|
replica_index: u8,
|
|
@@ -75,26 +80,26 @@ fn start(
|
|
|
75
80
|
assert(filename.len == filename_len);
|
|
76
81
|
|
|
77
82
|
// TODO Expose data file size on the CLI.
|
|
78
|
-
const storage_fd = try
|
|
83
|
+
const storage_fd = try io.open_file(
|
|
79
84
|
dir_fd,
|
|
80
85
|
filename,
|
|
81
86
|
config.journal_size_max, // TODO Double-check that we have space for redundant headers.
|
|
82
87
|
false,
|
|
83
88
|
);
|
|
84
|
-
|
|
89
|
+
|
|
85
90
|
var state_machine = try StateMachine.init(
|
|
86
91
|
allocator,
|
|
87
92
|
config.accounts_max,
|
|
88
93
|
config.transfers_max,
|
|
89
94
|
config.commits_max,
|
|
90
95
|
);
|
|
91
|
-
var storage = try Storage.init(config.journal_size_max, storage_fd,
|
|
96
|
+
var storage = try Storage.init(config.journal_size_max, storage_fd, io);
|
|
92
97
|
var message_bus = try MessageBus.init(
|
|
93
98
|
allocator,
|
|
94
99
|
cluster,
|
|
95
100
|
addresses,
|
|
96
101
|
replica_index,
|
|
97
|
-
|
|
102
|
+
io,
|
|
98
103
|
);
|
|
99
104
|
var time: Time = .{};
|
|
100
105
|
var replica = try Replica.init(
|
|
@@ -17,14 +17,21 @@ const IO = @import("io.zig").IO;
|
|
|
17
17
|
const MessagePool = @import("message_pool.zig").MessagePool;
|
|
18
18
|
const Message = MessagePool.Message;
|
|
19
19
|
|
|
20
|
-
const SendQueue = RingBuffer(*Message, config.connection_send_queue_max);
|
|
21
|
-
|
|
22
20
|
pub const MessageBusReplica = MessageBusImpl(.replica);
|
|
23
21
|
pub const MessageBusClient = MessageBusImpl(.client);
|
|
24
22
|
|
|
25
|
-
|
|
23
|
+
fn MessageBusImpl(comptime process_type: vsr.ProcessType) type {
|
|
24
|
+
const SendQueue = RingBuffer(*Message, switch (process_type) {
|
|
25
|
+
.replica => config.connection_send_queue_max_replica,
|
|
26
|
+
// A client has at most 1 in-flight request, plus pings.
|
|
27
|
+
.client => config.connection_send_queue_max_client,
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
const tcp_sndbuf = switch (process_type) {
|
|
31
|
+
.replica => config.tcp_sndbuf_replica,
|
|
32
|
+
.client => config.tcp_sndbuf_client,
|
|
33
|
+
};
|
|
26
34
|
|
|
27
|
-
fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
28
35
|
return struct {
|
|
29
36
|
const Self = @This();
|
|
30
37
|
|
|
@@ -105,14 +112,14 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
105
112
|
};
|
|
106
113
|
|
|
107
114
|
var bus: Self = .{
|
|
108
|
-
.pool = try MessagePool.init(allocator),
|
|
115
|
+
.pool = try MessagePool.init(allocator, process_type),
|
|
109
116
|
.io = io,
|
|
110
117
|
.cluster = cluster,
|
|
111
118
|
.configuration = configuration,
|
|
112
119
|
.process = switch (process_type) {
|
|
113
120
|
.replica => .{
|
|
114
121
|
.replica = process,
|
|
115
|
-
.accept_fd = try init_tcp(configuration[process]),
|
|
122
|
+
.accept_fd = try init_tcp(io, configuration[process]),
|
|
116
123
|
},
|
|
117
124
|
.client => {},
|
|
118
125
|
},
|
|
@@ -150,13 +157,13 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
150
157
|
/// TODO This is required by the Client.
|
|
151
158
|
pub fn deinit(_: *Self) void {}
|
|
152
159
|
|
|
153
|
-
fn init_tcp(address: std.net.Address) !os.socket_t {
|
|
154
|
-
const fd = try
|
|
160
|
+
fn init_tcp(io: *IO, address: std.net.Address) !os.socket_t {
|
|
161
|
+
const fd = try io.open_socket(
|
|
155
162
|
address.any.family,
|
|
156
163
|
os.SOCK.STREAM,
|
|
157
164
|
os.IPPROTO.TCP,
|
|
158
165
|
);
|
|
159
|
-
errdefer os.
|
|
166
|
+
errdefer os.closeSocket(fd);
|
|
160
167
|
|
|
161
168
|
const set = struct {
|
|
162
169
|
fn set(_fd: os.socket_t, level: u32, option: u32, value: c_int) !void {
|
|
@@ -177,17 +184,17 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
177
184
|
try set(fd, os.SOL.SOCKET, os.SO.RCVBUF, config.tcp_rcvbuf);
|
|
178
185
|
}
|
|
179
186
|
|
|
180
|
-
if (
|
|
187
|
+
if (tcp_sndbuf > 0) sndbuf: {
|
|
181
188
|
if (is_linux) {
|
|
182
189
|
// Requires CAP_NET_ADMIN privilege (settle for SO_SNDBUF in case of an EPERM):
|
|
183
|
-
if (set(fd, os.SOL.SOCKET, os.SO.SNDBUFFORCE,
|
|
190
|
+
if (set(fd, os.SOL.SOCKET, os.SO.SNDBUFFORCE, tcp_sndbuf)) |_| {
|
|
184
191
|
break :sndbuf;
|
|
185
192
|
} else |err| switch (err) {
|
|
186
193
|
error.PermissionDenied => {},
|
|
187
194
|
else => |e| return e,
|
|
188
195
|
}
|
|
189
196
|
}
|
|
190
|
-
try set(fd, os.SOL.SOCKET, os.SO.SNDBUF,
|
|
197
|
+
try set(fd, os.SOL.SOCKET, os.SO.SNDBUF, tcp_sndbuf);
|
|
191
198
|
}
|
|
192
199
|
|
|
193
200
|
if (config.tcp_keepalive) {
|
|
@@ -338,7 +345,7 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
338
345
|
bus.process.accept_connection.?.on_accept(bus, fd);
|
|
339
346
|
}
|
|
340
347
|
|
|
341
|
-
pub fn get_message(bus: *Self)
|
|
348
|
+
pub fn get_message(bus: *Self) *Message {
|
|
342
349
|
return bus.pool.get_message();
|
|
343
350
|
}
|
|
344
351
|
|
|
@@ -399,11 +406,11 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
399
406
|
terminating,
|
|
400
407
|
} = .free,
|
|
401
408
|
/// This is guaranteed to be valid only while state is connected.
|
|
402
|
-
/// It will be reset to
|
|
403
|
-
/// connection is unused (i.e. peer == .none). We use
|
|
409
|
+
/// It will be reset to IO.INVALID_SOCKET during the shutdown process and is always IO.INVALID_SOCKET if the
|
|
410
|
+
/// connection is unused (i.e. peer == .none). We use IO.INVALID_SOCKET instead of undefined here
|
|
404
411
|
/// for safety to ensure an error if the invalid value is ever used, instead of
|
|
405
412
|
/// potentially performing an action on an active fd.
|
|
406
|
-
fd: os.socket_t =
|
|
413
|
+
fd: os.socket_t = IO.INVALID_SOCKET,
|
|
407
414
|
|
|
408
415
|
/// This completion is used for all recv operations.
|
|
409
416
|
/// It is also used for the initial connect when establishing a replica connection.
|
|
@@ -439,12 +446,12 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
439
446
|
|
|
440
447
|
assert(connection.peer == .none);
|
|
441
448
|
assert(connection.state == .free);
|
|
442
|
-
assert(connection.fd ==
|
|
449
|
+
assert(connection.fd == IO.INVALID_SOCKET);
|
|
443
450
|
|
|
444
451
|
// The first replica's network address family determines the
|
|
445
452
|
// family for all other replicas:
|
|
446
453
|
const family = bus.configuration[0].any.family;
|
|
447
|
-
connection.fd =
|
|
454
|
+
connection.fd = bus.io.open_socket(family, os.SOCK.STREAM, os.IPPROTO.TCP) catch return;
|
|
448
455
|
connection.peer = .{ .replica = replica };
|
|
449
456
|
connection.state = .connecting;
|
|
450
457
|
bus.connections_used += 1;
|
|
@@ -545,7 +552,7 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
545
552
|
pub fn on_accept(connection: *Connection, bus: *Self, fd: os.socket_t) void {
|
|
546
553
|
assert(connection.peer == .none);
|
|
547
554
|
assert(connection.state == .accepting);
|
|
548
|
-
assert(connection.fd ==
|
|
555
|
+
assert(connection.fd == IO.INVALID_SOCKET);
|
|
549
556
|
|
|
550
557
|
connection.peer = .unknown;
|
|
551
558
|
connection.state = .connected;
|
|
@@ -562,7 +569,7 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
562
569
|
|
|
563
570
|
assert(connection.peer == .unknown or connection.peer == .replica);
|
|
564
571
|
assert(connection.state == .connected);
|
|
565
|
-
assert(connection.fd !=
|
|
572
|
+
assert(connection.fd != IO.INVALID_SOCKET);
|
|
566
573
|
|
|
567
574
|
assert(connection.recv_submitted == false);
|
|
568
575
|
assert(connection.recv_message == null);
|
|
@@ -582,15 +589,14 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
582
589
|
.terminating => return,
|
|
583
590
|
.free, .accepting => unreachable,
|
|
584
591
|
}
|
|
585
|
-
connection.send_queue.
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
};
|
|
592
|
+
if (connection.send_queue.full()) {
|
|
593
|
+
log.info("message queue for peer {} full, dropping {s} message", .{
|
|
594
|
+
connection.peer,
|
|
595
|
+
@tagName(message.header.command),
|
|
596
|
+
});
|
|
597
|
+
return;
|
|
598
|
+
}
|
|
599
|
+
connection.send_queue.push_assume_capacity(message.ref());
|
|
594
600
|
// If the connection has not yet been established we can't send yet.
|
|
595
601
|
// Instead on_connect() will call send().
|
|
596
602
|
if (connection.state == .connecting) {
|
|
@@ -610,7 +616,7 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
610
616
|
pub fn terminate(connection: *Connection, bus: *Self, how: enum { shutdown, close }) void {
|
|
611
617
|
assert(connection.peer != .none);
|
|
612
618
|
assert(connection.state != .free);
|
|
613
|
-
assert(connection.fd !=
|
|
619
|
+
assert(connection.fd != IO.INVALID_SOCKET);
|
|
614
620
|
switch (how) {
|
|
615
621
|
.shutdown => {
|
|
616
622
|
// The shutdown syscall will cause currently in progress send/recv
|
|
@@ -618,12 +624,8 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
618
624
|
//
|
|
619
625
|
// TODO: Investigate differences between shutdown() on Linux vs Darwin.
|
|
620
626
|
// Especially how this interacts with our assumptions around pending I/O.
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
.SUCCESS => {},
|
|
624
|
-
.BADF => unreachable,
|
|
625
|
-
.INVAL => unreachable,
|
|
626
|
-
.NOTCONN => {
|
|
627
|
+
os.shutdown(connection.fd, .both) catch |err| switch (err) {
|
|
628
|
+
error.SocketNotConnected => {
|
|
627
629
|
// This should only happen if we for some reason decide to terminate
|
|
628
630
|
// a connection while a connect operation is in progress.
|
|
629
631
|
// This is fine though, we simply continue with the logic below and
|
|
@@ -638,9 +640,9 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
638
640
|
//assert(connection.recv_submitted);
|
|
639
641
|
//assert(!connection.send_submitted);
|
|
640
642
|
},
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
}
|
|
643
|
+
// Ignore all the remaining errors for now
|
|
644
|
+
error.ConnectionAborted, error.ConnectionResetByPeer, error.BlockingOperationInProgress, error.NetworkSubsystemFailed, error.SystemResources, error.Unexpected => {},
|
|
645
|
+
};
|
|
644
646
|
},
|
|
645
647
|
.close => {},
|
|
646
648
|
}
|
|
@@ -652,7 +654,7 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
652
654
|
fn parse_messages(connection: *Connection, bus: *Self) void {
|
|
653
655
|
assert(connection.peer != .none);
|
|
654
656
|
assert(connection.state == .connected);
|
|
655
|
-
assert(connection.fd !=
|
|
657
|
+
assert(connection.fd != IO.INVALID_SOCKET);
|
|
656
658
|
|
|
657
659
|
while (connection.parse_message(bus)) |message| {
|
|
658
660
|
defer bus.unref(message);
|
|
@@ -734,14 +736,7 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
734
736
|
// `references` and `header` metadata.
|
|
735
737
|
if (connection.recv_progress == header.size) return connection.recv_message.?.ref();
|
|
736
738
|
|
|
737
|
-
const message = bus.get_message()
|
|
738
|
-
// TODO Decrease the probability of this happening by:
|
|
739
|
-
// 1. using a header-only message if possible.
|
|
740
|
-
// 2. determining a true upper limit for static allocation.
|
|
741
|
-
log.err("no free buffer available to deliver message from {}", .{connection.peer});
|
|
742
|
-
connection.terminate(bus, .shutdown);
|
|
743
|
-
return null;
|
|
744
|
-
};
|
|
739
|
+
const message = bus.get_message();
|
|
745
740
|
mem.copy(u8, message.buffer, data[0..header.size]);
|
|
746
741
|
return message;
|
|
747
742
|
}
|
|
@@ -779,7 +774,7 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
779
774
|
|
|
780
775
|
assert(connection.peer != .none);
|
|
781
776
|
assert(connection.state == .connected);
|
|
782
|
-
assert(connection.fd !=
|
|
777
|
+
assert(connection.fd != IO.INVALID_SOCKET);
|
|
783
778
|
|
|
784
779
|
if (connection.peer != .unknown) return;
|
|
785
780
|
|
|
@@ -826,14 +821,7 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
826
821
|
return;
|
|
827
822
|
}
|
|
828
823
|
|
|
829
|
-
const new_message = bus.get_message()
|
|
830
|
-
// TODO Decrease the probability of this happening by:
|
|
831
|
-
// 1. using a header-only message if possible.
|
|
832
|
-
// 2. determining a true upper limit for static allocation.
|
|
833
|
-
log.err("no free buffer available to recv message from {}", .{connection.peer});
|
|
834
|
-
connection.terminate(bus, .shutdown);
|
|
835
|
-
return;
|
|
836
|
-
};
|
|
824
|
+
const new_message = bus.get_message();
|
|
837
825
|
defer bus.unref(new_message);
|
|
838
826
|
|
|
839
827
|
if (connection.recv_message) |recv_message| {
|
|
@@ -857,7 +845,7 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
857
845
|
fn recv(connection: *Connection, bus: *Self) void {
|
|
858
846
|
assert(connection.peer != .none);
|
|
859
847
|
assert(connection.state == .connected);
|
|
860
|
-
assert(connection.fd !=
|
|
848
|
+
assert(connection.fd != IO.INVALID_SOCKET);
|
|
861
849
|
|
|
862
850
|
assert(!connection.recv_submitted);
|
|
863
851
|
connection.recv_submitted = true;
|
|
@@ -902,7 +890,7 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
902
890
|
fn send(connection: *Connection, bus: *Self) void {
|
|
903
891
|
assert(connection.peer == .client or connection.peer == .replica);
|
|
904
892
|
assert(connection.state == .connected);
|
|
905
|
-
assert(connection.fd !=
|
|
893
|
+
assert(connection.fd != IO.INVALID_SOCKET);
|
|
906
894
|
const message = connection.send_queue.head() orelse return;
|
|
907
895
|
assert(!connection.send_submitted);
|
|
908
896
|
connection.send_submitted = true;
|
|
@@ -959,8 +947,8 @@ fn MessageBusImpl(comptime process_type: ProcessType) type {
|
|
|
959
947
|
bus.unref(message);
|
|
960
948
|
connection.recv_message = null;
|
|
961
949
|
}
|
|
962
|
-
assert(connection.fd !=
|
|
963
|
-
defer connection.fd =
|
|
950
|
+
assert(connection.fd != IO.INVALID_SOCKET);
|
|
951
|
+
defer connection.fd = IO.INVALID_SOCKET;
|
|
964
952
|
// It's OK to use the send completion here as we know that no send
|
|
965
953
|
// operation is currently in progress.
|
|
966
954
|
bus.io.close(*Self, bus, on_close, &connection.send_completion, connection.fd);
|
|
@@ -17,15 +17,55 @@ comptime {
|
|
|
17
17
|
/// message to be shifted to make space for 0 padding to vsr.sector_ceil.
|
|
18
18
|
const message_size_max_padded = config.message_size_max + config.sector_size;
|
|
19
19
|
|
|
20
|
-
///
|
|
21
|
-
///
|
|
22
|
-
|
|
20
|
+
/// The number of full-sized messages allocated at initialization by the replica message pool.
|
|
21
|
+
/// There must be enough messages to ensure that the replica can always progress, to avoid deadlock.
|
|
22
|
+
pub const messages_max_replica = messages_max: {
|
|
23
|
+
var sum: usize = 0;
|
|
24
|
+
|
|
25
|
+
sum += config.io_depth_read + config.io_depth_write; // Journal I/O
|
|
26
|
+
sum += config.clients_max; // Replica.client_table
|
|
27
|
+
sum += 1; // Replica.loopback_queue
|
|
28
|
+
sum += config.pipelining_max; // Replica.pipeline
|
|
29
|
+
sum += config.replicas_max; // Replica.do_view_change_from_all_replicas quorum (all others are bitsets)
|
|
30
|
+
sum += config.connections_max; // Connection.recv_message
|
|
31
|
+
sum += config.connections_max * config.connection_send_queue_max_replica; // Connection.send_queue
|
|
32
|
+
sum += 1; // Handle bursts (e.g. Connection.parse_message)
|
|
33
|
+
// Handle Replica.commit_op's reply:
|
|
34
|
+
// (This is separate from the burst +1 because they may occur concurrently).
|
|
35
|
+
sum += 1;
|
|
36
|
+
sum += 20; // TODO Our network simulator allows up to 20 messages for path_capacity_max.
|
|
37
|
+
|
|
38
|
+
break :messages_max sum;
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
/// The number of full-sized messages allocated at initialization by the client message pool.
|
|
42
|
+
pub const messages_max_client = messages_max: {
|
|
43
|
+
var sum: usize = 0;
|
|
44
|
+
|
|
45
|
+
sum += config.replicas_max; // Connection.recv_message
|
|
46
|
+
sum += config.replicas_max * config.connection_send_queue_max_client; // Connection.send_queue
|
|
47
|
+
sum += config.client_request_queue_max; // Client.request_queue
|
|
48
|
+
// Handle bursts (e.g. Connection.parse_message, or sending a ping when the send queue is full).
|
|
49
|
+
sum += 1;
|
|
50
|
+
sum += 20; // TODO Our network simulator allows up to 20 messages for path_capacity_max.
|
|
51
|
+
|
|
52
|
+
break :messages_max sum;
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
comptime {
|
|
56
|
+
// These conditions are necessary (but not sufficient) to prevent deadlocks.
|
|
57
|
+
assert(messages_max_replica > config.replicas_max);
|
|
58
|
+
assert(messages_max_client > config.client_request_queue_max);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/// A pool of reference-counted Messages, memory for which is allocated only once during
|
|
62
|
+
/// initialization and reused thereafter. The messages_max values determine the size of this pool.
|
|
23
63
|
pub const MessagePool = struct {
|
|
24
64
|
pub const Message = struct {
|
|
25
65
|
// TODO: replace this with a header() function to save memory
|
|
26
66
|
header: *Header,
|
|
27
|
-
///
|
|
28
|
-
///
|
|
67
|
+
/// This buffer is aligned to config.sector_size and casting to that alignment in order
|
|
68
|
+
/// to perform Direct I/O is safe.
|
|
29
69
|
buffer: []u8,
|
|
30
70
|
references: u32 = 0,
|
|
31
71
|
next: ?*Message,
|
|
@@ -39,27 +79,23 @@ pub const MessagePool = struct {
|
|
|
39
79
|
pub fn body(message: *Message) []u8 {
|
|
40
80
|
return message.buffer[@sizeOf(Header)..message.header.size];
|
|
41
81
|
}
|
|
42
|
-
|
|
43
|
-
fn header_only(message: Message) bool {
|
|
44
|
-
const ret = message.buffer.len == @sizeOf(Header);
|
|
45
|
-
assert(ret or message.buffer.len == message_size_max_padded);
|
|
46
|
-
return ret;
|
|
47
|
-
}
|
|
48
82
|
};
|
|
49
83
|
|
|
50
84
|
/// List of currently unused messages of message_size_max_padded
|
|
51
85
|
free_list: ?*Message,
|
|
52
|
-
/// List of currently usused header-sized messages
|
|
53
|
-
header_only_free_list: ?*Message,
|
|
54
86
|
|
|
55
|
-
pub fn init(allocator: mem.Allocator) error{OutOfMemory}!MessagePool {
|
|
87
|
+
pub fn init(allocator: mem.Allocator, process_type: vsr.ProcessType) error{OutOfMemory}!MessagePool {
|
|
88
|
+
const messages_max: usize = switch (process_type) {
|
|
89
|
+
.replica => messages_max_replica,
|
|
90
|
+
.client => messages_max_client,
|
|
91
|
+
};
|
|
92
|
+
|
|
56
93
|
var ret: MessagePool = .{
|
|
57
94
|
.free_list = null,
|
|
58
|
-
.header_only_free_list = null,
|
|
59
95
|
};
|
|
60
96
|
{
|
|
61
97
|
var i: usize = 0;
|
|
62
|
-
while (i <
|
|
98
|
+
while (i < messages_max) : (i += 1) {
|
|
63
99
|
const buffer = try allocator.allocAdvanced(
|
|
64
100
|
u8,
|
|
65
101
|
config.sector_size,
|
|
@@ -75,45 +111,20 @@ pub const MessagePool = struct {
|
|
|
75
111
|
ret.free_list = message;
|
|
76
112
|
}
|
|
77
113
|
}
|
|
78
|
-
{
|
|
79
|
-
var i: usize = 0;
|
|
80
|
-
while (i < config.message_bus_headers_max) : (i += 1) {
|
|
81
|
-
const header = try allocator.create(Header);
|
|
82
|
-
const message = try allocator.create(Message);
|
|
83
|
-
message.* = .{
|
|
84
|
-
.header = header,
|
|
85
|
-
.buffer = mem.asBytes(header),
|
|
86
|
-
.next = ret.header_only_free_list,
|
|
87
|
-
};
|
|
88
|
-
ret.header_only_free_list = message;
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
114
|
|
|
92
115
|
return ret;
|
|
93
116
|
}
|
|
94
117
|
|
|
95
|
-
/// Get an unused message with a buffer of config.message_size_max.
|
|
96
|
-
///
|
|
97
|
-
pub fn get_message(pool: *MessagePool)
|
|
98
|
-
const
|
|
99
|
-
pool.free_list =
|
|
100
|
-
|
|
101
|
-
assert(
|
|
102
|
-
assert(ret.references == 0);
|
|
103
|
-
ret.references = 1;
|
|
104
|
-
return ret;
|
|
105
|
-
}
|
|
118
|
+
/// Get an unused message with a buffer of config.message_size_max.
|
|
119
|
+
/// The returned message has exactly one reference.
|
|
120
|
+
pub fn get_message(pool: *MessagePool) *Message {
|
|
121
|
+
const message = pool.free_list.?;
|
|
122
|
+
pool.free_list = message.next;
|
|
123
|
+
message.next = null;
|
|
124
|
+
assert(message.references == 0);
|
|
106
125
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
pub fn get_header_only_message(pool: *MessagePool) ?*Message {
|
|
110
|
-
const ret = pool.header_only_free_list orelse return null;
|
|
111
|
-
pool.header_only_free_list = ret.next;
|
|
112
|
-
ret.next = null;
|
|
113
|
-
assert(ret.header_only());
|
|
114
|
-
assert(ret.references == 0);
|
|
115
|
-
ret.references = 1;
|
|
116
|
-
return ret;
|
|
126
|
+
message.references = 1;
|
|
127
|
+
return message;
|
|
117
128
|
}
|
|
118
129
|
|
|
119
130
|
/// Decrement the reference count of the message, possibly freeing it.
|
|
@@ -121,13 +132,8 @@ pub const MessagePool = struct {
|
|
|
121
132
|
message.references -= 1;
|
|
122
133
|
if (message.references == 0) {
|
|
123
134
|
if (builtin.mode == .Debug) mem.set(u8, message.buffer, undefined);
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
pool.header_only_free_list = message;
|
|
127
|
-
} else {
|
|
128
|
-
message.next = pool.free_list;
|
|
129
|
-
pool.free_list = message;
|
|
130
|
-
}
|
|
135
|
+
message.next = pool.free_list;
|
|
136
|
+
pool.free_list = message;
|
|
131
137
|
}
|
|
132
138
|
}
|
|
133
139
|
};
|
|
@@ -76,6 +76,13 @@ pub fn RingBuffer(comptime T: type, comptime size: usize) type {
|
|
|
76
76
|
self.advance_tail();
|
|
77
77
|
}
|
|
78
78
|
|
|
79
|
+
/// Add an element to a RingBuffer, and assert that the capacity is sufficient.
|
|
80
|
+
pub fn push_assume_capacity(self: *Self, item: T) void {
|
|
81
|
+
self.push(item) catch |err| switch (err) {
|
|
82
|
+
error.NoSpaceLeft => unreachable,
|
|
83
|
+
};
|
|
84
|
+
}
|
|
85
|
+
|
|
79
86
|
/// Remove and return the next item, if any.
|
|
80
87
|
pub fn pop(self: *Self) ?T {
|
|
81
88
|
const result = self.head() orelse return null;
|
|
@@ -84,7 +84,7 @@ pub fn main() !void {
|
|
|
84
84
|
.one_way_delay_mean = 3 + random.uintLessThan(u16, 10),
|
|
85
85
|
.one_way_delay_min = random.uintLessThan(u16, 3),
|
|
86
86
|
.packet_loss_probability = random.uintLessThan(u8, 30),
|
|
87
|
-
.path_maximum_capacity =
|
|
87
|
+
.path_maximum_capacity = 2 + random.uintLessThan(u8, 19),
|
|
88
88
|
.path_clog_duration_mean = random.uintLessThan(u16, 500),
|
|
89
89
|
.path_clog_probability = random.uintLessThan(u8, 2),
|
|
90
90
|
.packet_replay_probability = random.uintLessThan(u8, 50),
|
|
@@ -213,7 +213,7 @@ pub fn main() !void {
|
|
|
213
213
|
|
|
214
214
|
assert(cluster.state_checker.convergence());
|
|
215
215
|
|
|
216
|
-
output.info("\n PASSED", .{});
|
|
216
|
+
output.info("\n PASSED ({} ticks)", .{tick});
|
|
217
217
|
}
|
|
218
218
|
|
|
219
219
|
/// Returns true, `p` percent of the time, else false.
|
|
@@ -244,7 +244,7 @@ fn send_request(random: std.rand.Random) bool {
|
|
|
244
244
|
if (client.request_queue.full()) return false;
|
|
245
245
|
if (checker_request_queue.full()) return false;
|
|
246
246
|
|
|
247
|
-
const message = client.get_message()
|
|
247
|
+
const message = client.get_message();
|
|
248
248
|
defer client.unref(message);
|
|
249
249
|
|
|
250
250
|
const body_size_max = config.message_size_max - @sizeOf(Header);
|
|
@@ -265,7 +265,7 @@ fn send_request(random: std.rand.Random) bool {
|
|
|
265
265
|
// While hashing the client ID with the request body prevents input collisions across clients,
|
|
266
266
|
// it's still possible for the same client to generate the same body, and therefore input hash.
|
|
267
267
|
const client_input = StateMachine.hash(client.id, body);
|
|
268
|
-
checker_request_queue.
|
|
268
|
+
checker_request_queue.push_assume_capacity(client_input);
|
|
269
269
|
std.log.scoped(.test_client).debug("client {} sending input={x}", .{
|
|
270
270
|
client_index,
|
|
271
271
|
client_input,
|