tigerbeetle-node 0.9.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +305 -103
- package/dist/index.d.ts +70 -67
- package/dist/index.js +70 -67
- package/dist/index.js.map +1 -1
- package/package.json +6 -6
- package/scripts/download_node_headers.sh +14 -7
- package/src/index.ts +11 -10
- package/src/node.zig +22 -20
- package/src/tigerbeetle/scripts/benchmark.bat +4 -3
- package/src/tigerbeetle/scripts/benchmark.sh +25 -10
- package/src/tigerbeetle/scripts/confirm_image.sh +44 -0
- package/src/tigerbeetle/scripts/fuzz_loop.sh +15 -0
- package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +7 -0
- package/src/tigerbeetle/scripts/install.sh +20 -4
- package/src/tigerbeetle/scripts/install_zig.bat +5 -1
- package/src/tigerbeetle/scripts/install_zig.sh +32 -26
- package/src/tigerbeetle/scripts/pre-commit.sh +9 -0
- package/src/tigerbeetle/scripts/shellcheck.sh +5 -0
- package/src/tigerbeetle/scripts/tests_on_alpine.sh +10 -0
- package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +14 -0
- package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +12 -3
- package/src/tigerbeetle/src/benchmark.zig +19 -9
- package/src/tigerbeetle/src/benchmark_array_search.zig +317 -0
- package/src/tigerbeetle/src/benchmarks/perf.zig +299 -0
- package/src/tigerbeetle/src/c/tb_client/context.zig +103 -0
- package/src/tigerbeetle/src/c/tb_client/packet.zig +80 -0
- package/src/tigerbeetle/src/c/tb_client/signal.zig +288 -0
- package/src/tigerbeetle/src/c/tb_client/thread.zig +328 -0
- package/src/tigerbeetle/src/c/tb_client.h +221 -0
- package/src/tigerbeetle/src/c/tb_client.zig +104 -0
- package/src/tigerbeetle/src/c/test.zig +1 -0
- package/src/tigerbeetle/src/cli.zig +143 -84
- package/src/tigerbeetle/src/config.zig +161 -20
- package/src/tigerbeetle/src/demo.zig +14 -8
- package/src/tigerbeetle/src/demo_05_post_pending_transfers.zig +2 -2
- package/src/tigerbeetle/src/ewah.zig +318 -0
- package/src/tigerbeetle/src/ewah_benchmark.zig +121 -0
- package/src/tigerbeetle/src/eytzinger_benchmark.zig +317 -0
- package/src/tigerbeetle/src/fifo.zig +17 -1
- package/src/tigerbeetle/src/io/darwin.zig +12 -10
- package/src/tigerbeetle/src/io/linux.zig +25 -9
- package/src/tigerbeetle/src/io/windows.zig +13 -9
- package/src/tigerbeetle/src/iops.zig +101 -0
- package/src/tigerbeetle/src/lsm/README.md +214 -0
- package/src/tigerbeetle/src/lsm/binary_search.zig +341 -0
- package/src/tigerbeetle/src/lsm/bloom_filter.zig +125 -0
- package/src/tigerbeetle/src/lsm/compaction.zig +557 -0
- package/src/tigerbeetle/src/lsm/composite_key.zig +77 -0
- package/src/tigerbeetle/src/lsm/direction.zig +11 -0
- package/src/tigerbeetle/src/lsm/eytzinger.zig +587 -0
- package/src/tigerbeetle/src/lsm/forest.zig +204 -0
- package/src/tigerbeetle/src/lsm/forest_fuzz.zig +412 -0
- package/src/tigerbeetle/src/lsm/grid.zig +549 -0
- package/src/tigerbeetle/src/lsm/groove.zig +1002 -0
- package/src/tigerbeetle/src/lsm/k_way_merge.zig +474 -0
- package/src/tigerbeetle/src/lsm/level_iterator.zig +315 -0
- package/src/tigerbeetle/src/lsm/manifest.zig +580 -0
- package/src/tigerbeetle/src/lsm/manifest_level.zig +925 -0
- package/src/tigerbeetle/src/lsm/manifest_log.zig +953 -0
- package/src/tigerbeetle/src/lsm/node_pool.zig +231 -0
- package/src/tigerbeetle/src/lsm/posted_groove.zig +387 -0
- package/src/tigerbeetle/src/lsm/segmented_array.zig +1318 -0
- package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +148 -0
- package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +9 -0
- package/src/tigerbeetle/src/lsm/set_associative_cache.zig +894 -0
- package/src/tigerbeetle/src/lsm/table.zig +967 -0
- package/src/tigerbeetle/src/lsm/table_immutable.zig +203 -0
- package/src/tigerbeetle/src/lsm/table_iterator.zig +306 -0
- package/src/tigerbeetle/src/lsm/table_mutable.zig +174 -0
- package/src/tigerbeetle/src/lsm/test.zig +423 -0
- package/src/tigerbeetle/src/lsm/tree.zig +1090 -0
- package/src/tigerbeetle/src/lsm/tree_fuzz.zig +457 -0
- package/src/tigerbeetle/src/main.zig +141 -109
- package/src/tigerbeetle/src/message_bus.zig +49 -48
- package/src/tigerbeetle/src/message_pool.zig +22 -12
- package/src/tigerbeetle/src/ring_buffer.zig +126 -30
- package/src/tigerbeetle/src/simulator.zig +205 -140
- package/src/tigerbeetle/src/state_machine.zig +1268 -721
- package/src/tigerbeetle/src/static_allocator.zig +65 -0
- package/src/tigerbeetle/src/storage.zig +40 -14
- package/src/tigerbeetle/src/test/accounting/auditor.zig +577 -0
- package/src/tigerbeetle/src/test/accounting/workload.zig +819 -0
- package/src/tigerbeetle/src/test/cluster.zig +104 -88
- package/src/tigerbeetle/src/test/conductor.zig +365 -0
- package/src/tigerbeetle/src/test/fuzz.zig +121 -0
- package/src/tigerbeetle/src/test/id.zig +89 -0
- package/src/tigerbeetle/src/test/message_bus.zig +15 -24
- package/src/tigerbeetle/src/test/network.zig +26 -17
- package/src/tigerbeetle/src/test/priority_queue.zig +645 -0
- package/src/tigerbeetle/src/test/state_checker.zig +94 -68
- package/src/tigerbeetle/src/test/state_machine.zig +135 -69
- package/src/tigerbeetle/src/test/storage.zig +78 -28
- package/src/tigerbeetle/src/tigerbeetle.zig +19 -16
- package/src/tigerbeetle/src/unit_tests.zig +15 -0
- package/src/tigerbeetle/src/util.zig +51 -0
- package/src/tigerbeetle/src/vopr.zig +494 -0
- package/src/tigerbeetle/src/vopr_hub/README.md +58 -0
- package/src/tigerbeetle/src/vopr_hub/SETUP.md +199 -0
- package/src/tigerbeetle/src/vopr_hub/go.mod +3 -0
- package/src/tigerbeetle/src/vopr_hub/main.go +1022 -0
- package/src/tigerbeetle/src/vopr_hub/scheduler/go.mod +3 -0
- package/src/tigerbeetle/src/vopr_hub/scheduler/main.go +403 -0
- package/src/tigerbeetle/src/vsr/client.zig +34 -7
- package/src/tigerbeetle/src/vsr/journal.zig +164 -174
- package/src/tigerbeetle/src/vsr/replica.zig +1602 -651
- package/src/tigerbeetle/src/vsr/superblock.zig +1761 -0
- package/src/tigerbeetle/src/vsr/superblock_client_table.zig +255 -0
- package/src/tigerbeetle/src/vsr/superblock_free_set.zig +644 -0
- package/src/tigerbeetle/src/vsr/superblock_manifest.zig +561 -0
- package/src/tigerbeetle/src/vsr.zig +118 -170
- package/src/tigerbeetle/scripts/vopr.bat +0 -48
- package/src/tigerbeetle/scripts/vopr.sh +0 -33
|
@@ -3,41 +3,48 @@ const assert = std.debug.assert;
|
|
|
3
3
|
const fmt = std.fmt;
|
|
4
4
|
const mem = std.mem;
|
|
5
5
|
const os = std.os;
|
|
6
|
-
const log = std.log;
|
|
6
|
+
const log = std.log.scoped(.main);
|
|
7
7
|
|
|
8
8
|
const config = @import("config.zig");
|
|
9
9
|
pub const log_level: std.log.Level = @intToEnum(std.log.Level, config.log_level);
|
|
10
10
|
|
|
11
11
|
const cli = @import("cli.zig");
|
|
12
|
+
const fatal = cli.fatal;
|
|
12
13
|
|
|
13
14
|
const IO = @import("io.zig").IO;
|
|
14
15
|
const Time = @import("time.zig").Time;
|
|
15
16
|
const Storage = @import("storage.zig").Storage;
|
|
17
|
+
|
|
16
18
|
const MessageBus = @import("message_bus.zig").MessageBusReplica;
|
|
17
|
-
const
|
|
19
|
+
const MessagePool = @import("message_pool.zig").MessagePool;
|
|
20
|
+
const StateMachine = @import("state_machine.zig").StateMachineType(Storage, .{
|
|
21
|
+
.message_body_size_max = config.message_body_size_max,
|
|
22
|
+
});
|
|
18
23
|
|
|
19
24
|
const vsr = @import("vsr.zig");
|
|
20
|
-
const Replica = vsr.
|
|
25
|
+
const Replica = vsr.ReplicaType(StateMachine, MessageBus, Storage, Time);
|
|
21
26
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
27
|
+
const SuperBlock = vsr.SuperBlockType(Storage);
|
|
28
|
+
const superblock_zone_size = @import("vsr/superblock.zig").superblock_zone_size;
|
|
29
|
+
const data_file_size_min = @import("vsr/superblock.zig").data_file_size_min;
|
|
30
|
+
|
|
31
|
+
comptime {
|
|
32
|
+
assert(config.deployment_environment == .production or
|
|
33
|
+
config.deployment_environment == .development);
|
|
34
|
+
}
|
|
25
35
|
|
|
36
|
+
pub fn main() !void {
|
|
26
37
|
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
|
|
27
38
|
defer arena.deinit();
|
|
28
39
|
|
|
29
40
|
const allocator = arena.allocator();
|
|
30
41
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
args.replica,
|
|
38
|
-
args.addresses,
|
|
39
|
-
args.dir_fd,
|
|
40
|
-
),
|
|
42
|
+
var parse_args = try cli.parse_args(allocator);
|
|
43
|
+
defer parse_args.deinit(allocator);
|
|
44
|
+
|
|
45
|
+
switch (parse_args) {
|
|
46
|
+
.format => |*args| try Command.format(allocator, args.cluster, args.replica, args.path),
|
|
47
|
+
.start => |*args| try Command.start(&arena, args.addresses, args.memory, args.path),
|
|
41
48
|
}
|
|
42
49
|
}
|
|
43
50
|
|
|
@@ -45,100 +52,125 @@ pub fn main() !void {
|
|
|
45
52
|
const filename_fmt = "cluster_{d:0>10}_replica_{d:0>3}.tigerbeetle";
|
|
46
53
|
const filename_len = fmt.count(filename_fmt, .{ 0, 0 });
|
|
47
54
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
55
|
+
const Command = struct {
|
|
56
|
+
dir_fd: os.fd_t,
|
|
57
|
+
fd: os.fd_t,
|
|
58
|
+
io: IO,
|
|
59
|
+
storage: Storage,
|
|
60
|
+
message_pool: MessagePool,
|
|
61
|
+
|
|
62
|
+
fn init(
|
|
63
|
+
command: *Command,
|
|
64
|
+
allocator: mem.Allocator,
|
|
65
|
+
path: [:0]const u8,
|
|
66
|
+
must_create: bool,
|
|
67
|
+
) !void {
|
|
68
|
+
// TODO Resolve the parent directory properly in the presence of .. and symlinks.
|
|
69
|
+
// TODO Handle physical volumes where there is no directory to fsync.
|
|
70
|
+
const dirname = std.fs.path.dirname(path) orelse ".";
|
|
71
|
+
command.dir_fd = try IO.open_dir(dirname);
|
|
72
|
+
errdefer os.close(command.dir_fd);
|
|
73
|
+
|
|
74
|
+
const basename = std.fs.path.basename(path);
|
|
75
|
+
command.fd = try IO.open_file(command.dir_fd, basename, data_file_size_min, must_create);
|
|
76
|
+
errdefer os.close(command.fd);
|
|
77
|
+
|
|
78
|
+
command.io = try IO.init(128, 0);
|
|
79
|
+
errdefer command.io.deinit();
|
|
80
|
+
|
|
81
|
+
command.storage = try Storage.init(&command.io, command.fd);
|
|
82
|
+
errdefer command.storage.deinit();
|
|
83
|
+
|
|
84
|
+
command.message_pool = try MessagePool.init(allocator, .replica);
|
|
85
|
+
errdefer command.message_pool.deinit(allocator);
|
|
77
86
|
}
|
|
78
87
|
|
|
79
|
-
|
|
80
|
-
|
|
88
|
+
fn deinit(command: *Command, allocator: mem.Allocator) void {
|
|
89
|
+
command.message_pool.deinit(allocator);
|
|
90
|
+
command.storage.deinit();
|
|
91
|
+
command.io.deinit();
|
|
92
|
+
os.close(command.fd);
|
|
93
|
+
os.close(command.dir_fd);
|
|
94
|
+
}
|
|
81
95
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
};
|
|
96
|
-
assert(filename.len == filename_len);
|
|
97
|
-
|
|
98
|
-
// TODO Expose data file size on the CLI.
|
|
99
|
-
const storage_fd = try io.open_file(
|
|
100
|
-
dir_fd,
|
|
101
|
-
filename,
|
|
102
|
-
config.journal_size_max, // TODO Double-check that we have space for redundant headers.
|
|
103
|
-
false,
|
|
104
|
-
);
|
|
105
|
-
|
|
106
|
-
var state_machine = try StateMachine.init(
|
|
107
|
-
allocator,
|
|
108
|
-
config.accounts_max,
|
|
109
|
-
config.transfers_max,
|
|
110
|
-
config.transfers_pending_max,
|
|
111
|
-
);
|
|
112
|
-
var storage = try Storage.init(config.journal_size_max, storage_fd, io);
|
|
113
|
-
var message_bus = try MessageBus.init(
|
|
114
|
-
allocator,
|
|
115
|
-
cluster,
|
|
116
|
-
addresses,
|
|
117
|
-
replica_index,
|
|
118
|
-
io,
|
|
119
|
-
);
|
|
120
|
-
var time: Time = .{};
|
|
121
|
-
var replica = try Replica.init(
|
|
122
|
-
allocator,
|
|
123
|
-
cluster,
|
|
124
|
-
@intCast(u8, addresses.len),
|
|
125
|
-
replica_index,
|
|
126
|
-
&time,
|
|
127
|
-
&storage,
|
|
128
|
-
&message_bus,
|
|
129
|
-
&state_machine,
|
|
130
|
-
);
|
|
131
|
-
message_bus.set_on_message(*Replica, &replica, Replica.on_message);
|
|
132
|
-
|
|
133
|
-
log.info("cluster={x} replica={}: listening on {}", .{
|
|
134
|
-
cluster,
|
|
135
|
-
replica_index,
|
|
136
|
-
addresses[replica_index],
|
|
137
|
-
});
|
|
138
|
-
|
|
139
|
-
while (true) {
|
|
140
|
-
replica.tick();
|
|
141
|
-
message_bus.tick();
|
|
142
|
-
try io.run_for_ns(config.tick_ms * std.time.ns_per_ms);
|
|
96
|
+
pub fn format(allocator: mem.Allocator, cluster: u32, replica: u8, path: [:0]const u8) !void {
|
|
97
|
+
var command: Command = undefined;
|
|
98
|
+
try command.init(allocator, path, true);
|
|
99
|
+
defer command.deinit(allocator);
|
|
100
|
+
|
|
101
|
+
var superblock = try SuperBlock.init(
|
|
102
|
+
allocator,
|
|
103
|
+
&command.storage,
|
|
104
|
+
&command.message_pool,
|
|
105
|
+
);
|
|
106
|
+
defer superblock.deinit(allocator);
|
|
107
|
+
|
|
108
|
+
try vsr.format(Storage, allocator, cluster, replica, &command.storage, &superblock);
|
|
143
109
|
}
|
|
144
|
-
|
|
110
|
+
|
|
111
|
+
pub fn start(
|
|
112
|
+
arena: *std.heap.ArenaAllocator,
|
|
113
|
+
addresses: []std.net.Address,
|
|
114
|
+
memory: u64,
|
|
115
|
+
path: [:0]const u8,
|
|
116
|
+
) !void {
|
|
117
|
+
_ = memory; // TODO
|
|
118
|
+
|
|
119
|
+
const allocator = arena.allocator();
|
|
120
|
+
|
|
121
|
+
var command: Command = undefined;
|
|
122
|
+
try command.init(allocator, path, false);
|
|
123
|
+
defer command.deinit(allocator);
|
|
124
|
+
|
|
125
|
+
var replica: Replica = undefined;
|
|
126
|
+
try replica.open(allocator, .{
|
|
127
|
+
.replica_count = @intCast(u8, addresses.len),
|
|
128
|
+
.storage = &command.storage,
|
|
129
|
+
.message_pool = &command.message_pool,
|
|
130
|
+
.time = .{},
|
|
131
|
+
.state_machine_options = .{
|
|
132
|
+
// TODO Tune lsm_forest_node_count better.
|
|
133
|
+
.lsm_forest_node_count = 4096,
|
|
134
|
+
.cache_entries_accounts = config.cache_accounts_max,
|
|
135
|
+
.cache_entries_transfers = config.cache_transfers_max,
|
|
136
|
+
.cache_entries_posted = config.cache_transfers_pending_max,
|
|
137
|
+
},
|
|
138
|
+
.message_bus_options = .{
|
|
139
|
+
.configuration = addresses,
|
|
140
|
+
.io = &command.io,
|
|
141
|
+
},
|
|
142
|
+
}) catch |err| switch (err) {
|
|
143
|
+
error.NoAddress => fatal("all --addresses must be provided", .{}),
|
|
144
|
+
else => err,
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
// Calculate how many bytes are allocated inside `arena`.
|
|
148
|
+
// TODO This does not account for the fact that any allocations will be rounded up to the nearest page by `std.heap.page_allocator`.
|
|
149
|
+
var allocation_count: usize = 0;
|
|
150
|
+
var allocation_size: usize = 0;
|
|
151
|
+
{
|
|
152
|
+
var node_maybe = arena.state.buffer_list.first;
|
|
153
|
+
while (node_maybe) |node| {
|
|
154
|
+
allocation_count += 1;
|
|
155
|
+
allocation_size += node.data.len;
|
|
156
|
+
node_maybe = node.next;
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
log.info("{}: Allocated {} bytes in {} regions during replica init", .{
|
|
160
|
+
replica.replica,
|
|
161
|
+
allocation_size,
|
|
162
|
+
allocation_count,
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
log.info("{}: cluster={}: listening on {}", .{
|
|
166
|
+
replica.replica,
|
|
167
|
+
replica.cluster,
|
|
168
|
+
addresses[replica.replica],
|
|
169
|
+
});
|
|
170
|
+
|
|
171
|
+
while (true) {
|
|
172
|
+
replica.tick();
|
|
173
|
+
try command.io.run_for_ns(config.tick_ms * std.time.ns_per_ms);
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
};
|
|
@@ -17,25 +17,30 @@ const IO = @import("io.zig").IO;
|
|
|
17
17
|
const MessagePool = @import("message_pool.zig").MessagePool;
|
|
18
18
|
const Message = MessagePool.Message;
|
|
19
19
|
|
|
20
|
-
pub const MessageBusReplica =
|
|
21
|
-
pub const MessageBusClient =
|
|
20
|
+
pub const MessageBusReplica = MessageBusType(.replica);
|
|
21
|
+
pub const MessageBusClient = MessageBusType(.client);
|
|
22
22
|
|
|
23
|
-
fn
|
|
23
|
+
fn MessageBusType(comptime process_type: vsr.ProcessType) type {
|
|
24
24
|
const SendQueue = RingBuffer(*Message, switch (process_type) {
|
|
25
25
|
.replica => config.connection_send_queue_max_replica,
|
|
26
26
|
// A client has at most 1 in-flight request, plus pings.
|
|
27
27
|
.client => config.connection_send_queue_max_client,
|
|
28
|
-
});
|
|
28
|
+
}, .array);
|
|
29
29
|
|
|
30
30
|
const tcp_sndbuf = switch (process_type) {
|
|
31
31
|
.replica => config.tcp_sndbuf_replica,
|
|
32
32
|
.client => config.tcp_sndbuf_client,
|
|
33
33
|
};
|
|
34
34
|
|
|
35
|
+
const Process = union(vsr.ProcessType) {
|
|
36
|
+
replica: u8,
|
|
37
|
+
client: u128,
|
|
38
|
+
};
|
|
39
|
+
|
|
35
40
|
return struct {
|
|
36
41
|
const Self = @This();
|
|
37
42
|
|
|
38
|
-
pool: MessagePool,
|
|
43
|
+
pool: *MessagePool,
|
|
39
44
|
io: *IO,
|
|
40
45
|
|
|
41
46
|
cluster: u32,
|
|
@@ -59,10 +64,8 @@ fn MessageBusImpl(comptime process_type: vsr.ProcessType) type {
|
|
|
59
64
|
.client => void,
|
|
60
65
|
},
|
|
61
66
|
|
|
62
|
-
/// The callback to be called when a message is received.
|
|
63
|
-
|
|
64
|
-
on_message_callback: ?fn (context: ?*anyopaque, message: *Message) void = null,
|
|
65
|
-
on_message_context: ?*anyopaque = null,
|
|
67
|
+
/// The callback to be called when a message is received.
|
|
68
|
+
on_message_callback: fn (message_bus: *Self, message: *Message) void,
|
|
66
69
|
|
|
67
70
|
/// This slice is allocated with a fixed size in the init function and never reallocated.
|
|
68
71
|
connections: []Connection,
|
|
@@ -80,49 +83,54 @@ fn MessageBusImpl(comptime process_type: vsr.ProcessType) type {
|
|
|
80
83
|
/// Seeded with the process' replica index or client ID.
|
|
81
84
|
prng: std.rand.DefaultPrng,
|
|
82
85
|
|
|
86
|
+
pub const Options = struct {
|
|
87
|
+
configuration: []std.net.Address,
|
|
88
|
+
io: *IO,
|
|
89
|
+
};
|
|
90
|
+
|
|
83
91
|
/// Initialize the MessageBus for the given cluster, configuration and replica/client process.
|
|
84
92
|
pub fn init(
|
|
85
93
|
allocator: mem.Allocator,
|
|
86
94
|
cluster: u32,
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
},
|
|
92
|
-
io: *IO,
|
|
95
|
+
process: Process,
|
|
96
|
+
message_pool: *MessagePool,
|
|
97
|
+
on_message_callback: fn (message_bus: *Self, message: *Message) void,
|
|
98
|
+
options: Options,
|
|
93
99
|
) !Self {
|
|
94
100
|
// There must be enough connections for all replicas and at least one client.
|
|
95
|
-
assert(config.connections_max > configuration.len);
|
|
101
|
+
assert(config.connections_max > options.configuration.len);
|
|
102
|
+
assert(@as(vsr.ProcessType, process) == process_type);
|
|
96
103
|
|
|
97
104
|
const connections = try allocator.alloc(Connection, config.connections_max);
|
|
98
105
|
errdefer allocator.free(connections);
|
|
99
106
|
mem.set(Connection, connections, .{});
|
|
100
107
|
|
|
101
|
-
const replicas = try allocator.alloc(?*Connection, configuration.len);
|
|
108
|
+
const replicas = try allocator.alloc(?*Connection, options.configuration.len);
|
|
102
109
|
errdefer allocator.free(replicas);
|
|
103
110
|
mem.set(?*Connection, replicas, null);
|
|
104
111
|
|
|
105
|
-
const replicas_connect_attempts = try allocator.alloc(u64, configuration.len);
|
|
112
|
+
const replicas_connect_attempts = try allocator.alloc(u64, options.configuration.len);
|
|
106
113
|
errdefer allocator.free(replicas_connect_attempts);
|
|
107
114
|
mem.set(u64, replicas_connect_attempts, 0);
|
|
108
115
|
|
|
109
116
|
const prng_seed = switch (process_type) {
|
|
110
|
-
.replica => process,
|
|
111
|
-
.client => @truncate(u64, process),
|
|
117
|
+
.replica => process.replica,
|
|
118
|
+
.client => @truncate(u64, process.client),
|
|
112
119
|
};
|
|
113
120
|
|
|
114
121
|
var bus: Self = .{
|
|
115
|
-
.pool =
|
|
116
|
-
.io = io,
|
|
122
|
+
.pool = message_pool,
|
|
123
|
+
.io = options.io,
|
|
117
124
|
.cluster = cluster,
|
|
118
|
-
.configuration = configuration,
|
|
125
|
+
.configuration = options.configuration,
|
|
119
126
|
.process = switch (process_type) {
|
|
120
127
|
.replica => .{
|
|
121
|
-
.replica = process,
|
|
122
|
-
.accept_fd = try init_tcp(io, configuration[process]),
|
|
128
|
+
.replica = process.replica,
|
|
129
|
+
.accept_fd = try init_tcp(options.io, options.configuration[process.replica]),
|
|
123
130
|
},
|
|
124
131
|
.client => {},
|
|
125
132
|
},
|
|
133
|
+
.on_message_callback = on_message_callback,
|
|
126
134
|
.connections = connections,
|
|
127
135
|
.replicas = replicas,
|
|
128
136
|
.replicas_connect_attempts = replicas_connect_attempts,
|
|
@@ -137,25 +145,8 @@ fn MessageBusImpl(comptime process_type: vsr.ProcessType) type {
|
|
|
137
145
|
return bus;
|
|
138
146
|
}
|
|
139
147
|
|
|
140
|
-
pub fn set_on_message(
|
|
141
|
-
bus: *Self,
|
|
142
|
-
comptime Context: type,
|
|
143
|
-
context: Context,
|
|
144
|
-
comptime on_message: fn (context: Context, message: *Message) void,
|
|
145
|
-
) void {
|
|
146
|
-
assert(bus.on_message_callback == null);
|
|
147
|
-
assert(bus.on_message_context == null);
|
|
148
|
-
|
|
149
|
-
bus.on_message_callback = struct {
|
|
150
|
-
fn wrapper(_context: ?*anyopaque, message: *Message) void {
|
|
151
|
-
on_message(@intToPtr(Context, @ptrToInt(_context)), message);
|
|
152
|
-
}
|
|
153
|
-
}.wrapper;
|
|
154
|
-
bus.on_message_context = context;
|
|
155
|
-
}
|
|
156
|
-
|
|
157
148
|
/// TODO This is required by the Client.
|
|
158
|
-
pub fn deinit(_: *Self) void {}
|
|
149
|
+
pub fn deinit(_: *Self, _: std.mem.Allocator) void {}
|
|
159
150
|
|
|
160
151
|
fn init_tcp(io: *IO, address: std.net.Address) !os.socket_t {
|
|
161
152
|
const fd = try io.open_socket(
|
|
@@ -206,9 +197,9 @@ fn MessageBusImpl(comptime process_type: vsr.ProcessType) type {
|
|
|
206
197
|
}
|
|
207
198
|
}
|
|
208
199
|
|
|
209
|
-
if (config.
|
|
200
|
+
if (config.tcp_user_timeout_ms > 0) {
|
|
210
201
|
if (is_linux) {
|
|
211
|
-
try set(fd, os.IPPROTO.TCP, os.TCP.USER_TIMEOUT, config.
|
|
202
|
+
try set(fd, os.IPPROTO.TCP, os.TCP.USER_TIMEOUT, config.tcp_user_timeout_ms);
|
|
212
203
|
}
|
|
213
204
|
}
|
|
214
205
|
|
|
@@ -641,7 +632,13 @@ fn MessageBusImpl(comptime process_type: vsr.ProcessType) type {
|
|
|
641
632
|
//assert(!connection.send_submitted);
|
|
642
633
|
},
|
|
643
634
|
// Ignore all the remaining errors for now
|
|
644
|
-
error.ConnectionAborted,
|
|
635
|
+
error.ConnectionAborted,
|
|
636
|
+
error.ConnectionResetByPeer,
|
|
637
|
+
error.BlockingOperationInProgress,
|
|
638
|
+
error.NetworkSubsystemFailed,
|
|
639
|
+
error.SystemResources,
|
|
640
|
+
error.Unexpected,
|
|
641
|
+
=> {},
|
|
645
642
|
};
|
|
646
643
|
},
|
|
647
644
|
.close => {},
|
|
@@ -670,7 +667,11 @@ fn MessageBusImpl(comptime process_type: vsr.ProcessType) type {
|
|
|
670
667
|
return null;
|
|
671
668
|
}
|
|
672
669
|
|
|
673
|
-
const header = mem.bytesAsValue(
|
|
670
|
+
const header = mem.bytesAsValue(
|
|
671
|
+
Header,
|
|
672
|
+
@alignCast(@alignOf(Header), data[0..@sizeOf(Header)]),
|
|
673
|
+
);
|
|
674
|
+
|
|
674
675
|
if (!connection.recv_checked_header) {
|
|
675
676
|
if (!header.valid_checksum()) {
|
|
676
677
|
log.err("invalid header checksum received from {}", .{connection.peer});
|
|
@@ -763,7 +764,7 @@ fn MessageBusImpl(comptime process_type: vsr.ProcessType) type {
|
|
|
763
764
|
}
|
|
764
765
|
}
|
|
765
766
|
|
|
766
|
-
bus.on_message_callback
|
|
767
|
+
bus.on_message_callback(bus, message);
|
|
767
768
|
}
|
|
768
769
|
|
|
769
770
|
fn maybe_set_peer(connection: *Connection, bus: *Self, header: *const Header) void {
|
|
@@ -23,9 +23,10 @@ pub const messages_max_replica = messages_max: {
|
|
|
23
23
|
var sum: usize = 0;
|
|
24
24
|
|
|
25
25
|
sum += config.io_depth_read + config.io_depth_write; // Journal I/O
|
|
26
|
-
sum += config.clients_max; //
|
|
26
|
+
sum += config.clients_max; // SuperBlock.client_table
|
|
27
27
|
sum += 1; // Replica.loopback_queue
|
|
28
28
|
sum += config.pipeline_max; // Replica.pipeline
|
|
29
|
+
sum += 1; // Replica.commit_prepare
|
|
29
30
|
// Replica.do_view_change_from_all_replicas quorum:
|
|
30
31
|
// Replica.recovery_response_quorum is only used for recovery and does not increase the limit.
|
|
31
32
|
// All other quorums are bitsets.
|
|
@@ -67,9 +68,7 @@ pub const MessagePool = struct {
|
|
|
67
68
|
pub const Message = struct {
|
|
68
69
|
// TODO: replace this with a header() function to save memory
|
|
69
70
|
header: *Header,
|
|
70
|
-
|
|
71
|
-
/// to perform Direct I/O is safe.
|
|
72
|
-
buffer: []u8,
|
|
71
|
+
buffer: []align(config.sector_size) u8,
|
|
73
72
|
references: u32 = 0,
|
|
74
73
|
next: ?*Message,
|
|
75
74
|
|
|
@@ -79,21 +78,23 @@ pub const MessagePool = struct {
|
|
|
79
78
|
return message;
|
|
80
79
|
}
|
|
81
80
|
|
|
82
|
-
pub fn body(message: *Message) []u8 {
|
|
81
|
+
pub fn body(message: *const Message) []align(@sizeOf(Header)) u8 {
|
|
83
82
|
return message.buffer[@sizeOf(Header)..message.header.size];
|
|
84
83
|
}
|
|
85
84
|
};
|
|
86
85
|
|
|
87
|
-
/// List of currently unused messages
|
|
86
|
+
/// List of currently unused messages.
|
|
88
87
|
free_list: ?*Message,
|
|
89
88
|
|
|
90
89
|
pub fn init(allocator: mem.Allocator, process_type: vsr.ProcessType) error{OutOfMemory}!MessagePool {
|
|
91
|
-
|
|
90
|
+
return MessagePool.init_capacity(allocator, switch (process_type) {
|
|
92
91
|
.replica => messages_max_replica,
|
|
93
92
|
.client => messages_max_client,
|
|
94
|
-
};
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
95
|
|
|
96
|
-
|
|
96
|
+
pub fn init_capacity(allocator: mem.Allocator, messages_max: usize) error{OutOfMemory}!MessagePool {
|
|
97
|
+
var pool: MessagePool = .{
|
|
97
98
|
.free_list = null,
|
|
98
99
|
};
|
|
99
100
|
{
|
|
@@ -109,13 +110,22 @@ pub const MessagePool = struct {
|
|
|
109
110
|
message.* = .{
|
|
110
111
|
.header = mem.bytesAsValue(Header, buffer[0..@sizeOf(Header)]),
|
|
111
112
|
.buffer = buffer,
|
|
112
|
-
.next =
|
|
113
|
+
.next = pool.free_list,
|
|
113
114
|
};
|
|
114
|
-
|
|
115
|
+
pool.free_list = message;
|
|
115
116
|
}
|
|
116
117
|
}
|
|
117
118
|
|
|
118
|
-
return
|
|
119
|
+
return pool;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/// Frees all messages that were unused or returned to the pool via unref().
|
|
123
|
+
pub fn deinit(pool: *MessagePool, allocator: mem.Allocator) void {
|
|
124
|
+
while (pool.free_list) |message| {
|
|
125
|
+
pool.free_list = message.next;
|
|
126
|
+
allocator.free(message.buffer);
|
|
127
|
+
allocator.destroy(message);
|
|
128
|
+
}
|
|
119
129
|
}
|
|
120
130
|
|
|
121
131
|
/// Get an unused message with a buffer of config.message_size_max.
|