tigerbeetle-node 0.3.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -7
- package/dist/benchmark.js +1 -1
- package/dist/benchmark.js.map +1 -1
- package/dist/index.d.ts +22 -20
- package/dist/index.js +40 -18
- package/dist/index.js.map +1 -1
- package/dist/test.js +13 -1
- package/dist/test.js.map +1 -1
- package/package.json +12 -12
- package/scripts/postinstall.sh +2 -2
- package/src/benchmark.ts +4 -4
- package/src/index.ts +35 -9
- package/src/node.zig +139 -28
- package/src/test.ts +19 -5
- package/src/tigerbeetle/scripts/benchmark.sh +10 -3
- package/src/tigerbeetle/scripts/install.sh +2 -2
- package/src/tigerbeetle/scripts/install_zig.bat +109 -0
- package/src/tigerbeetle/scripts/install_zig.sh +21 -4
- package/src/tigerbeetle/scripts/vopr.bat +48 -0
- package/src/tigerbeetle/scripts/vopr.sh +33 -0
- package/src/tigerbeetle/src/benchmark.zig +74 -42
- package/src/tigerbeetle/src/cli.zig +136 -83
- package/src/tigerbeetle/src/config.zig +80 -26
- package/src/tigerbeetle/src/demo.zig +101 -78
- package/src/tigerbeetle/src/demo_01_create_accounts.zig +2 -7
- package/src/tigerbeetle/src/demo_02_lookup_accounts.zig +2 -7
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +2 -7
- package/src/tigerbeetle/src/demo_04_create_transfers_two_phase_commit.zig +2 -5
- package/src/tigerbeetle/src/demo_05_accept_transfers.zig +2 -7
- package/src/tigerbeetle/src/demo_06_reject_transfers.zig +2 -7
- package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +8 -0
- package/src/tigerbeetle/src/fifo.zig +20 -11
- package/src/tigerbeetle/src/io.zig +35 -22
- package/src/tigerbeetle/src/io_darwin.zig +701 -0
- package/src/tigerbeetle/src/main.zig +72 -25
- package/src/tigerbeetle/src/message_bus.zig +379 -456
- package/src/tigerbeetle/src/message_pool.zig +3 -3
- package/src/tigerbeetle/src/ring_buffer.zig +192 -37
- package/src/tigerbeetle/src/simulator.zig +317 -0
- package/src/tigerbeetle/src/state_machine.zig +846 -38
- package/src/tigerbeetle/src/storage.zig +488 -90
- package/src/tigerbeetle/src/test/cluster.zig +221 -0
- package/src/tigerbeetle/src/test/message_bus.zig +92 -0
- package/src/tigerbeetle/src/test/network.zig +182 -0
- package/src/tigerbeetle/src/test/packet_simulator.zig +371 -0
- package/src/tigerbeetle/src/test/state_checker.zig +142 -0
- package/src/tigerbeetle/src/test/state_machine.zig +71 -0
- package/src/tigerbeetle/src/test/storage.zig +375 -0
- package/src/tigerbeetle/src/test/time.zig +84 -0
- package/src/tigerbeetle/src/tigerbeetle.zig +6 -3
- package/src/tigerbeetle/src/time.zig +65 -0
- package/src/tigerbeetle/src/unit_tests.zig +14 -0
- package/src/tigerbeetle/src/vsr/client.zig +519 -0
- package/src/tigerbeetle/src/vsr/clock.zig +829 -0
- package/src/tigerbeetle/src/vsr/journal.zig +1368 -0
- package/src/tigerbeetle/src/vsr/marzullo.zig +306 -0
- package/src/tigerbeetle/src/vsr/replica.zig +4248 -0
- package/src/tigerbeetle/src/vsr.zig +601 -0
- package/src/tigerbeetle/LICENSE +0 -177
- package/src/tigerbeetle/README.md +0 -116
- package/src/tigerbeetle/src/client.zig +0 -319
- package/src/tigerbeetle/src/concurrent_ranges.zig +0 -162
- package/src/tigerbeetle/src/fixed_array_list.zig +0 -53
- package/src/tigerbeetle/src/io_async.zig +0 -600
- package/src/tigerbeetle/src/journal.zig +0 -567
- package/src/tigerbeetle/src/test_client.zig +0 -41
- package/src/tigerbeetle/src/test_main.zig +0 -118
- package/src/tigerbeetle/src/test_message_bus.zig +0 -132
- package/src/tigerbeetle/src/vr/journal.zig +0 -672
- package/src/tigerbeetle/src/vr/replica.zig +0 -3061
- package/src/tigerbeetle/src/vr.zig +0 -374
|
@@ -2,167 +2,220 @@ const std = @import("std");
|
|
|
2
2
|
const assert = std.debug.assert;
|
|
3
3
|
const fmt = std.fmt;
|
|
4
4
|
const mem = std.mem;
|
|
5
|
+
const meta = std.meta;
|
|
5
6
|
const net = std.net;
|
|
6
7
|
const os = std.os;
|
|
7
8
|
|
|
8
9
|
const config = @import("config.zig");
|
|
9
|
-
const
|
|
10
|
+
const vsr = @import("vsr.zig");
|
|
10
11
|
|
|
11
12
|
const usage = fmt.comptimePrint(
|
|
12
|
-
\\Usage:
|
|
13
|
+
\\Usage:
|
|
13
14
|
\\
|
|
14
|
-
\\ -h
|
|
15
|
+
\\ tigerbeetle [-h | --help]
|
|
16
|
+
\\
|
|
17
|
+
\\ tigerbeetle init [--directory=<path>] --cluster=<integer> --replica=<index>
|
|
18
|
+
\\
|
|
19
|
+
\\ tigerbeetle start [--directory=<path>] --cluster=<integer> --replica=<index> --addresses=<addresses>
|
|
20
|
+
\\
|
|
21
|
+
\\Commands:
|
|
22
|
+
\\
|
|
23
|
+
\\ init Create a new .tigerbeetle data file. Requires the --cluster and
|
|
24
|
+
\\ --replica options. The file will be created in the path set by
|
|
25
|
+
\\ the --directory option if provided. Otherwise, it will be created in
|
|
26
|
+
\\ the default {[default_directory]s}.
|
|
27
|
+
\\
|
|
28
|
+
\\ start Run a TigerBeetle replica as part of the cluster specified by the
|
|
29
|
+
\\ --cluster, --replica, and --addresses options. This requires an
|
|
30
|
+
\\ existing .tigerbeetle data file, either in the default
|
|
31
|
+
\\ {[default_directory]s} or the path set with --directory.
|
|
32
|
+
\\
|
|
33
|
+
\\Options:
|
|
34
|
+
\\
|
|
35
|
+
\\ -h, --help
|
|
15
36
|
\\ Print this help message and exit.
|
|
16
37
|
\\
|
|
17
|
-
\\
|
|
38
|
+
\\ --directory=<path>
|
|
39
|
+
\\ Set the directory used to store .tigerbeetle data files. If this option is
|
|
40
|
+
\\ omitted, the default {[default_directory]s} will be used.
|
|
41
|
+
\\
|
|
42
|
+
\\ --cluster=<integer>
|
|
43
|
+
\\ Set the cluster ID to the provided 32-bit unsigned integer.
|
|
18
44
|
\\
|
|
19
|
-
\\
|
|
20
|
-
\\ Set the
|
|
45
|
+
\\ --replica=<index>
|
|
46
|
+
\\ Set the zero-based index that will be used for this replica process.
|
|
47
|
+
\\ The value of this option will be interpreted as an index into the --addresses array.
|
|
21
48
|
\\
|
|
22
|
-
\\
|
|
49
|
+
\\ --addresses=<addresses>
|
|
23
50
|
\\ Set the addresses of all replicas in the cluster. Accepts a
|
|
24
51
|
\\ comma-separated list of IPv4 addresses with port numbers.
|
|
25
52
|
\\ Either the IPv4 address or port number, but not both, may be
|
|
26
53
|
\\ omitted in which case a default of {[default_address]s} or {[default_port]d}
|
|
27
54
|
\\ will be used.
|
|
28
55
|
\\
|
|
29
|
-
\\ --replica-index=<index>
|
|
30
|
-
\\ Set the address in the array passed to the --replica-addresses option that
|
|
31
|
-
\\ will be used for this replica process. The value of this option is
|
|
32
|
-
\\ interpreted as a zero-based index into the array.
|
|
33
|
-
\\
|
|
34
56
|
\\Examples:
|
|
35
57
|
\\
|
|
36
|
-
\\
|
|
58
|
+
\\ tigerbeetle init --cluster=0 --replica=0 --directory=/var/lib/tigerbeetle
|
|
59
|
+
\\ tigerbeetle init --cluster=0 --replica=1 --directory=/var/lib/tigerbeetle
|
|
60
|
+
\\ tigerbeetle init --cluster=0 --replica=2 --directory=/var/lib/tigerbeetle
|
|
37
61
|
\\
|
|
38
|
-
\\
|
|
62
|
+
\\ tigerbeetle start --cluster=0 --replica=0 --addresses=127.0.0.1:3003,127.0.0.1:3001,127.0.0.1:3002
|
|
63
|
+
\\ tigerbeetle start --cluster=0 --replica=1 --addresses=3003,3001,3002
|
|
64
|
+
\\ tigerbeetle start --cluster=0 --replica=2 --addresses=3003,3001,3002
|
|
39
65
|
\\
|
|
40
|
-
\\
|
|
66
|
+
\\ tigerbeetle start --cluster=1 --replica=0 --addresses=192.168.0.1,192.168.0.2,192.168.0.3
|
|
41
67
|
\\
|
|
42
68
|
, .{
|
|
69
|
+
.default_directory = config.directory,
|
|
43
70
|
.default_address = config.address,
|
|
44
71
|
.default_port = config.port,
|
|
45
72
|
});
|
|
46
73
|
|
|
47
|
-
pub const
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
74
|
+
pub const Command = union(enum) {
|
|
75
|
+
init: struct {
|
|
76
|
+
cluster: u32,
|
|
77
|
+
replica: u8,
|
|
78
|
+
dir_fd: os.fd_t,
|
|
79
|
+
},
|
|
80
|
+
start: struct {
|
|
81
|
+
cluster: u32,
|
|
82
|
+
replica: u8,
|
|
83
|
+
addresses: []net.Address,
|
|
84
|
+
dir_fd: os.fd_t,
|
|
85
|
+
},
|
|
51
86
|
};
|
|
52
87
|
|
|
53
88
|
/// Parse the command line arguments passed to the tigerbeetle binary.
|
|
54
89
|
/// Exits the program with a non-zero exit code if an error is found.
|
|
55
|
-
pub fn parse_args(allocator: *std.mem.Allocator)
|
|
90
|
+
pub fn parse_args(allocator: *std.mem.Allocator) Command {
|
|
56
91
|
var maybe_cluster: ?[]const u8 = null;
|
|
57
|
-
var maybe_configuration: ?[]const u8 = null;
|
|
58
92
|
var maybe_replica: ?[]const u8 = null;
|
|
93
|
+
var maybe_addresses: ?[]const u8 = null;
|
|
94
|
+
var maybe_directory: ?[:0]const u8 = null;
|
|
59
95
|
|
|
60
96
|
var args = std.process.args();
|
|
61
97
|
// Skip argv[0] which is the name of this executable
|
|
62
98
|
_ = args.nextPosix();
|
|
99
|
+
|
|
100
|
+
const raw_command = args.nextPosix() orelse
|
|
101
|
+
fatal("no command provided, expected 'start' or 'init'", .{});
|
|
102
|
+
if (mem.eql(u8, raw_command, "-h") or mem.eql(u8, raw_command, "--help")) {
|
|
103
|
+
std.io.getStdOut().writeAll(usage) catch os.exit(1);
|
|
104
|
+
os.exit(0);
|
|
105
|
+
}
|
|
106
|
+
const command = meta.stringToEnum(meta.Tag(Command), raw_command) orelse
|
|
107
|
+
fatal("unknown command '{s}', expected 'start' or 'init'", .{raw_command});
|
|
108
|
+
|
|
63
109
|
while (args.nextPosix()) |arg| {
|
|
64
|
-
if (mem.startsWith(u8, arg, "--cluster
|
|
65
|
-
maybe_cluster = parse_flag("--cluster
|
|
66
|
-
} else if (mem.startsWith(u8, arg, "--replica
|
|
67
|
-
|
|
68
|
-
} else if (mem.startsWith(u8, arg, "--
|
|
69
|
-
|
|
110
|
+
if (mem.startsWith(u8, arg, "--cluster")) {
|
|
111
|
+
maybe_cluster = parse_flag("--cluster", arg);
|
|
112
|
+
} else if (mem.startsWith(u8, arg, "--replica")) {
|
|
113
|
+
maybe_replica = parse_flag("--replica", arg);
|
|
114
|
+
} else if (mem.startsWith(u8, arg, "--addresses")) {
|
|
115
|
+
maybe_addresses = parse_flag("--addresses", arg);
|
|
116
|
+
} else if (mem.startsWith(u8, arg, "--directory")) {
|
|
117
|
+
maybe_directory = parse_flag("--directory", arg);
|
|
70
118
|
} else if (mem.eql(u8, arg, "-h") or mem.eql(u8, arg, "--help")) {
|
|
71
119
|
std.io.getStdOut().writeAll(usage) catch os.exit(1);
|
|
72
120
|
os.exit(0);
|
|
73
121
|
} else {
|
|
74
|
-
|
|
122
|
+
fatal("unexpected argument: '{s}'", .{arg});
|
|
75
123
|
}
|
|
76
124
|
}
|
|
77
125
|
|
|
78
|
-
const raw_cluster = maybe_cluster orelse
|
|
79
|
-
|
|
80
|
-
const raw_configuration = maybe_configuration orelse
|
|
81
|
-
print_error_exit("required argument: --replica-addresses", .{});
|
|
82
|
-
const raw_replica = maybe_replica orelse
|
|
83
|
-
print_error_exit("required argument: --replica-index", .{});
|
|
126
|
+
const raw_cluster = maybe_cluster orelse fatal("required argument: --cluster", .{});
|
|
127
|
+
const raw_replica = maybe_replica orelse fatal("required argument: --replica", .{});
|
|
84
128
|
|
|
85
129
|
const cluster = parse_cluster(raw_cluster);
|
|
86
|
-
const
|
|
87
|
-
const replica = parse_replica(raw_replica, @intCast(u16, configuration.len));
|
|
130
|
+
const replica = parse_replica(raw_replica);
|
|
88
131
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
.
|
|
92
|
-
|
|
93
|
-
|
|
132
|
+
const dir_path = maybe_directory orelse config.directory;
|
|
133
|
+
const dir_fd = os.openZ(dir_path, os.O_CLOEXEC | os.O_RDONLY, 0) catch |err|
|
|
134
|
+
fatal("failed to open directory '{s}': {}", .{ dir_path, err });
|
|
135
|
+
|
|
136
|
+
switch (command) {
|
|
137
|
+
.init => {
|
|
138
|
+
if (maybe_addresses != null) {
|
|
139
|
+
fatal("--addresses: supported only by 'start' command", .{});
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
return .{ .init = .{
|
|
143
|
+
.cluster = cluster,
|
|
144
|
+
.replica = replica,
|
|
145
|
+
.dir_fd = dir_fd,
|
|
146
|
+
} };
|
|
147
|
+
},
|
|
148
|
+
.start => {
|
|
149
|
+
const raw_addresses = maybe_addresses orelse
|
|
150
|
+
fatal("required argument: --addresses", .{});
|
|
151
|
+
const addresses = parse_addresses(allocator, raw_addresses);
|
|
152
|
+
|
|
153
|
+
if (replica >= addresses.len) {
|
|
154
|
+
fatal("--replica: value greater than length of --addresses array", .{});
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
return .{ .start = .{
|
|
158
|
+
.cluster = cluster,
|
|
159
|
+
.replica = replica,
|
|
160
|
+
.addresses = addresses,
|
|
161
|
+
.dir_fd = dir_fd,
|
|
162
|
+
} };
|
|
163
|
+
},
|
|
164
|
+
}
|
|
94
165
|
}
|
|
95
166
|
|
|
96
167
|
/// Format and print an error message to stderr,
|
|
97
168
|
/// then exit with an exit code of 1.
|
|
98
|
-
fn
|
|
169
|
+
fn fatal(comptime fmt_string: []const u8, args: anytype) noreturn {
|
|
99
170
|
const stderr = std.io.getStdErr().writer();
|
|
100
|
-
stderr.print("error: " ++ fmt_string ++ "\n
|
|
171
|
+
stderr.print("error: " ++ fmt_string ++ "\n", args) catch {};
|
|
101
172
|
os.exit(1);
|
|
102
173
|
}
|
|
103
174
|
|
|
104
175
|
/// Parse e.g. `--cluster=1a2b3c` into `1a2b3c` with error handling.
|
|
105
|
-
fn parse_flag(comptime flag: []const u8, arg: []const u8) []const u8 {
|
|
176
|
+
fn parse_flag(comptime flag: []const u8, arg: [:0]const u8) [:0]const u8 {
|
|
106
177
|
const value = arg[flag.len..];
|
|
107
178
|
if (value.len < 2) {
|
|
108
|
-
|
|
179
|
+
fatal("{s} argument requires a value", .{flag});
|
|
109
180
|
}
|
|
110
181
|
if (value[0] != '=') {
|
|
111
|
-
|
|
182
|
+
fatal("expected '=' after {s} but found '{c}'", .{ flag, value[0] });
|
|
112
183
|
}
|
|
113
184
|
return value[1..];
|
|
114
185
|
}
|
|
115
186
|
|
|
116
|
-
fn parse_cluster(raw_cluster: []const u8)
|
|
117
|
-
const cluster = fmt.parseUnsigned(
|
|
118
|
-
error.Overflow =>
|
|
119
|
-
|
|
120
|
-
, .{}),
|
|
121
|
-
error.InvalidCharacter => print_error_exit(
|
|
122
|
-
\\--cluster-id: value contains an invalid character
|
|
123
|
-
, .{}),
|
|
187
|
+
fn parse_cluster(raw_cluster: []const u8) u32 {
|
|
188
|
+
const cluster = fmt.parseUnsigned(u32, raw_cluster, 10) catch |err| switch (err) {
|
|
189
|
+
error.Overflow => fatal("--cluster: value exceeds a 32-bit unsigned integer", .{}),
|
|
190
|
+
error.InvalidCharacter => fatal("--cluster: value contains an invalid character", .{}),
|
|
124
191
|
};
|
|
125
|
-
if (cluster == 0) {
|
|
126
|
-
print_error_exit("--cluster-id: a value of 0 is not permitted", .{});
|
|
127
|
-
}
|
|
128
192
|
return cluster;
|
|
129
193
|
}
|
|
130
194
|
|
|
131
|
-
/// Parse and allocate the
|
|
132
|
-
fn
|
|
133
|
-
return
|
|
134
|
-
error.AddressHasTrailingComma => {
|
|
135
|
-
print_error_exit("--replica-addresses: invalid trailing comma", .{});
|
|
136
|
-
},
|
|
195
|
+
/// Parse and allocate the addresses returning a slice into that array.
|
|
196
|
+
fn parse_addresses(allocator: *std.mem.Allocator, raw_addresses: []const u8) []net.Address {
|
|
197
|
+
return vsr.parse_addresses(allocator, raw_addresses) catch |err| switch (err) {
|
|
198
|
+
error.AddressHasTrailingComma => fatal("--addresses: invalid trailing comma", .{}),
|
|
137
199
|
error.AddressLimitExceeded => {
|
|
138
|
-
|
|
200
|
+
fatal("--addresses: too many addresses, at most {d} are allowed", .{
|
|
139
201
|
config.replicas_max,
|
|
140
202
|
});
|
|
141
203
|
},
|
|
142
204
|
error.AddressHasMoreThanOneColon => {
|
|
143
|
-
|
|
205
|
+
fatal("--addresses: invalid address with more than one colon", .{});
|
|
144
206
|
},
|
|
145
|
-
error.PortOverflow =>
|
|
146
|
-
error.PortInvalid =>
|
|
147
|
-
error.AddressInvalid =>
|
|
148
|
-
error.OutOfMemory =>
|
|
207
|
+
error.PortOverflow => fatal("--addresses: port exceeds 65535", .{}),
|
|
208
|
+
error.PortInvalid => fatal("--addresses: invalid port", .{}),
|
|
209
|
+
error.AddressInvalid => fatal("--addresses: invalid IPv4 address", .{}),
|
|
210
|
+
error.OutOfMemory => fatal("--addresses: out of memory", .{}),
|
|
149
211
|
};
|
|
150
212
|
}
|
|
151
213
|
|
|
152
|
-
fn parse_replica(raw_replica: []const u8
|
|
153
|
-
comptime assert(config.replicas_max <= std.math.maxInt(
|
|
154
|
-
const replica = fmt.parseUnsigned(
|
|
155
|
-
error.Overflow =>
|
|
156
|
-
|
|
157
|
-
, .{}),
|
|
158
|
-
error.InvalidCharacter => print_error_exit(
|
|
159
|
-
\\--replica-index: value contains an invalid character
|
|
160
|
-
, .{}),
|
|
214
|
+
fn parse_replica(raw_replica: []const u8) u8 {
|
|
215
|
+
comptime assert(config.replicas_max <= std.math.maxInt(u8));
|
|
216
|
+
const replica = fmt.parseUnsigned(u8, raw_replica, 10) catch |err| switch (err) {
|
|
217
|
+
error.Overflow => fatal("--replica: value exceeds an 8-bit unsigned integer", .{}),
|
|
218
|
+
error.InvalidCharacter => fatal("--replica: value contains an invalid character", .{}),
|
|
161
219
|
};
|
|
162
|
-
if (replica >= configuration_len) {
|
|
163
|
-
print_error_exit(
|
|
164
|
-
\\--replica-index: value greater than length of address array
|
|
165
|
-
, .{});
|
|
166
|
-
}
|
|
167
220
|
return replica;
|
|
168
221
|
}
|
|
@@ -5,28 +5,35 @@ pub const deployment_environment = .development;
|
|
|
5
5
|
pub const log_level = 6;
|
|
6
6
|
|
|
7
7
|
/// The maximum number of replicas allowed in a cluster.
|
|
8
|
-
pub const replicas_max =
|
|
8
|
+
pub const replicas_max = 6;
|
|
9
9
|
|
|
10
|
-
/// The
|
|
11
|
-
///
|
|
12
|
-
///
|
|
13
|
-
///
|
|
14
|
-
|
|
10
|
+
/// The maximum number of clients allowed per cluster, where each client has a unique 128-bit ID.
|
|
11
|
+
/// This impacts the amount of memory allocated at initialization by the server.
|
|
12
|
+
/// This determines the size of the VR client table used to cache replies to clients by client ID.
|
|
13
|
+
/// Each client has one entry in the VR client table to store the latest `message_size_max` reply.
|
|
14
|
+
pub const clients_max = 32;
|
|
15
|
+
|
|
16
|
+
/// The minimum number of nodes required to form a quorum for replication:
|
|
17
|
+
/// Majority quorums are only required across view change and replication phases (not within).
|
|
18
|
+
/// As per Flexible Paxos, provided `quorum_replication + quorum_view_change > replicas`:
|
|
19
|
+
/// 1. you may increase `quorum_view_change` above a majority, so that
|
|
20
|
+
/// 2. you can decrease `quorum_replication` below a majority, to optimize the common case.
|
|
15
21
|
/// This improves latency by reducing the number of nodes required for synchronous replication.
|
|
16
22
|
/// This reduces redundancy only in the short term, asynchronous replication will still continue.
|
|
17
|
-
|
|
18
|
-
|
|
23
|
+
/// The size of the replication quorum is limited to the minimum of this value and actual majority.
|
|
24
|
+
/// The size of the view change quorum will then be automatically inferred from quorum_replication.
|
|
25
|
+
pub const quorum_replication_max = 3;
|
|
19
26
|
|
|
20
|
-
/// The default server port to listen on if not specified in `--
|
|
27
|
+
/// The default server port to listen on if not specified in `--addresses`:
|
|
21
28
|
pub const port = 3001;
|
|
22
29
|
|
|
23
|
-
/// The default network interface address to listen on if not specified in `--
|
|
30
|
+
/// The default network interface address to listen on if not specified in `--addresses`:
|
|
24
31
|
/// WARNING: Binding to all interfaces with "0.0.0.0" is dangerous and opens the server to anyone.
|
|
25
32
|
/// Bind to the "127.0.0.1" loopback address to accept local connections as a safe default only.
|
|
26
33
|
pub const address = "127.0.0.1";
|
|
27
34
|
|
|
28
|
-
/// Where
|
|
29
|
-
pub const
|
|
35
|
+
/// Where data files should be persisted by default:
|
|
36
|
+
pub const directory = "/var/lib/tigerbeetle";
|
|
30
37
|
|
|
31
38
|
/// The maximum number of accounts to store in memory:
|
|
32
39
|
/// This impacts the amount of memory allocated at initialization by the server.
|
|
@@ -54,7 +61,7 @@ pub const commits_max = transfers_max;
|
|
|
54
61
|
/// This also enables us to detect filesystem inode corruption that would change the journal size.
|
|
55
62
|
pub const journal_size_max = switch (deployment_environment) {
|
|
56
63
|
.production => 128 * 1024 * 1024 * 1024,
|
|
57
|
-
else =>
|
|
64
|
+
else => 128 * 1024 * 1024,
|
|
58
65
|
};
|
|
59
66
|
|
|
60
67
|
/// The maximum number of batch entries in the journal file:
|
|
@@ -66,33 +73,37 @@ pub const journal_headers_max = switch (deployment_environment) {
|
|
|
66
73
|
else => 16384,
|
|
67
74
|
};
|
|
68
75
|
|
|
69
|
-
/// The maximum number of connections that can be
|
|
70
|
-
pub const connections_max =
|
|
76
|
+
/// The maximum number of connections that can be held open by the server at any time:
|
|
77
|
+
pub const connections_max = replicas_max + clients_max;
|
|
71
78
|
|
|
72
79
|
/// The maximum size of a message in bytes:
|
|
73
80
|
/// This is also the limit of all inflight data across multiple pipelined requests per connection.
|
|
74
|
-
/// We may have one request of up to
|
|
81
|
+
/// We may have one request of up to 1 MiB inflight or 2 pipelined requests of up to 512 KiB inflight.
|
|
75
82
|
/// This impacts sequential disk write throughput, the larger the buffer the better.
|
|
76
|
-
///
|
|
|
83
|
+
/// 1 MiB is 8,192 transfers, and a reasonable choice for sequential disk write throughput.
|
|
77
84
|
/// However, this impacts bufferbloat and head-of-line blocking latency for pipelined requests.
|
|
78
|
-
/// For a 1 Gbps NIC = 125 MiB/s throughput:
|
|
79
|
-
/// This
|
|
80
|
-
pub const message_size_max =
|
|
85
|
+
/// For a 1 Gbps NIC = 125 MiB/s throughput: 1 MiB / 125 * 1000ms = 8ms for the next request.
|
|
86
|
+
/// This impacts the amount of memory allocated at initialization by the server.
|
|
87
|
+
pub const message_size_max = 1 * 1024 * 1024;
|
|
81
88
|
|
|
82
89
|
/// The number of full-sized messages allocated at initialization by the message bus.
|
|
83
90
|
pub const message_bus_messages_max = connections_max * 4;
|
|
84
91
|
/// The number of header-sized messages allocated at initialization by the message bus.
|
|
85
92
|
/// These are much smaller/cheaper and we can therefore have many of them.
|
|
86
|
-
pub const message_bus_headers_max = connections_max * connection_send_queue_max;
|
|
93
|
+
pub const message_bus_headers_max = connections_max * connection_send_queue_max * 2;
|
|
94
|
+
|
|
95
|
+
/// The maximum number of Viewstamped Replication prepare messages that can be inflight at a time.
|
|
96
|
+
/// This is immutable once assigned per cluster, as replicas need to know how many operations might
|
|
97
|
+
/// possibly be uncommitted during a view change, and this must be constant for all replicas.
|
|
98
|
+
pub const pipelining_max = clients_max;
|
|
87
99
|
|
|
88
100
|
/// The minimum and maximum amount of time in milliseconds to wait before initiating a connection.
|
|
89
|
-
/// Exponential backoff and
|
|
90
|
-
|
|
91
|
-
pub const
|
|
92
|
-
pub const connection_delay_max = 1000;
|
|
101
|
+
/// Exponential backoff and jitter are applied within this range.
|
|
102
|
+
pub const connection_delay_min_ms = 50;
|
|
103
|
+
pub const connection_delay_max_ms = 1000;
|
|
93
104
|
|
|
94
105
|
/// The maximum number of outgoing messages that may be queued on a connection.
|
|
95
|
-
pub const connection_send_queue_max =
|
|
106
|
+
pub const connection_send_queue_max = pipelining_max;
|
|
96
107
|
|
|
97
108
|
/// The maximum number of connections in the kernel's complete connection queue pending an accept():
|
|
98
109
|
/// If the backlog argument is greater than the value in `/proc/sys/net/core/somaxconn`, then it is
|
|
@@ -161,6 +172,49 @@ pub const sector_size = 4096;
|
|
|
161
172
|
/// when they were never written to disk.
|
|
162
173
|
pub const direct_io = true;
|
|
163
174
|
|
|
175
|
+
/// The maximum number of concurrent read I/O operations to allow at once.
|
|
176
|
+
pub const io_depth_read = 8;
|
|
177
|
+
/// The maximum number of concurrent write I/O operations to allow at once.
|
|
178
|
+
pub const io_depth_write = 8;
|
|
179
|
+
|
|
164
180
|
/// The number of milliseconds between each replica tick, the basic unit of time in TigerBeetle.
|
|
165
181
|
/// Used to regulate heartbeats, retries and timeouts, all specified as multiples of a tick.
|
|
166
182
|
pub const tick_ms = 10;
|
|
183
|
+
|
|
184
|
+
/// The conservative round-trip time at startup when there is no network knowledge.
|
|
185
|
+
/// Adjusted dynamically thereafter for RTT-sensitive timeouts according to network congestion.
|
|
186
|
+
/// This should be set higher rather than lower to avoid flooding the network at startup.
|
|
187
|
+
pub const rtt_ticks = 300 / tick_ms;
|
|
188
|
+
|
|
189
|
+
/// The multiple of round-trip time for RTT-sensitive timeouts.
|
|
190
|
+
pub const rtt_multiple = 2;
|
|
191
|
+
|
|
192
|
+
/// The min/max bounds of exponential backoff (and jitter) to add to RTT-sensitive timeouts.
|
|
193
|
+
pub const backoff_min_ticks = 100 / tick_ms;
|
|
194
|
+
pub const backoff_max_ticks = 10000 / tick_ms;
|
|
195
|
+
|
|
196
|
+
/// The maximum skew between two clocks to allow when considering them to be in agreement.
|
|
197
|
+
/// The principle is that no two clocks tick exactly alike but some clocks more or less agree.
|
|
198
|
+
/// The maximum skew across the cluster as a whole is this value times the total number of clocks.
|
|
199
|
+
/// The cluster will be unavailable if the majority of clocks are all further than this value apart.
|
|
200
|
+
/// Decreasing this reduces the probability of reaching agreement on synchronized time.
|
|
201
|
+
/// Increasing this reduces the accuracy of synchronized time.
|
|
202
|
+
pub const clock_offset_tolerance_max_ms = 10000;
|
|
203
|
+
|
|
204
|
+
/// The amount of time before the clock's synchronized epoch is expired.
|
|
205
|
+
/// If the epoch is expired before it can be replaced with a new synchronized epoch, then this most
|
|
206
|
+
/// likely indicates either a network partition or else too many clock faults across the cluster.
|
|
207
|
+
/// A new synchronized epoch will be installed as soon as these conditions resolve.
|
|
208
|
+
pub const clock_epoch_max_ms = 60000;
|
|
209
|
+
|
|
210
|
+
/// The amount of time to wait for enough accurate samples before synchronizing the clock.
|
|
211
|
+
/// The more samples we can take per remote clock source, the more accurate our estimation becomes.
|
|
212
|
+
/// This impacts cluster startup time as the leader must first wait for synchronization to complete.
|
|
213
|
+
pub const clock_synchronization_window_min_ms = 2000;
|
|
214
|
+
|
|
215
|
+
/// The amount of time without agreement before the clock window is expired and a new window opened.
|
|
216
|
+
/// This happens where some samples have been collected but not enough to reach agreement.
|
|
217
|
+
/// The quality of samples degrades as they age so at some point we throw them away and start over.
|
|
218
|
+
/// This eliminates the impact of gradual clock drift on our clock offset (clock skew) measurements.
|
|
219
|
+
/// If a window expires because of this then it is likely that the clock epoch will also be expired.
|
|
220
|
+
pub const clock_synchronization_window_max_ms = 20000;
|