tigerbeetle-node 0.10.0 → 0.11.1
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- package/README.md +302 -101
- package/dist/index.d.ts +70 -72
- package/dist/index.js +70 -72
- package/dist/index.js.map +1 -1
- package/package.json +9 -8
- package/scripts/download_node_headers.sh +14 -7
- package/src/index.ts +6 -10
- package/src/node.zig +6 -3
- package/src/tigerbeetle/scripts/benchmark.sh +4 -4
- package/src/tigerbeetle/scripts/confirm_image.sh +44 -0
- package/src/tigerbeetle/scripts/fuzz_loop.sh +15 -0
- package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +7 -0
- package/src/tigerbeetle/scripts/install.sh +19 -4
- package/src/tigerbeetle/scripts/install_zig.bat +5 -1
- package/src/tigerbeetle/scripts/install_zig.sh +24 -14
- package/src/tigerbeetle/scripts/pre-commit.sh +9 -0
- package/src/tigerbeetle/scripts/shellcheck.sh +5 -0
- package/src/tigerbeetle/scripts/tests_on_alpine.sh +10 -0
- package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +14 -0
- package/src/tigerbeetle/scripts/validate_docs.sh +17 -0
- package/src/tigerbeetle/src/benchmark.zig +29 -13
- package/src/tigerbeetle/src/c/tb_client/context.zig +248 -47
- package/src/tigerbeetle/src/c/tb_client/echo_client.zig +108 -0
- package/src/tigerbeetle/src/c/tb_client/packet.zig +2 -2
- package/src/tigerbeetle/src/c/tb_client/signal.zig +2 -4
- package/src/tigerbeetle/src/c/tb_client/thread.zig +17 -257
- package/src/tigerbeetle/src/c/tb_client.h +118 -84
- package/src/tigerbeetle/src/c/tb_client.zig +88 -23
- package/src/tigerbeetle/src/c/tb_client_header_test.zig +135 -0
- package/src/tigerbeetle/src/c/test.zig +371 -1
- package/src/tigerbeetle/src/cli.zig +37 -7
- package/src/tigerbeetle/src/config.zig +58 -17
- package/src/tigerbeetle/src/demo.zig +5 -2
- package/src/tigerbeetle/src/demo_01_create_accounts.zig +1 -1
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +13 -0
- package/src/tigerbeetle/src/ewah.zig +11 -33
- package/src/tigerbeetle/src/ewah_benchmark.zig +8 -9
- package/src/tigerbeetle/src/io/linux.zig +1 -1
- package/src/tigerbeetle/src/lsm/README.md +308 -0
- package/src/tigerbeetle/src/lsm/binary_search.zig +137 -10
- package/src/tigerbeetle/src/lsm/bloom_filter.zig +43 -0
- package/src/tigerbeetle/src/lsm/compaction.zig +376 -397
- package/src/tigerbeetle/src/lsm/composite_key.zig +2 -0
- package/src/tigerbeetle/src/lsm/eytzinger.zig +1 -1
- package/src/tigerbeetle/src/{eytzinger_benchmark.zig → lsm/eytzinger_benchmark.zig} +34 -21
- package/src/tigerbeetle/src/lsm/forest.zig +21 -447
- package/src/tigerbeetle/src/lsm/forest_fuzz.zig +414 -0
- package/src/tigerbeetle/src/lsm/grid.zig +170 -76
- package/src/tigerbeetle/src/lsm/groove.zig +197 -133
- package/src/tigerbeetle/src/lsm/k_way_merge.zig +40 -18
- package/src/tigerbeetle/src/lsm/level_iterator.zig +28 -9
- package/src/tigerbeetle/src/lsm/manifest.zig +93 -180
- package/src/tigerbeetle/src/lsm/manifest_level.zig +161 -454
- package/src/tigerbeetle/src/lsm/manifest_log.zig +243 -356
- package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +665 -0
- package/src/tigerbeetle/src/lsm/node_pool.zig +4 -0
- package/src/tigerbeetle/src/lsm/posted_groove.zig +65 -76
- package/src/tigerbeetle/src/lsm/segmented_array.zig +580 -251
- package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +148 -0
- package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +9 -0
- package/src/tigerbeetle/src/lsm/set_associative_cache.zig +62 -12
- package/src/tigerbeetle/src/lsm/table.zig +115 -68
- package/src/tigerbeetle/src/lsm/table_immutable.zig +30 -23
- package/src/tigerbeetle/src/lsm/table_iterator.zig +27 -17
- package/src/tigerbeetle/src/lsm/table_mutable.zig +63 -12
- package/src/tigerbeetle/src/lsm/test.zig +61 -56
- package/src/tigerbeetle/src/lsm/tree.zig +450 -407
- package/src/tigerbeetle/src/lsm/tree_fuzz.zig +461 -0
- package/src/tigerbeetle/src/main.zig +83 -8
- package/src/tigerbeetle/src/message_bus.zig +20 -9
- package/src/tigerbeetle/src/message_pool.zig +22 -19
- package/src/tigerbeetle/src/ring_buffer.zig +7 -3
- package/src/tigerbeetle/src/simulator.zig +179 -119
- package/src/tigerbeetle/src/state_machine.zig +381 -246
- package/src/tigerbeetle/src/static_allocator.zig +65 -0
- package/src/tigerbeetle/src/storage.zig +3 -7
- package/src/tigerbeetle/src/test/accounting/auditor.zig +577 -0
- package/src/tigerbeetle/src/test/accounting/workload.zig +823 -0
- package/src/tigerbeetle/src/test/cluster.zig +33 -81
- package/src/tigerbeetle/src/test/conductor.zig +366 -0
- package/src/tigerbeetle/src/test/fuzz.zig +121 -0
- package/src/tigerbeetle/src/test/id.zig +89 -0
- package/src/tigerbeetle/src/test/network.zig +45 -19
- package/src/tigerbeetle/src/test/packet_simulator.zig +40 -29
- package/src/tigerbeetle/src/test/priority_queue.zig +645 -0
- package/src/tigerbeetle/src/test/state_checker.zig +91 -69
- package/src/tigerbeetle/src/test/state_machine.zig +11 -35
- package/src/tigerbeetle/src/test/storage.zig +470 -106
- package/src/tigerbeetle/src/test/storage_checker.zig +204 -0
- package/src/tigerbeetle/src/tigerbeetle.zig +15 -16
- package/src/tigerbeetle/src/unit_tests.zig +13 -1
- package/src/tigerbeetle/src/util.zig +97 -11
- package/src/tigerbeetle/src/vopr.zig +495 -0
- package/src/tigerbeetle/src/vsr/client.zig +21 -3
- package/src/tigerbeetle/src/vsr/journal.zig +293 -212
- package/src/tigerbeetle/src/vsr/replica.zig +1086 -515
- package/src/tigerbeetle/src/vsr/superblock.zig +382 -637
- package/src/tigerbeetle/src/vsr/superblock_client_table.zig +14 -16
- package/src/tigerbeetle/src/vsr/superblock_free_set.zig +416 -153
- package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +332 -0
- package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +349 -0
- package/src/tigerbeetle/src/vsr/superblock_manifest.zig +62 -12
- package/src/tigerbeetle/src/vsr/superblock_quorums.zig +394 -0
- package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +312 -0
- package/src/tigerbeetle/src/vsr.zig +94 -60
- package/src/tigerbeetle/scripts/vopr.bat +0 -48
- package/src/tigerbeetle/scripts/vopr.sh +0 -33
- package/src/tigerbeetle/src/benchmark_array_search.zig +0 -317
- package/src/tigerbeetle/src/benchmarks/perf.zig +0 -299

package/src/tigerbeetle/src/message_pool.zig:

```diff
@@ -23,7 +23,7 @@ pub const messages_max_replica = messages_max: {
     var sum: usize = 0;

     sum += config.io_depth_read + config.io_depth_write; // Journal I/O
-    sum += config.clients_max; //
+    sum += config.clients_max; // SuperBlock.client_table
     sum += 1; // Replica.loopback_queue
     sum += config.pipeline_max; // Replica.pipeline
     sum += 1; // Replica.commit_prepare
@@ -37,7 +37,6 @@ pub const messages_max_replica = messages_max: {
     // Handle Replica.commit_op's reply:
     // (This is separate from the burst +1 because they may occur concurrently).
     sum += 1;
-    sum += 20; // TODO Our network simulator allows up to 20 messages for path_capacity_max.

     break :messages_max sum;
 };
@@ -51,7 +50,6 @@ pub const messages_max_client = messages_max: {
     sum += config.client_request_queue_max; // Client.request_queue
     // Handle bursts (e.g. Connection.parse_message, or sending a ping when the send queue is full).
     sum += 1;
-    sum += 20; // TODO Our network simulator allows up to 20 messages for path_capacity_max.

     break :messages_max sum;
 };
@@ -68,9 +66,7 @@ pub const MessagePool = struct {
     pub const Message = struct {
         // TODO: replace this with a header() function to save memory
         header: *Header,
-
-        /// to perform Direct I/O is safe.
-        buffer: []u8,
+        buffer: []align(config.sector_size) u8,
         references: u32 = 0,
         next: ?*Message,

@@ -80,25 +76,27 @@ pub const MessagePool = struct {
             return message;
         }

-        pub fn body(message: *Message) []align(@
-            return @
-                @alignOf(Header),
-                message.buffer[@sizeOf(Header)..message.header.size],
-            );
+        pub fn body(message: *const Message) []align(@sizeOf(Header)) u8 {
+            return message.buffer[@sizeOf(Header)..message.header.size];
         }
     };

-    /// List of currently unused messages
+    /// List of currently unused messages.
     free_list: ?*Message,

+    messages_max: usize,
+
     pub fn init(allocator: mem.Allocator, process_type: vsr.ProcessType) error{OutOfMemory}!MessagePool {
-
+        return MessagePool.init_capacity(allocator, switch (process_type) {
             .replica => messages_max_replica,
             .client => messages_max_client,
-        };
+        });
+    }

-
+    pub fn init_capacity(allocator: mem.Allocator, messages_max: usize) error{OutOfMemory}!MessagePool {
+        var pool: MessagePool = .{
             .free_list = null,
+            .messages_max = messages_max,
         };
         {
             var i: usize = 0;
@@ -113,22 +111,27 @@ pub const MessagePool = struct {
                 message.* = .{
                     .header = mem.bytesAsValue(Header, buffer[0..@sizeOf(Header)]),
                     .buffer = buffer,
-                    .next =
+                    .next = pool.free_list,
                 };
-
+                pool.free_list = message;
             }
         }

-        return
+        return pool;
     }
-
+
     /// Frees all messages that were unused or returned to the pool via unref().
     pub fn deinit(pool: *MessagePool, allocator: mem.Allocator) void {
+        var free_count: usize = 0;
         while (pool.free_list) |message| {
            pool.free_list = message.next;
            allocator.free(message.buffer);
            allocator.destroy(message);
+            free_count += 1;
         }
+        // If the MessagePool is being deinitialized, all messages should have already been
+        // released to the pool.
+        assert(free_count == pool.messages_max);
     }

     /// Get an unused message with a buffer of config.message_size_max.
```
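The `init`/`init_capacity` split and the new `messages_max` field let `deinit` count the messages still on the free list and assert that every one was returned before teardown. A minimal, self-contained sketch of that intrusive free-list pattern, using illustrative `Node`/`Pool` names rather than the real `Message`/`MessagePool` types:

```zig
const std = @import("std");
const assert = std.debug.assert;

// Illustrative stand-ins for Message/MessagePool.
const Node = struct {
    next: ?*Node,
};

const Pool = struct {
    free_list: ?*Node,
    nodes_max: usize,

    fn init_capacity(allocator: std.mem.Allocator, nodes_max: usize) !Pool {
        var pool = Pool{ .free_list = null, .nodes_max = nodes_max };
        var i: usize = 0;
        while (i < nodes_max) : (i += 1) {
            // Push each freshly allocated node onto the intrusive free list.
            const node = try allocator.create(Node);
            node.* = .{ .next = pool.free_list };
            pool.free_list = node;
        }
        return pool;
    }

    fn deinit(pool: *Pool, allocator: std.mem.Allocator) void {
        var free_count: usize = 0;
        while (pool.free_list) |node| {
            pool.free_list = node.next;
            allocator.destroy(node);
            free_count += 1;
        }
        // Mirrors the new assertion: every node must be back on the free list at shutdown.
        assert(free_count == pool.nodes_max);
    }
};

test "all nodes are returned before deinit" {
    var pool = try Pool.init_capacity(std.testing.allocator, 4);
    pool.deinit(std.testing.allocator);
}
```

In the real pool, a message taken out and never returned via `unref()` would leave the count short and trip the assertion, so a leak surfaces as a crash in tests rather than going unnoticed.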
package/src/tigerbeetle/src/ring_buffer.zig:

```diff
@@ -3,15 +3,19 @@ const assert = std.debug.assert;
 const math = std.math;
 const mem = std.mem;

+const util = @import("util.zig");
+
 /// A First In, First Out ring buffer holding at most `count_max` elements.
 pub fn RingBuffer(
     comptime T: type,
-    comptime
+    comptime count_max_: usize,
     comptime buffer_type: enum { array, pointer },
 ) type {
     return struct {
         const Self = @This();

+        pub const count_max = count_max_;
+
         buffer: switch (buffer_type) {
             .array => [count_max]T,
             .pointer => *[count_max]T,
@@ -145,8 +149,8 @@ pub fn RingBuffer(
             const pre_wrap_count = math.min(items.len, self.buffer.len - pre_wrap_start);
             const post_wrap_count = items.len - pre_wrap_count;

-
-
+            util.copy_disjoint(.inexact, T, self.buffer[pre_wrap_start..], items[0..pre_wrap_count]);
+            util.copy_disjoint(.exact, T, self.buffer[0..post_wrap_count], items[pre_wrap_count..]);

             self.count += items.len;
         }
```
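Renaming the parameter to `count_max_` and re-exporting it as `pub const count_max` makes the capacity readable directly off the generated type. A reduced, self-contained sketch of the pattern (the fields here are illustrative, not the full ring buffer):

```zig
const std = @import("std");

fn RingBuffer(comptime T: type, comptime count_max_: usize) type {
    return struct {
        // Re-export the comptime parameter so callers can write `Ring.count_max`
        // instead of threading the capacity around separately.
        pub const count_max = count_max_;

        buffer: [count_max]T = undefined,
        count: usize = 0,
    };
}

test "count_max is readable from the generated type" {
    const Ring = RingBuffer(u32, 8);
    try std.testing.expectEqual(@as(usize, 8), Ring.count_max);
}
```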
package/src/tigerbeetle/src/simulator.zig:

```diff
@@ -3,16 +3,24 @@ const builtin = @import("builtin");
 const assert = std.debug.assert;
 const mem = std.mem;

+const tb = @import("tigerbeetle.zig");
 const config = @import("config.zig");
+const vsr = @import("vsr.zig");
+const Header = vsr.Header;

 const Client = @import("test/cluster.zig").Client;
 const Cluster = @import("test/cluster.zig").Cluster;
 const ClusterOptions = @import("test/cluster.zig").ClusterOptions;
-const Header = @import("vsr.zig").Header;
 const Replica = @import("test/cluster.zig").Replica;
-const StateChecker = @import("test/state_checker.zig").StateChecker;
 const StateMachine = @import("test/cluster.zig").StateMachine;
+const StateChecker = @import("test/state_checker.zig").StateChecker;
+const StorageChecker = @import("test/storage_checker.zig").StorageChecker;
 const PartitionMode = @import("test/packet_simulator.zig").PartitionMode;
+const MessageBus = @import("test/message_bus.zig").MessageBus;
+const auditor = @import("test/accounting/auditor.zig");
+const Workload = @import("test/accounting/workload.zig").WorkloadType(StateMachine);
+const Conductor = @import("test/conductor.zig").ConductorType(Client, MessageBus, StateMachine, Workload);
+const IdPermutation = @import("test/id.zig").IdPermutation;

 /// The `log` namespace in this root file is required to implement our custom `log` function.
 const output = std.log.scoped(.state_checker);
@@ -21,21 +29,25 @@ const output = std.log.scoped(.state_checker);
 /// This will run much slower but will trace all logic across the cluster.
 const log_state_transitions_only = builtin.mode != .Debug;

-const
-const log_faults = std.log.scoped(.faults);
+const log_simulator = std.log.scoped(.simulator);

 /// You can fine tune your log levels even further (debug/info/notice/warn/err/crit/alert/emerg):
 pub const log_level: std.log.Level = if (log_state_transitions_only) .info else .debug;

 /// Modifies compile-time constants on "config.zig".
 pub const deployment_environment = .simulation;
-
-
-}
+
+const cluster_id = 0;

 var cluster: *Cluster = undefined;
+var state_checker: *StateChecker = undefined;
+var storage_checker: *StorageChecker = undefined;

 pub fn main() !void {
+    comptime {
+        assert(config.deployment_environment == .simulation);
+    }
+
     // This must be initialized at runtime as stderr is not comptime known on e.g. Windows.
     log_buffer.unbuffered_writer = std.io.getStdErr().writer();

```
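The `comptime { assert(...) }` block added at the top of `main` is a build-time guard: `deployment_environment` is overridden from this root file, and if the override did not take effect the simulator now fails to compile rather than running with the wrong configuration. A self-contained sketch of the same guard, with a stand-in `config` namespace (the real values live in config.zig):

```zig
const std = @import("std");
const assert = std.debug.assert;

const Environment = enum { production, development, simulation };

// Stand-in for the vendored config.zig, which normally derives this value from the
// root file's `pub const deployment_environment` override.
const config = struct {
    pub const deployment_environment: Environment = .simulation;
};

pub fn main() void {
    // If the override were missing or wrong, this would be a compile error,
    // not a runtime failure.
    comptime {
        assert(config.deployment_environment == .simulation);
    }
    std.debug.print("deployment_environment={s}\n", .{@tagName(config.deployment_environment)});
}
```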
package/src/tigerbeetle/src/simulator.zig (continued):

```diff
@@ -76,17 +88,25 @@ pub fn main() !void {
     const client_count = 1 + random.uintLessThan(u8, config.clients_max);
     const node_count = replica_count + client_count;

-    const ticks_max =
+    const ticks_max = 50_000_000;
     const request_probability = 1 + random.uintLessThan(u8, 99);
     const idle_on_probability = random.uintLessThan(u8, 20);
     const idle_off_probability = 10 + random.uintLessThan(u8, 10);

+    // TODO: When block recovery and state transfer are implemented, remove this flag to allow
+    // crashes to coexist with WAL wraps.
+    const requests_committed_max: usize = config.journal_slot_count * 3;
+
     const cluster_options: ClusterOptions = .{
-        .cluster =
+        .cluster = cluster_id,
         .replica_count = replica_count,
         .client_count = client_count,
+        // TODO Compute an upper-bound for this based on requests_committed_max.
+        .grid_size_max = 1024 * 1024 * 256,
         .seed = random.int(u64),
-        .on_change_state =
+        .on_change_state = on_replica_change_state,
+        .on_compact = on_replica_compact,
+        .on_checkpoint = on_replica_checkpoint,
         .network_options = .{
             .packet_simulator_options = .{
                 .replica_count = replica_count,
```
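The new `.on_change_state`, `.on_compact`, and `.on_checkpoint` options hand top-level functions to the cluster, which invokes them as replicas make progress; the simulator points them at the state checker and the new storage checker. A minimal sketch of that wiring in the Zig 0.9-era style used by the vendored sources; `Replica` and `ClusterOptions` here are simplified stand-ins, not the real types from test/cluster.zig:

```zig
const std = @import("std");

const Replica = struct { replica: u8 };

const ClusterOptions = struct {
    on_change_state: fn (replica: *const Replica) void,
    on_compact: fn (replica: *const Replica) void,
    on_checkpoint: fn (replica: *const Replica) void,
};

fn on_replica_change_state(replica: *const Replica) void {
    std.debug.print("replica {}: state changed\n", .{replica.replica});
}

fn on_replica_compact(replica: *const Replica) void {
    std.debug.print("replica {}: compaction finished\n", .{replica.replica});
}

fn on_replica_checkpoint(replica: *const Replica) void {
    std.debug.print("replica {}: checkpoint finished\n", .{replica.replica});
}

pub fn main() void {
    const options = ClusterOptions{
        .on_change_state = on_replica_change_state,
        .on_compact = on_replica_compact,
        .on_checkpoint = on_replica_checkpoint,
    };

    // In the simulator these hooks are fired by the cluster itself; here we call them
    // directly to show the shape of the callbacks.
    const replica = Replica{ .replica = 0 };
    options.on_change_state(&replica);
    options.on_compact(&replica);
    options.on_checkpoint(&replica);
}
```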
package/src/tigerbeetle/src/simulator.zig (continued):

```diff
@@ -117,21 +137,75 @@ pub fn main() !void {
             .write_latency_mean = 3 + random.uintLessThan(u16, 100),
             .read_fault_probability = random.uintLessThan(u8, 10),
             .write_fault_probability = random.uintLessThan(u8, 10),
+            // TODO Allow WAL faults on crash when replica_count=1 when redundant-header-repair
+            // is implemented after recovering with decision=fix. Otherwise we can end up with
+            // multiple crashes faulting first a redundant headers, then a prepare, upgrading
+            // a decision=fix to decision=vsr.
+            .crash_fault_probability = if (replica_count == 1) 0 else 80 + random.uintLessThan(u8, 21),
+            .faulty_superblock = true,
         },
         .health_options = .{
-            .crash_probability = 0.
+            .crash_probability = 0.000001,
             .crash_stability = random.uintLessThan(u32, 1_000),
-            .restart_probability = 0.
+            .restart_probability = 0.0001,
             .restart_stability = random.uintLessThan(u32, 1_000),
         },
         .state_machine_options = .{
-
-            .
-            .
-            .
+            // TODO What should these fields be set to? Can they be randomized (and with what constraints)?
+            .lsm_forest_node_count = 4096,
+            .cache_entries_accounts = 2048,
+            .cache_entries_transfers = 2048,
+            .cache_entries_posted = 2048,
         },
     };

+    const workload_options: Workload.Options = .{
+        .auditor_options = .{
+            .accounts_max = 2 + random.uintLessThan(usize, 128),
+            .account_id_permutation = random_id_permutation(random),
+            .client_count = client_count,
+            .transfers_pending_max = 256,
+            .in_flight_max = Conductor.stalled_queue_capacity,
+        },
+        .transfer_id_permutation = random_id_permutation(random),
+        .operations = .{
+            .create_accounts = 1 + random.uintLessThan(usize, 10),
+            .create_transfers = 1 + random.uintLessThan(usize, 100),
+            .lookup_accounts = 1 + random.uintLessThan(usize, 20),
+            .lookup_transfers = 1 + random.uintLessThan(usize, 20),
+        },
+        .create_account_invalid_probability = 1,
+        .create_transfer_invalid_probability = 1,
+        .create_transfer_limit_probability = random.uintLessThan(u8, 101),
+        .create_transfer_pending_probability = 1 + random.uintLessThan(u8, 100),
+        .create_transfer_post_probability = 1 + random.uintLessThan(u8, 50),
+        .create_transfer_void_probability = 1 + random.uintLessThan(u8, 50),
+        .lookup_account_invalid_probability = 1,
+        .lookup_transfer = .{
+            .delivered = 1 + random.uintLessThan(usize, 10),
+            .sending = 1 + random.uintLessThan(usize, 10),
+        },
+        .lookup_transfer_span_mean = 10 + random.uintLessThan(usize, 1000),
+        .account_limit_probability = random.uintLessThan(u8, 80),
+        .linked_valid_probability = random.uintLessThan(u8, 101),
+        // 100% chance because this only applies to consecutive invalid transfers, which are rare.
+        .linked_invalid_probability = 100,
+        // TODO(Timeouts): When timeouts are implemented in the StateMachine, change this to the
+        // (commented out) value so that timeouts can actually trigger.
+        .pending_timeout_mean = std.math.maxInt(u64) / 2,
+        // .pending_timeout_mean = 1 + random.uintLessThan(usize, 1_000_000_000 / 4),
+        .accounts_batch_size_min = 0,
+        .accounts_batch_size_span = 1 + random.uintLessThan(
+            usize,
+            StateMachine.constants.batch_max.create_accounts,
+        ),
+        .transfers_batch_size_min = 0,
+        .transfers_batch_size_span = 1 + random.uintLessThan(
+            usize,
+            StateMachine.constants.batch_max.create_transfers,
+        ),
+    };
+
     output.info(
         \\
         \\ SEED={}
@@ -163,10 +237,6 @@ pub fn main() !void {
         \\ crash_stability={} ticks
         \\ restart_probability={d}%
         \\ restart_stability={} ticks
-        \\ prefetch_mean={} ticks
-        \\ compact_mean={} ticks
-        \\ checkpoint_mean={} ticks
-        \\
     , .{
         seed,
         replica_count,
@@ -196,19 +266,46 @@ pub fn main() !void {
         cluster_options.health_options.crash_stability,
         cluster_options.health_options.restart_probability * 100,
         cluster_options.health_options.restart_stability,
-        cluster_options.state_machine_options.prefetch_mean,
-        cluster_options.state_machine_options.compact_mean,
-        cluster_options.state_machine_options.checkpoint_mean,
     });

     cluster = try Cluster.create(allocator, random, cluster_options);
     defer cluster.destroy();

-
-    defer
+    var workload = try Workload.init(allocator, random, workload_options);
+    defer workload.deinit(allocator);

-    var
-
+    var conductor = try Conductor.init(allocator, random, &workload, .{
+        .cluster = cluster_id,
+        .replica_count = replica_count,
+        .client_count = client_count,
+        .message_bus_options = .{ .network = &cluster.network },
+        .requests_max = requests_committed_max,
+        .request_probability = request_probability,
+        .idle_on_probability = idle_on_probability,
+        .idle_off_probability = idle_off_probability,
+    });
+    defer conductor.deinit(allocator);
+
+    for (conductor.clients) |*client| {
+        cluster.network.link(client.message_bus.process, &client.message_bus);
+    }
+
+    state_checker = try allocator.create(StateChecker);
+    defer allocator.destroy(state_checker);
+
+    state_checker.* = try StateChecker.init(
+        allocator,
+        cluster_id,
+        cluster.replicas,
+        conductor.clients,
+    );
+    defer state_checker.deinit();
+
+    storage_checker = try allocator.create(StorageChecker);
+    defer allocator.destroy(storage_checker);
+
+    storage_checker.* = StorageChecker.init(allocator);
+    defer storage_checker.deinit();

     // The minimum number of healthy replicas required for a crashed replica to be able to recover.
     const replica_normal_min = replicas: {
@@ -226,8 +323,9 @@ pub fn main() !void {
         storage.faulty = replica_normal_min <= i;
     }

+    // The maximum number of transitions from calling `client.request()`, not including
+    // `register` messages.
     // TODO When storage is supported, run more transitions than fit in the journal.
-    const transitions_max = config.journal_slot_count / 2;
     var tick: u64 = 0;
     while (tick < ticks_max) : (tick += 1) {
         const health_options = &cluster.options.health_options;
@@ -247,27 +345,30 @@ pub fn main() !void {
                 // complete the VSR recovery protocol either.
                 if (cluster.health[replica] == .up and crashes == 0) {
                     if (storage.faulty) {
-
+                        log_simulator.debug("{}: disable storage faults", .{replica});
                         storage.faulty = false;
                     }
                 } else {
                     // When a journal recovers for the first time, enable its storage faults.
                     // Future crashes will recover in the presence of faults.
                     if (!storage.faulty) {
-
+                        log_simulator.debug("{}: enable storage faults", .{replica});
                         storage.faulty = true;
                     }
                 }
             }
-            storage.tick();
         }

-        for (cluster.replicas) |*replica| {
+        for (cluster.replicas) |*replica, index| {
             switch (cluster.health[replica.replica]) {
                 .up => |*ticks| {
                     ticks.* -|= 1;
                     replica.tick();
-                    cluster.
+                    cluster.storages[index].tick();
+
+                    state_checker.check_state(replica.replica) catch |err| {
+                        fatal(.correctness, "state checker error: {}", .{err});
+                    };

                     if (ticks.* != 0) continue;
                     if (crashes == 0) continue;
@@ -278,7 +379,7 @@ pub fn main() !void {
                     }

                     if (!try cluster.crash_replica(replica.replica)) continue;
-
+                    log_simulator.debug("{}: crash replica", .{replica.replica});
                     crashes -= 1;
                 },
                 .down => |*ticks| {
@@ -289,59 +390,48 @@ pub fn main() !void {
                     assert(replica.status == .recovering);
                     if (ticks.* == 0 and chance_f64(random, health_options.restart_probability)) {
                         cluster.health[replica.replica] = .{ .up = health_options.restart_stability };
-
+                        log_simulator.debug("{}: restart replica", .{replica.replica});
                     }
                 },
             }
         }

         cluster.network.packet_simulator.tick(cluster.health);
+        conductor.tick();

-
-
-
-
-                cluster.replica_up_count() == replica_count)
-            {
-                break;
-            }
-            continue;
-        } else {
-            assert(cluster.state_checker.transitions < transitions_max);
+        if (state_checker.convergence() and conductor.done() and
+            cluster.replica_up_count() == replica_count)
+        {
+            break;
         }
-
-        if (requests_sent < transitions_max) {
-            if (idle) {
-                if (chance(random, idle_off_probability)) idle = false;
-            } else {
-                if (chance(random, request_probability)) {
-                    if (send_request(random)) requests_sent += 1;
-                }
-                if (chance(random, idle_on_probability)) idle = true;
-            }
-        }
-    }
-
-    if (cluster.state_checker.transitions < transitions_max) {
+    } else {
         output.err("you can reproduce this failure with seed={}", .{seed});
-
+        fatal(.liveness, "unable to complete requests_committed_max before ticks_max", .{});
     }

-    assert(
+    assert(state_checker.convergence());
+    assert(conductor.done());

     output.info("\n PASSED ({} ticks)", .{tick});
 }

-
-
-
-
+pub const ExitCode = enum(u8) {
+    ok = 0,
+    crash = 127, // Any assertion crash will be given an exit code of 127 by default.
+    liveness = 128,
+    correctness = 129,
+};
+
+/// Print an error message and then exit with an exit code.
+fn fatal(exit_code: ExitCode, comptime fmt_string: []const u8, args: anytype) noreturn {
+    output.err(fmt_string, args);
+    std.os.exit(@enumToInt(exit_code));
 }

 /// Returns true, `p` percent of the time, else false.
 fn chance_f64(random: std.rand.Random, p: f64) bool {
     assert(p <= 100.0);
-    return random.float(f64) < p;
+    return random.float(f64) * 100.0 < p;
 }

 /// Returns the next argument for the simulator or null (if none available)
```
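The `chance_f64` fix matters for the health options above: `p` is documented as a percentage, so the uniform sample from `random.float(f64)` (in [0, 1)) must be scaled by 100 before comparing. With `crash_probability = 0.000001` that is roughly one crash per hundred million ticks, where the old expression would have crashed about one tick in a million. A small self-contained check of the corrected semantics, with the random draw factored out into a hypothetical `sample` parameter:

```zig
const std = @import("std");

/// The corrected predicate from the diff, with the uniform sample passed in explicitly
/// so it can be exercised with fixed values: `p` is a percentage in [0, 100].
fn chance_from_sample(sample: f64, p: f64) bool {
    std.debug.assert(sample >= 0.0 and sample < 1.0);
    std.debug.assert(p <= 100.0);
    return sample * 100.0 < p;
}

test "p is interpreted as a percentage, not a fraction" {
    // restart_probability = 0.0001 means 0.0001%, i.e. roughly one in a million samples:
    try std.testing.expect(chance_from_sample(0.0000005, 0.0001));
    // This sample would have passed the old `sample < p` check (one in ten thousand),
    // but correctly fails once p is treated as a percentage:
    try std.testing.expect(!chance_from_sample(0.00001, 0.0001));
}
```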
package/src/tigerbeetle/src/simulator.zig (continued):

```diff
@@ -350,62 +440,22 @@ fn args_next(args: *std.process.ArgIterator, allocator: std.mem.Allocator) ?[:0]
     return err_or_bytes catch @panic("Unable to extract next value from args");
 }

-fn
-
+fn on_replica_change_state(replica: *const Replica) void {
+    state_checker.check_state(replica.replica) catch |err| {
+        fatal(.correctness, "state checker error: {}", .{err});
+    };
 }

-fn
-
-
-    const client = &cluster.clients[client_index];
-    const checker_request_queue = &cluster.state_checker.client_requests[client_index];
-
-    // Ensure that we don't shortchange testing of the full client request queue length:
-    assert(client.request_queue.buffer.len <= checker_request_queue.buffer.len);
-    if (client.request_queue.full()) return false;
-    if (checker_request_queue.full()) return false;
-
-    const message = client.get_message();
-    defer client.unref(message);
-
-    const body_size_max = config.message_size_max - @sizeOf(Header);
-    const body_size: u32 = switch (random.uintLessThan(u8, 100)) {
-        0...10 => 0,
-        11...89 => random.uintLessThan(u32, body_size_max),
-        90...99 => body_size_max,
-        else => unreachable,
+fn on_replica_compact(replica: *const Replica) void {
+    storage_checker.replica_compact(replica) catch |err| {
+        fatal(.correctness, "storage checker error: {}", .{err});
     };
-
-    const body = message.buffer[@sizeOf(Header)..][0..body_size];
-    if (chance(random, 10)) {
-        std.mem.set(u8, body, 0);
-    } else {
-        random.bytes(body);
-    }
-
-    // While hashing the client ID with the request body prevents input collisions across clients,
-    // it's still possible for the same client to generate the same body, and therefore input hash.
-    const client_input = StateMachine.hash(client.id, body);
-    checker_request_queue.push_assume_capacity(client_input);
-    std.log.scoped(.test_client).debug("client {} sending input={x}", .{
-        client_index,
-        client_input,
-    });
-
-    client.request(0, client_callback, .hash, message, body_size);
-
-    return true;
 }

-fn
-
-
-
-) void {
-    _ = operation;
-    _ = results catch unreachable;
-
-    assert(user_data == 0);
+fn on_replica_checkpoint(replica: *const Replica) void {
+    storage_checker.replica_checkpoint(replica) catch |err| {
+        fatal(.correctness, "storage checker error: {}", .{err});
+    };
 }

 /// Returns a random partitioning mode, excluding .custom
```
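The `on_replica_*` hooks cannot return an error to the cluster, so a checker failure is funneled into `fatal`, which logs and exits with a distinctive code (129 for correctness, per the `ExitCode` enum added above) that a VOPR harness can classify. A simplified, self-contained sketch of that funnel; `Replica` and `StateChecker` are small stand-ins for the real types in test/cluster.zig and test/state_checker.zig:

```zig
const std = @import("std");

const Replica = struct { replica: u8, commit: u32 };

const StateChecker = struct {
    canonical_commit: u32 = 0,

    fn check_state(checker: *StateChecker, commit: u32) error{StateDiverged}!void {
        // The real checker compares a replica's state against the canonical history.
        if (commit != checker.canonical_commit) return error.StateDiverged;
    }
};

var state_checker: StateChecker = .{};

fn fatal(comptime fmt_string: []const u8, args: anytype) noreturn {
    std.log.err(fmt_string, args);
    std.os.exit(129); // ExitCode.correctness in the diff.
}

// Same shape as the diff's on_replica_change_state: the hook has no error return,
// so a checker error becomes a fatal exit with a distinct exit code.
fn on_replica_change_state(replica: *const Replica) void {
    state_checker.check_state(replica.commit) catch |err| {
        fatal("state checker error: {}", .{err});
    };
}

pub fn main() void {
    const replica = Replica{ .replica = 0, .commit = 0 };
    on_replica_change_state(&replica);
    std.debug.print("replica state consistent\n", .{});
}
```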
package/src/tigerbeetle/src/simulator.zig (continued):

```diff
@@ -416,7 +466,17 @@ fn random_partition_mode(random: std.rand.Random) PartitionMode {
     return @intToEnum(PartitionMode, enumAsInt);
 }

-fn
+fn random_id_permutation(random: std.rand.Random) IdPermutation {
+    return switch (random.uintLessThan(usize, 4)) {
+        0 => .{ .identity = {} },
+        1 => .{ .inversion = {} },
+        2 => .{ .zigzag = {} },
+        3 => .{ .random = random.int(u64) },
+        else => unreachable,
+    };
+}
+
+pub fn parse_seed(bytes: []const u8) u64 {
     return std.fmt.parseUnsigned(u64, bytes, 10) catch |err| switch (err) {
         error.Overflow => @panic("seed exceeds a 64-bit unsigned integer"),
         error.InvalidCharacter => @panic("seed contains an invalid character"),
```