tigerbeetle-node 0.11.12 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +212 -196
- package/dist/bin/aarch64-linux-gnu/client.node +0 -0
- package/dist/bin/aarch64-linux-musl/client.node +0 -0
- package/dist/bin/aarch64-macos/client.node +0 -0
- package/dist/bin/x86_64-linux-gnu/client.node +0 -0
- package/dist/bin/x86_64-linux-musl/client.node +0 -0
- package/dist/bin/x86_64-macos/client.node +0 -0
- package/dist/index.js +33 -1
- package/dist/index.js.map +1 -1
- package/package-lock.json +66 -0
- package/package.json +8 -17
- package/src/index.ts +56 -1
- package/src/node.zig +10 -9
- package/dist/.client.node.sha256 +0 -1
- package/scripts/build_lib.sh +0 -61
- package/scripts/download_node_headers.sh +0 -32
- package/src/tigerbeetle/scripts/benchmark.bat +0 -48
- package/src/tigerbeetle/scripts/benchmark.sh +0 -66
- package/src/tigerbeetle/scripts/confirm_image.sh +0 -44
- package/src/tigerbeetle/scripts/fuzz_loop.sh +0 -15
- package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +0 -7
- package/src/tigerbeetle/scripts/install.bat +0 -7
- package/src/tigerbeetle/scripts/install.sh +0 -21
- package/src/tigerbeetle/scripts/install_zig.bat +0 -113
- package/src/tigerbeetle/scripts/install_zig.sh +0 -90
- package/src/tigerbeetle/scripts/lint.zig +0 -199
- package/src/tigerbeetle/scripts/pre-commit.sh +0 -9
- package/src/tigerbeetle/scripts/scripts/benchmark.bat +0 -48
- package/src/tigerbeetle/scripts/scripts/benchmark.sh +0 -66
- package/src/tigerbeetle/scripts/scripts/confirm_image.sh +0 -44
- package/src/tigerbeetle/scripts/scripts/fuzz_loop.sh +0 -15
- package/src/tigerbeetle/scripts/scripts/fuzz_unique_errors.sh +0 -7
- package/src/tigerbeetle/scripts/scripts/install.bat +0 -7
- package/src/tigerbeetle/scripts/scripts/install.sh +0 -21
- package/src/tigerbeetle/scripts/scripts/install_zig.bat +0 -113
- package/src/tigerbeetle/scripts/scripts/install_zig.sh +0 -90
- package/src/tigerbeetle/scripts/scripts/lint.zig +0 -199
- package/src/tigerbeetle/scripts/scripts/pre-commit.sh +0 -9
- package/src/tigerbeetle/scripts/scripts/shellcheck.sh +0 -5
- package/src/tigerbeetle/scripts/scripts/tests_on_alpine.sh +0 -10
- package/src/tigerbeetle/scripts/scripts/tests_on_ubuntu.sh +0 -14
- package/src/tigerbeetle/scripts/scripts/upgrade_ubuntu_kernel.sh +0 -48
- package/src/tigerbeetle/scripts/scripts/validate_docs.sh +0 -23
- package/src/tigerbeetle/scripts/scripts/vr_state_enumerate +0 -46
- package/src/tigerbeetle/scripts/shellcheck.sh +0 -5
- package/src/tigerbeetle/scripts/tests_on_alpine.sh +0 -10
- package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +0 -14
- package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +0 -48
- package/src/tigerbeetle/scripts/validate_docs.sh +0 -23
- package/src/tigerbeetle/scripts/vr_state_enumerate +0 -46
- package/src/tigerbeetle/src/benchmark.zig +0 -314
- package/src/tigerbeetle/src/config.zig +0 -234
- package/src/tigerbeetle/src/constants.zig +0 -436
- package/src/tigerbeetle/src/ewah.zig +0 -286
- package/src/tigerbeetle/src/ewah_benchmark.zig +0 -120
- package/src/tigerbeetle/src/ewah_fuzz.zig +0 -130
- package/src/tigerbeetle/src/fifo.zig +0 -120
- package/src/tigerbeetle/src/io/benchmark.zig +0 -213
- package/src/tigerbeetle/src/io/darwin.zig +0 -814
- package/src/tigerbeetle/src/io/linux.zig +0 -1062
- package/src/tigerbeetle/src/io/test.zig +0 -643
- package/src/tigerbeetle/src/io/windows.zig +0 -1183
- package/src/tigerbeetle/src/io.zig +0 -34
- package/src/tigerbeetle/src/iops.zig +0 -107
- package/src/tigerbeetle/src/lsm/README.md +0 -308
- package/src/tigerbeetle/src/lsm/binary_search.zig +0 -341
- package/src/tigerbeetle/src/lsm/bloom_filter.zig +0 -125
- package/src/tigerbeetle/src/lsm/compaction.zig +0 -603
- package/src/tigerbeetle/src/lsm/composite_key.zig +0 -77
- package/src/tigerbeetle/src/lsm/direction.zig +0 -11
- package/src/tigerbeetle/src/lsm/eytzinger.zig +0 -587
- package/src/tigerbeetle/src/lsm/eytzinger_benchmark.zig +0 -330
- package/src/tigerbeetle/src/lsm/forest.zig +0 -204
- package/src/tigerbeetle/src/lsm/forest_fuzz.zig +0 -401
- package/src/tigerbeetle/src/lsm/grid.zig +0 -573
- package/src/tigerbeetle/src/lsm/groove.zig +0 -972
- package/src/tigerbeetle/src/lsm/k_way_merge.zig +0 -474
- package/src/tigerbeetle/src/lsm/level_iterator.zig +0 -332
- package/src/tigerbeetle/src/lsm/manifest.zig +0 -617
- package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -877
- package/src/tigerbeetle/src/lsm/manifest_log.zig +0 -789
- package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -691
- package/src/tigerbeetle/src/lsm/merge_iterator.zig +0 -106
- package/src/tigerbeetle/src/lsm/node_pool.zig +0 -235
- package/src/tigerbeetle/src/lsm/posted_groove.zig +0 -378
- package/src/tigerbeetle/src/lsm/segmented_array.zig +0 -1328
- package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -148
- package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -9
- package/src/tigerbeetle/src/lsm/set_associative_cache.zig +0 -850
- package/src/tigerbeetle/src/lsm/table.zig +0 -1031
- package/src/tigerbeetle/src/lsm/table_immutable.zig +0 -203
- package/src/tigerbeetle/src/lsm/table_iterator.zig +0 -340
- package/src/tigerbeetle/src/lsm/table_mutable.zig +0 -220
- package/src/tigerbeetle/src/lsm/test.zig +0 -438
- package/src/tigerbeetle/src/lsm/tree.zig +0 -1193
- package/src/tigerbeetle/src/lsm/tree_fuzz.zig +0 -474
- package/src/tigerbeetle/src/message_bus.zig +0 -1012
- package/src/tigerbeetle/src/message_pool.zig +0 -156
- package/src/tigerbeetle/src/ring_buffer.zig +0 -399
- package/src/tigerbeetle/src/simulator.zig +0 -569
- package/src/tigerbeetle/src/state_machine/auditor.zig +0 -577
- package/src/tigerbeetle/src/state_machine/workload.zig +0 -883
- package/src/tigerbeetle/src/state_machine.zig +0 -1881
- package/src/tigerbeetle/src/static_allocator.zig +0 -65
- package/src/tigerbeetle/src/stdx.zig +0 -162
- package/src/tigerbeetle/src/storage.zig +0 -393
- package/src/tigerbeetle/src/testing/cluster/message_bus.zig +0 -82
- package/src/tigerbeetle/src/testing/cluster/network.zig +0 -237
- package/src/tigerbeetle/src/testing/cluster/state_checker.zig +0 -169
- package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -202
- package/src/tigerbeetle/src/testing/cluster.zig +0 -443
- package/src/tigerbeetle/src/testing/fuzz.zig +0 -140
- package/src/tigerbeetle/src/testing/hash_log.zig +0 -66
- package/src/tigerbeetle/src/testing/id.zig +0 -99
- package/src/tigerbeetle/src/testing/packet_simulator.zig +0 -364
- package/src/tigerbeetle/src/testing/priority_queue.zig +0 -645
- package/src/tigerbeetle/src/testing/reply_sequence.zig +0 -139
- package/src/tigerbeetle/src/testing/state_machine.zig +0 -249
- package/src/tigerbeetle/src/testing/storage.zig +0 -757
- package/src/tigerbeetle/src/testing/table.zig +0 -247
- package/src/tigerbeetle/src/testing/time.zig +0 -84
- package/src/tigerbeetle/src/tigerbeetle.zig +0 -227
- package/src/tigerbeetle/src/time.zig +0 -112
- package/src/tigerbeetle/src/tracer.zig +0 -529
- package/src/tigerbeetle/src/unit_tests.zig +0 -42
- package/src/tigerbeetle/src/vopr.zig +0 -495
- package/src/tigerbeetle/src/vsr/README.md +0 -209
- package/src/tigerbeetle/src/vsr/client.zig +0 -544
- package/src/tigerbeetle/src/vsr/clock.zig +0 -853
- package/src/tigerbeetle/src/vsr/journal.zig +0 -2413
- package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +0 -111
- package/src/tigerbeetle/src/vsr/marzullo.zig +0 -309
- package/src/tigerbeetle/src/vsr/replica.zig +0 -6381
- package/src/tigerbeetle/src/vsr/replica_format.zig +0 -219
- package/src/tigerbeetle/src/vsr/superblock.zig +0 -1631
- package/src/tigerbeetle/src/vsr/superblock_client_table.zig +0 -256
- package/src/tigerbeetle/src/vsr/superblock_free_set.zig +0 -929
- package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +0 -334
- package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -390
- package/src/tigerbeetle/src/vsr/superblock_manifest.zig +0 -615
- package/src/tigerbeetle/src/vsr/superblock_quorums.zig +0 -394
- package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -314
- package/src/tigerbeetle/src/vsr.zig +0 -1352
|
@@ -1,569 +0,0 @@
|
|
|
1
|
-
const std = @import("std");
|
|
2
|
-
const builtin = @import("builtin");
|
|
3
|
-
const assert = std.debug.assert;
|
|
4
|
-
const mem = std.mem;
|
|
5
|
-
|
|
6
|
-
const tb = @import("tigerbeetle.zig");
|
|
7
|
-
const constants = @import("constants.zig");
|
|
8
|
-
const vsr = @import("vsr.zig");
|
|
9
|
-
const Header = vsr.Header;
|
|
10
|
-
|
|
11
|
-
const state_machine = @import("vsr_simulator_options").state_machine;
|
|
12
|
-
const StateMachineType = switch (state_machine) {
|
|
13
|
-
.accounting => @import("state_machine.zig").StateMachineType,
|
|
14
|
-
.testing => @import("testing/state_machine.zig").StateMachineType,
|
|
15
|
-
};
|
|
16
|
-
|
|
17
|
-
const Client = @import("testing/cluster.zig").Client;
|
|
18
|
-
const Cluster = @import("testing/cluster.zig").ClusterType(StateMachineType);
|
|
19
|
-
const Replica = @import("testing/cluster.zig").Replica;
|
|
20
|
-
const StateMachine = Cluster.StateMachine;
|
|
21
|
-
const Failure = @import("testing/cluster.zig").Failure;
|
|
22
|
-
const PartitionMode = @import("testing/packet_simulator.zig").PartitionMode;
|
|
23
|
-
const ReplySequence = @import("testing/reply_sequence.zig").ReplySequence;
|
|
24
|
-
const IdPermutation = @import("testing/id.zig").IdPermutation;
|
|
25
|
-
const Message = @import("message_pool.zig").MessagePool.Message;
|
|
26
|
-
|
|
27
|
-
/// The `log` namespace in this root file is required to implement our custom `log` function.
|
|
28
|
-
const output = std.log.scoped(.state_checker);
|
|
29
|
-
|
|
30
|
-
/// Set this to `false` if you want to see how literally everything works.
|
|
31
|
-
/// This will run much slower but will trace all logic across the cluster.
|
|
32
|
-
const log_state_transitions_only = builtin.mode != .Debug;
|
|
33
|
-
|
|
34
|
-
const log_simulator = std.log.scoped(.simulator);
|
|
35
|
-
|
|
36
|
-
pub const tigerbeetle_config = @import("config.zig").configs.test_min;
|
|
37
|
-
|
|
38
|
-
/// You can fine tune your log levels even further (debug/info/warn/err):
|
|
39
|
-
pub const log_level: std.log.Level = if (log_state_transitions_only) .info else .debug;
|
|
40
|
-
|
|
41
|
-
const cluster_id = 0;
|
|
42
|
-
|
|
43
|
-
pub fn main() !void {
|
|
44
|
-
// This must be initialized at runtime as stderr is not comptime known on e.g. Windows.
|
|
45
|
-
log_buffer.unbuffered_writer = std.io.getStdErr().writer();
|
|
46
|
-
|
|
47
|
-
// TODO Use std.testing.allocator when all deinit() leaks are fixed.
|
|
48
|
-
const allocator = std.heap.page_allocator;
|
|
49
|
-
|
|
50
|
-
var args = std.process.args();
|
|
51
|
-
|
|
52
|
-
// Skip argv[0] which is the name of this executable:
|
|
53
|
-
_ = args_next(&args, allocator);
|
|
54
|
-
|
|
55
|
-
const seed_random = std.crypto.random.int(u64);
|
|
56
|
-
const seed = seed_from_arg: {
|
|
57
|
-
const arg_two = args_next(&args, allocator) orelse break :seed_from_arg seed_random;
|
|
58
|
-
defer allocator.free(arg_two);
|
|
59
|
-
break :seed_from_arg parse_seed(arg_two);
|
|
60
|
-
};
|
|
61
|
-
|
|
62
|
-
if (builtin.mode == .ReleaseFast or builtin.mode == .ReleaseSmall) {
|
|
63
|
-
// We do not support ReleaseFast or ReleaseSmall because they disable assertions.
|
|
64
|
-
@panic("the simulator must be run with -OReleaseSafe");
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
if (seed == seed_random) {
|
|
68
|
-
if (builtin.mode != .ReleaseSafe) {
|
|
69
|
-
// If no seed is provided, then Debug is too slow and ReleaseSafe is much faster.
|
|
70
|
-
@panic("no seed provided: the simulator must be run with -OReleaseSafe");
|
|
71
|
-
}
|
|
72
|
-
if (log_level == .debug) {
|
|
73
|
-
output.warn("no seed provided: full debug logs are enabled, this will be slow", .{});
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
var prng = std.rand.DefaultPrng.init(seed);
|
|
78
|
-
const random = prng.random();
|
|
79
|
-
|
|
80
|
-
const replica_count = 1 + random.uintLessThan(u8, constants.replicas_max);
|
|
81
|
-
const client_count = 1 + random.uintLessThan(u8, constants.clients_max);
|
|
82
|
-
|
|
83
|
-
const cluster_options = Cluster.Options{
|
|
84
|
-
.cluster_id = cluster_id,
|
|
85
|
-
.replica_count = replica_count,
|
|
86
|
-
.client_count = client_count,
|
|
87
|
-
.storage_size_limit = vsr.sector_floor(
|
|
88
|
-
constants.storage_size_max - random.uintLessThan(u64, constants.storage_size_max / 10),
|
|
89
|
-
),
|
|
90
|
-
.seed = random.int(u64),
|
|
91
|
-
.network = .{
|
|
92
|
-
.replica_count = replica_count,
|
|
93
|
-
.client_count = client_count,
|
|
94
|
-
|
|
95
|
-
.seed = random.int(u64),
|
|
96
|
-
.one_way_delay_mean = 3 + random.uintLessThan(u16, 10),
|
|
97
|
-
.one_way_delay_min = random.uintLessThan(u16, 3),
|
|
98
|
-
.packet_loss_probability = random.uintLessThan(u8, 30),
|
|
99
|
-
.path_maximum_capacity = 2 + random.uintLessThan(u8, 19),
|
|
100
|
-
.path_clog_duration_mean = random.uintLessThan(u16, 500),
|
|
101
|
-
.path_clog_probability = random.uintLessThan(u8, 2),
|
|
102
|
-
.packet_replay_probability = random.uintLessThan(u8, 50),
|
|
103
|
-
|
|
104
|
-
.partition_mode = random_partition_mode(random),
|
|
105
|
-
.partition_probability = random.uintLessThan(u8, 3),
|
|
106
|
-
.unpartition_probability = 1 + random.uintLessThan(u8, 10),
|
|
107
|
-
.partition_stability = 100 + random.uintLessThan(u32, 100),
|
|
108
|
-
.unpartition_stability = random.uintLessThan(u32, 20),
|
|
109
|
-
},
|
|
110
|
-
.storage = .{
|
|
111
|
-
.seed = random.int(u64),
|
|
112
|
-
.read_latency_min = random.uintLessThan(u16, 3),
|
|
113
|
-
.read_latency_mean = 3 + random.uintLessThan(u16, 10),
|
|
114
|
-
.write_latency_min = random.uintLessThan(u16, 3),
|
|
115
|
-
.write_latency_mean = 3 + random.uintLessThan(u16, 100),
|
|
116
|
-
.read_fault_probability = random.uintLessThan(u8, 10),
|
|
117
|
-
.write_fault_probability = random.uintLessThan(u8, 10),
|
|
118
|
-
.crash_fault_probability = 80 + random.uintLessThan(u8, 21),
|
|
119
|
-
},
|
|
120
|
-
.storage_fault_atlas = .{
|
|
121
|
-
.faulty_superblock = true,
|
|
122
|
-
.faulty_wal_headers = replica_count > 1,
|
|
123
|
-
.faulty_wal_prepares = replica_count > 1,
|
|
124
|
-
},
|
|
125
|
-
.state_machine = switch (state_machine) {
|
|
126
|
-
.testing => .{},
|
|
127
|
-
.accounting => .{
|
|
128
|
-
.lsm_forest_node_count = 4096,
|
|
129
|
-
.cache_entries_accounts = if (random.boolean()) 0 else 2048,
|
|
130
|
-
.cache_entries_transfers = 0,
|
|
131
|
-
.cache_entries_posted = if (random.boolean()) 0 else 2048,
|
|
132
|
-
},
|
|
133
|
-
},
|
|
134
|
-
};
|
|
135
|
-
|
|
136
|
-
const workload_options = StateMachine.Workload.Options.generate(random, .{
|
|
137
|
-
.client_count = client_count,
|
|
138
|
-
// TODO(DJ) Once Workload no longer needs in_flight_max, make stalled_queue_capacity private.
|
|
139
|
-
// Also maybe make it dynamic (computed from the client_count instead of clients_max).
|
|
140
|
-
.in_flight_max = ReplySequence.stalled_queue_capacity,
|
|
141
|
-
});
|
|
142
|
-
|
|
143
|
-
const simulator_options = Simulator.Options{
|
|
144
|
-
.cluster = cluster_options,
|
|
145
|
-
.workload = workload_options,
|
|
146
|
-
.replica_crash_probability = 0.00002,
|
|
147
|
-
.replica_crash_stability = random.uintLessThan(u32, 1_000),
|
|
148
|
-
.replica_restart_probability = 0.0001,
|
|
149
|
-
.replica_restart_stability = random.uintLessThan(u32, 1_000),
|
|
150
|
-
.requests_max = constants.journal_slot_count * 3,
|
|
151
|
-
.request_probability = 1 + random.uintLessThan(u8, 99),
|
|
152
|
-
.request_idle_on_probability = random.uintLessThan(u8, 20),
|
|
153
|
-
.request_idle_off_probability = 10 + random.uintLessThan(u8, 10),
|
|
154
|
-
};
|
|
155
|
-
|
|
156
|
-
output.info(
|
|
157
|
-
\\
|
|
158
|
-
\\ SEED={}
|
|
159
|
-
\\
|
|
160
|
-
\\ replicas={}
|
|
161
|
-
\\ clients={}
|
|
162
|
-
\\ request_probability={}%
|
|
163
|
-
\\ idle_on_probability={}%
|
|
164
|
-
\\ idle_off_probability={}%
|
|
165
|
-
\\ one_way_delay_mean={} ticks
|
|
166
|
-
\\ one_way_delay_min={} ticks
|
|
167
|
-
\\ packet_loss_probability={}%
|
|
168
|
-
\\ path_maximum_capacity={} messages
|
|
169
|
-
\\ path_clog_duration_mean={} ticks
|
|
170
|
-
\\ path_clog_probability={}%
|
|
171
|
-
\\ packet_replay_probability={}%
|
|
172
|
-
\\ partition_mode={}
|
|
173
|
-
\\ partition_probability={}%
|
|
174
|
-
\\ unpartition_probability={}%
|
|
175
|
-
\\ partition_stability={} ticks
|
|
176
|
-
\\ unpartition_stability={} ticks
|
|
177
|
-
\\ read_latency_min={}
|
|
178
|
-
\\ read_latency_mean={}
|
|
179
|
-
\\ write_latency_min={}
|
|
180
|
-
\\ write_latency_mean={}
|
|
181
|
-
\\ read_fault_probability={}%
|
|
182
|
-
\\ write_fault_probability={}%
|
|
183
|
-
\\ crash_probability={d}%
|
|
184
|
-
\\ crash_stability={} ticks
|
|
185
|
-
\\ restart_probability={d}%
|
|
186
|
-
\\ restart_stability={} ticks
|
|
187
|
-
, .{
|
|
188
|
-
seed,
|
|
189
|
-
cluster_options.replica_count,
|
|
190
|
-
cluster_options.client_count,
|
|
191
|
-
simulator_options.request_probability,
|
|
192
|
-
simulator_options.request_idle_on_probability,
|
|
193
|
-
simulator_options.request_idle_off_probability,
|
|
194
|
-
cluster_options.network.one_way_delay_mean,
|
|
195
|
-
cluster_options.network.one_way_delay_min,
|
|
196
|
-
cluster_options.network.packet_loss_probability,
|
|
197
|
-
cluster_options.network.path_maximum_capacity,
|
|
198
|
-
cluster_options.network.path_clog_duration_mean,
|
|
199
|
-
cluster_options.network.path_clog_probability,
|
|
200
|
-
cluster_options.network.packet_replay_probability,
|
|
201
|
-
cluster_options.network.partition_mode,
|
|
202
|
-
cluster_options.network.partition_probability,
|
|
203
|
-
cluster_options.network.unpartition_probability,
|
|
204
|
-
cluster_options.network.partition_stability,
|
|
205
|
-
cluster_options.network.unpartition_stability,
|
|
206
|
-
cluster_options.storage.read_latency_min,
|
|
207
|
-
cluster_options.storage.read_latency_mean,
|
|
208
|
-
cluster_options.storage.write_latency_min,
|
|
209
|
-
cluster_options.storage.write_latency_mean,
|
|
210
|
-
cluster_options.storage.read_fault_probability,
|
|
211
|
-
cluster_options.storage.write_fault_probability,
|
|
212
|
-
simulator_options.replica_crash_probability * 100,
|
|
213
|
-
simulator_options.replica_crash_stability,
|
|
214
|
-
simulator_options.replica_restart_probability * 100,
|
|
215
|
-
simulator_options.replica_restart_stability,
|
|
216
|
-
});
|
|
217
|
-
|
|
218
|
-
var simulator = try Simulator.init(allocator, random, simulator_options);
|
|
219
|
-
defer simulator.deinit(allocator);
|
|
220
|
-
|
|
221
|
-
const ticks_max = 50_000_000;
|
|
222
|
-
var tick: u64 = 0;
|
|
223
|
-
while (tick < ticks_max) : (tick += 1) {
|
|
224
|
-
simulator.tick();
|
|
225
|
-
if (simulator.done()) break;
|
|
226
|
-
} else {
|
|
227
|
-
output.err("you can reproduce this failure with seed={}", .{seed});
|
|
228
|
-
fatal(.liveness, "unable to complete requests_committed_max before ticks_max", .{});
|
|
229
|
-
}
|
|
230
|
-
assert(simulator.done());
|
|
231
|
-
|
|
232
|
-
output.info("\n PASSED ({} ticks)", .{tick});
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
pub const Simulator = struct {
|
|
236
|
-
pub const Options = struct {
|
|
237
|
-
cluster: Cluster.Options,
|
|
238
|
-
workload: StateMachine.Workload.Options,
|
|
239
|
-
|
|
240
|
-
/// Probability per tick that a crash will occur.
|
|
241
|
-
replica_crash_probability: f64,
|
|
242
|
-
/// Minimum duration of a crash.
|
|
243
|
-
replica_crash_stability: u32,
|
|
244
|
-
/// Probability per tick that a crashed replica will recover.
|
|
245
|
-
replica_restart_probability: f64,
|
|
246
|
-
/// Minimum time a replica is up until it is crashed again.
|
|
247
|
-
replica_restart_stability: u32,
|
|
248
|
-
|
|
249
|
-
/// The total number of requests to send. Does not count `register` messages.
|
|
250
|
-
requests_max: usize,
|
|
251
|
-
request_probability: u8, // percent
|
|
252
|
-
request_idle_on_probability: u8, // percent
|
|
253
|
-
request_idle_off_probability: u8, // percent
|
|
254
|
-
};
|
|
255
|
-
|
|
256
|
-
random: std.rand.Random,
|
|
257
|
-
options: Options,
|
|
258
|
-
cluster: *Cluster,
|
|
259
|
-
workload: StateMachine.Workload,
|
|
260
|
-
|
|
261
|
-
/// Protect a replica from fast successive crash/restarts.
|
|
262
|
-
replica_stability: []usize,
|
|
263
|
-
reply_sequence: ReplySequence,
|
|
264
|
-
|
|
265
|
-
/// Total number of requests sent, including those that have not been delivered.
|
|
266
|
-
/// Does not include `register` messages.
|
|
267
|
-
requests_sent: usize = 0,
|
|
268
|
-
requests_idle: bool = false,
|
|
269
|
-
|
|
270
|
-
pub fn init(allocator: std.mem.Allocator, random: std.rand.Random, options: Options) !Simulator {
|
|
271
|
-
assert(options.replica_crash_probability < 1.0);
|
|
272
|
-
assert(options.replica_crash_probability >= 0.0);
|
|
273
|
-
assert(options.replica_restart_probability < 1.0);
|
|
274
|
-
assert(options.replica_restart_probability >= 0.0);
|
|
275
|
-
assert(options.requests_max > 0);
|
|
276
|
-
assert(options.request_probability > 0);
|
|
277
|
-
assert(options.request_probability <= 100);
|
|
278
|
-
assert(options.request_idle_on_probability <= 100);
|
|
279
|
-
assert(options.request_idle_off_probability > 0);
|
|
280
|
-
assert(options.request_idle_off_probability <= 100);
|
|
281
|
-
|
|
282
|
-
var cluster = try Cluster.init(allocator, on_cluster_reply, options.cluster);
|
|
283
|
-
errdefer cluster.deinit();
|
|
284
|
-
|
|
285
|
-
var workload = try StateMachine.Workload.init(allocator, random, options.workload);
|
|
286
|
-
errdefer workload.deinit(allocator);
|
|
287
|
-
|
|
288
|
-
var replica_stability = try allocator.alloc(usize, options.cluster.replica_count);
|
|
289
|
-
errdefer allocator.free(replica_stability);
|
|
290
|
-
std.mem.set(usize, replica_stability, 0);
|
|
291
|
-
|
|
292
|
-
var reply_sequence = try ReplySequence.init(allocator);
|
|
293
|
-
errdefer reply_sequence.deinit(allocator);
|
|
294
|
-
|
|
295
|
-
return Simulator{
|
|
296
|
-
.random = random,
|
|
297
|
-
.options = options,
|
|
298
|
-
.cluster = cluster,
|
|
299
|
-
.workload = workload,
|
|
300
|
-
.replica_stability = replica_stability,
|
|
301
|
-
.reply_sequence = reply_sequence,
|
|
302
|
-
};
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
pub fn deinit(simulator: *Simulator, allocator: std.mem.Allocator) void {
|
|
306
|
-
allocator.free(simulator.replica_stability);
|
|
307
|
-
simulator.reply_sequence.deinit(allocator);
|
|
308
|
-
simulator.workload.deinit(allocator);
|
|
309
|
-
simulator.cluster.deinit();
|
|
310
|
-
}
|
|
311
|
-
|
|
312
|
-
pub fn done(simulator: *Simulator) bool {
|
|
313
|
-
assert(simulator.requests_sent <= simulator.options.requests_max);
|
|
314
|
-
|
|
315
|
-
if (!simulator.cluster.state_checker.convergence()) return false;
|
|
316
|
-
if (!simulator.reply_sequence.empty()) return false;
|
|
317
|
-
if (simulator.requests_sent < simulator.options.requests_max) return false;
|
|
318
|
-
|
|
319
|
-
for (simulator.cluster.replica_health) |health| {
|
|
320
|
-
if (health == .down) return false;
|
|
321
|
-
}
|
|
322
|
-
|
|
323
|
-
for (simulator.cluster.clients) |*client| {
|
|
324
|
-
if (client.request_queue.count > 0) return false;
|
|
325
|
-
}
|
|
326
|
-
return true;
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
pub fn tick(simulator: *Simulator) void {
|
|
330
|
-
// TODO(Zig): Remove (see on_cluster_reply()).
|
|
331
|
-
simulator.cluster.context = simulator;
|
|
332
|
-
|
|
333
|
-
simulator.cluster.tick();
|
|
334
|
-
simulator.tick_requests();
|
|
335
|
-
simulator.tick_crash();
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
fn on_cluster_reply(
|
|
339
|
-
cluster: *Cluster,
|
|
340
|
-
reply_client: usize,
|
|
341
|
-
request: *Message,
|
|
342
|
-
reply: *Message,
|
|
343
|
-
) void {
|
|
344
|
-
// TODO(Zig) Use @returnAddress to initialize the cluster, then this can just use @fieldParentPtr().
|
|
345
|
-
const simulator = @ptrCast(*Simulator, @alignCast(@alignOf(Simulator), cluster.context.?));
|
|
346
|
-
simulator.reply_sequence.insert(reply_client, request, reply);
|
|
347
|
-
|
|
348
|
-
while (simulator.reply_sequence.peek()) |commit| {
|
|
349
|
-
defer simulator.reply_sequence.next();
|
|
350
|
-
|
|
351
|
-
const commit_client = simulator.cluster.clients[commit.client_index];
|
|
352
|
-
assert(commit.reply.references == 1);
|
|
353
|
-
assert(commit.reply.header.command == .reply);
|
|
354
|
-
assert(commit.reply.header.client == commit_client.id);
|
|
355
|
-
assert(commit.reply.header.request == commit.request.header.request);
|
|
356
|
-
assert(commit.reply.header.operation == commit.request.header.operation);
|
|
357
|
-
|
|
358
|
-
assert(commit.request.references == 1);
|
|
359
|
-
assert(commit.request.header.command == .request);
|
|
360
|
-
assert(commit.request.header.client == commit_client.id);
|
|
361
|
-
|
|
362
|
-
log_simulator.debug("consume_stalled_replies: op={} operation={} client={} request={}", .{
|
|
363
|
-
commit.reply.header.op,
|
|
364
|
-
commit.reply.header.operation,
|
|
365
|
-
commit.request.header.client,
|
|
366
|
-
commit.request.header.request,
|
|
367
|
-
});
|
|
368
|
-
|
|
369
|
-
if (commit.request.header.operation != .register) {
|
|
370
|
-
simulator.workload.on_reply(
|
|
371
|
-
commit.client_index,
|
|
372
|
-
commit.reply.header.operation,
|
|
373
|
-
commit.reply.header.timestamp,
|
|
374
|
-
commit.request.body(),
|
|
375
|
-
commit.reply.body(),
|
|
376
|
-
);
|
|
377
|
-
}
|
|
378
|
-
}
|
|
379
|
-
}
|
|
380
|
-
|
|
381
|
-
/// Maybe send a request from one of the cluster's clients.
|
|
382
|
-
fn tick_requests(simulator: *Simulator) void {
|
|
383
|
-
if (simulator.requests_idle) {
|
|
384
|
-
if (chance(simulator.random, simulator.options.request_idle_off_probability)) {
|
|
385
|
-
simulator.requests_idle = false;
|
|
386
|
-
}
|
|
387
|
-
} else {
|
|
388
|
-
if (chance(simulator.random, simulator.options.request_idle_on_probability)) {
|
|
389
|
-
simulator.requests_idle = true;
|
|
390
|
-
}
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
if (simulator.requests_idle) return;
|
|
394
|
-
if (simulator.requests_sent == simulator.options.requests_max) return;
|
|
395
|
-
if (!chance(simulator.random, simulator.options.request_probability)) return;
|
|
396
|
-
|
|
397
|
-
const client_index =
|
|
398
|
-
simulator.random.uintLessThan(usize, simulator.options.cluster.client_count);
|
|
399
|
-
var client = &simulator.cluster.clients[client_index];
|
|
400
|
-
|
|
401
|
-
// Make sure that there is capacity in the client's request queue so that we never trigger
|
|
402
|
-
// error.TooManyOutstandingRequests.
|
|
403
|
-
if (client.request_queue.count + 1 > constants.client_request_queue_max) return;
|
|
404
|
-
|
|
405
|
-
// Messages aren't added to the ReplySequence until a reply arrives.
|
|
406
|
-
// Before sending a new message, make sure there will definitely be room for it.
|
|
407
|
-
var reserved: usize = 0;
|
|
408
|
-
for (simulator.cluster.clients) |*c| {
|
|
409
|
-
// Count the number of clients that are still waiting for a `register` to complete,
|
|
410
|
-
// since they may start one at any time.
|
|
411
|
-
reserved += @boolToInt(c.session == 0);
|
|
412
|
-
// Count the number of requests queued.
|
|
413
|
-
reserved += c.request_queue.count;
|
|
414
|
-
}
|
|
415
|
-
// +1 for the potential request — is there room in the sequencer's queue?
|
|
416
|
-
if (reserved + 1 > simulator.reply_sequence.free()) return;
|
|
417
|
-
|
|
418
|
-
var request_message = client.get_message();
|
|
419
|
-
defer client.unref(request_message);
|
|
420
|
-
|
|
421
|
-
const request_metadata = simulator.workload.build_request(
|
|
422
|
-
client_index,
|
|
423
|
-
@alignCast(
|
|
424
|
-
@alignOf(vsr.Header),
|
|
425
|
-
request_message.buffer[@sizeOf(vsr.Header)..constants.message_size_max],
|
|
426
|
-
),
|
|
427
|
-
);
|
|
428
|
-
assert(request_metadata.size <= constants.message_size_max - @sizeOf(vsr.Header));
|
|
429
|
-
|
|
430
|
-
simulator.cluster.request(
|
|
431
|
-
client_index,
|
|
432
|
-
request_metadata.operation,
|
|
433
|
-
request_message,
|
|
434
|
-
request_metadata.size,
|
|
435
|
-
);
|
|
436
|
-
// Since we already checked the client's request queue for free space, `client.request()`
|
|
437
|
-
// should always queue the request.
|
|
438
|
-
assert(request_message == client.request_queue.tail_ptr().?.message);
|
|
439
|
-
assert(request_message.header.size == @sizeOf(vsr.Header) + request_metadata.size);
|
|
440
|
-
assert(request_message.header.operation.cast(StateMachine) == request_metadata.operation);
|
|
441
|
-
|
|
442
|
-
simulator.requests_sent += 1;
|
|
443
|
-
assert(simulator.requests_sent <= simulator.options.requests_max);
|
|
444
|
-
}
|
|
445
|
-
|
|
446
|
-
fn tick_crash(simulator: *Simulator) void {
|
|
447
|
-
const recoverable_count_min =
|
|
448
|
-
vsr.quorums(simulator.options.cluster.replica_count).view_change;
|
|
449
|
-
var recoverable_count: usize = 0;
|
|
450
|
-
for (simulator.cluster.replicas) |*replica| {
|
|
451
|
-
recoverable_count += @boolToInt(replica.status != .recovering_head);
|
|
452
|
-
}
|
|
453
|
-
|
|
454
|
-
for (simulator.cluster.replicas) |*replica| {
|
|
455
|
-
simulator.replica_stability[replica.replica] -|= 1;
|
|
456
|
-
const stability = simulator.replica_stability[replica.replica];
|
|
457
|
-
if (stability > 0) continue;
|
|
458
|
-
|
|
459
|
-
switch (simulator.cluster.replica_health[replica.replica]) {
|
|
460
|
-
.up => {
|
|
461
|
-
const storage = &simulator.cluster.storages[replica.replica];
|
|
462
|
-
const replica_writes = storage.writes.count();
|
|
463
|
-
const crash_probability = simulator.options.replica_crash_probability *
|
|
464
|
-
@as(f64, if (replica_writes == 0) 1.0 else 10.0);
|
|
465
|
-
if (!chance_f64(simulator.random, crash_probability)) continue;
|
|
466
|
-
|
|
467
|
-
const fault = recoverable_count > recoverable_count_min;
|
|
468
|
-
replica.superblock.storage.faulty = fault;
|
|
469
|
-
|
|
470
|
-
if (!fault) {
|
|
471
|
-
// The journal writes redundant headers of faulty ops as zeroes to ensure
|
|
472
|
-
// that they remain faulty after a crash/recover. Since that fault cannot
|
|
473
|
-
// be disabled by `storage.faulty`, we must manually repair it here to
|
|
474
|
-
// ensure a cluster cannot become stuck in status=recovering_head.
|
|
475
|
-
// See recover_slots() for more detail.
|
|
476
|
-
const offset = vsr.Zone.wal_headers.offset(0);
|
|
477
|
-
const size = vsr.Zone.wal_headers.size().?;
|
|
478
|
-
const headers_bytes = storage.memory[offset..][0..size];
|
|
479
|
-
const headers = mem.bytesAsSlice(vsr.Header, headers_bytes);
|
|
480
|
-
for (headers) |*h, slot| {
|
|
481
|
-
if (h.checksum == 0) h.* = storage.wal_prepares()[slot].header;
|
|
482
|
-
}
|
|
483
|
-
}
|
|
484
|
-
|
|
485
|
-
log_simulator.debug("{}: crash replica (faults={})", .{ replica.replica, fault });
|
|
486
|
-
simulator.cluster.crash_replica(replica.replica) catch unreachable;
|
|
487
|
-
replica.superblock.storage.faulty = true;
|
|
488
|
-
|
|
489
|
-
recoverable_count -= @boolToInt(replica.status == .recovering_head);
|
|
490
|
-
assert(replica.status != .recovering_head or fault);
|
|
491
|
-
|
|
492
|
-
simulator.replica_stability[replica.replica] =
|
|
493
|
-
simulator.options.replica_crash_stability;
|
|
494
|
-
},
|
|
495
|
-
.down => {
|
|
496
|
-
if (chance_f64(simulator.random, simulator.options.replica_restart_probability)) {
|
|
497
|
-
simulator.cluster.restart_replica(replica.replica);
|
|
498
|
-
log_simulator.debug("{}: restart replica", .{replica.replica});
|
|
499
|
-
simulator.replica_stability[replica.replica] =
|
|
500
|
-
simulator.options.replica_restart_stability;
|
|
501
|
-
}
|
|
502
|
-
},
|
|
503
|
-
}
|
|
504
|
-
}
|
|
505
|
-
}
|
|
506
|
-
};
|
|
507
|
-
|
|
508
|
-
/// Print an error message and then exit with an exit code.
|
|
509
|
-
fn fatal(failure: Failure, comptime fmt_string: []const u8, args: anytype) noreturn {
|
|
510
|
-
output.err(fmt_string, args);
|
|
511
|
-
std.os.exit(@enumToInt(failure));
|
|
512
|
-
}
|
|
513
|
-
|
|
514
|
-
/// Returns true, `p` percent of the time, else false.
|
|
515
|
-
fn chance(random: std.rand.Random, p: u8) bool {
|
|
516
|
-
assert(p <= 100);
|
|
517
|
-
return random.uintLessThanBiased(u8, 100) < p;
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
/// Returns true, `p` percent of the time, else false.
|
|
521
|
-
fn chance_f64(random: std.rand.Random, p: f64) bool {
|
|
522
|
-
assert(p <= 100.0);
|
|
523
|
-
return random.float(f64) * 100.0 < p;
|
|
524
|
-
}
|
|
525
|
-
|
|
526
|
-
/// Returns the next argument for the simulator or null (if none available)
|
|
527
|
-
fn args_next(args: *std.process.ArgIterator, allocator: std.mem.Allocator) ?[:0]const u8 {
|
|
528
|
-
const err_or_bytes = args.next(allocator) orelse return null;
|
|
529
|
-
return err_or_bytes catch @panic("Unable to extract next value from args");
|
|
530
|
-
}
|
|
531
|
-
|
|
532
|
-
/// Returns a random partitioning mode.
|
|
533
|
-
fn random_partition_mode(random: std.rand.Random) PartitionMode {
|
|
534
|
-
const typeInfo = @typeInfo(PartitionMode).Enum;
|
|
535
|
-
var enumAsInt = random.uintAtMost(typeInfo.tag_type, typeInfo.fields.len - 1);
|
|
536
|
-
return @intToEnum(PartitionMode, enumAsInt);
|
|
537
|
-
}
|
|
538
|
-
|
|
539
|
-
pub fn parse_seed(bytes: []const u8) u64 {
|
|
540
|
-
return std.fmt.parseUnsigned(u64, bytes, 10) catch |err| switch (err) {
|
|
541
|
-
error.Overflow => @panic("seed exceeds a 64-bit unsigned integer"),
|
|
542
|
-
error.InvalidCharacter => @panic("seed contains an invalid character"),
|
|
543
|
-
};
|
|
544
|
-
}
|
|
545
|
-
|
|
546
|
-
var log_buffer: std.io.BufferedWriter(4096, std.fs.File.Writer) = .{
|
|
547
|
-
// This is initialized in main(), as std.io.getStdErr() is not comptime known on e.g. Windows.
|
|
548
|
-
.unbuffered_writer = undefined,
|
|
549
|
-
};
|
|
550
|
-
|
|
551
|
-
pub fn log(
|
|
552
|
-
comptime level: std.log.Level,
|
|
553
|
-
comptime scope: @TypeOf(.EnumLiteral),
|
|
554
|
-
comptime format: []const u8,
|
|
555
|
-
args: anytype,
|
|
556
|
-
) void {
|
|
557
|
-
if (log_state_transitions_only and scope != .state_checker) return;
|
|
558
|
-
|
|
559
|
-
const prefix_default = "[" ++ @tagName(level) ++ "] " ++ "(" ++ @tagName(scope) ++ "): ";
|
|
560
|
-
const prefix = if (log_state_transitions_only) "" else prefix_default;
|
|
561
|
-
|
|
562
|
-
// Print the message to stderr using a buffer to avoid many small write() syscalls when
|
|
563
|
-
// providing many format arguments. Silently ignore failure.
|
|
564
|
-
log_buffer.writer().print(prefix ++ format ++ "\n", args) catch {};
|
|
565
|
-
|
|
566
|
-
// Flush the buffer before returning to ensure, for example, that a log message
|
|
567
|
-
// immediately before a failing assertion is fully printed.
|
|
568
|
-
log_buffer.flush() catch {};
|
|
569
|
-
}
|