tigerbeetle-node 0.10.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/README.md +302 -101
  2. package/dist/index.d.ts +70 -72
  3. package/dist/index.js +70 -72
  4. package/dist/index.js.map +1 -1
  5. package/package.json +9 -8
  6. package/scripts/download_node_headers.sh +14 -7
  7. package/src/index.ts +6 -10
  8. package/src/node.zig +6 -3
  9. package/src/tigerbeetle/scripts/benchmark.sh +4 -4
  10. package/src/tigerbeetle/scripts/confirm_image.sh +44 -0
  11. package/src/tigerbeetle/scripts/fuzz_loop.sh +15 -0
  12. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +7 -0
  13. package/src/tigerbeetle/scripts/install.sh +19 -4
  14. package/src/tigerbeetle/scripts/install_zig.bat +5 -1
  15. package/src/tigerbeetle/scripts/install_zig.sh +24 -14
  16. package/src/tigerbeetle/scripts/pre-commit.sh +9 -0
  17. package/src/tigerbeetle/scripts/shellcheck.sh +5 -0
  18. package/src/tigerbeetle/scripts/tests_on_alpine.sh +10 -0
  19. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +14 -0
  20. package/src/tigerbeetle/scripts/validate_docs.sh +17 -0
  21. package/src/tigerbeetle/src/benchmark.zig +29 -13
  22. package/src/tigerbeetle/src/c/tb_client/context.zig +248 -47
  23. package/src/tigerbeetle/src/c/tb_client/echo_client.zig +108 -0
  24. package/src/tigerbeetle/src/c/tb_client/packet.zig +2 -2
  25. package/src/tigerbeetle/src/c/tb_client/signal.zig +2 -4
  26. package/src/tigerbeetle/src/c/tb_client/thread.zig +17 -257
  27. package/src/tigerbeetle/src/c/tb_client.h +118 -84
  28. package/src/tigerbeetle/src/c/tb_client.zig +88 -23
  29. package/src/tigerbeetle/src/c/tb_client_header_test.zig +135 -0
  30. package/src/tigerbeetle/src/c/test.zig +371 -1
  31. package/src/tigerbeetle/src/cli.zig +37 -7
  32. package/src/tigerbeetle/src/config.zig +58 -17
  33. package/src/tigerbeetle/src/demo.zig +5 -2
  34. package/src/tigerbeetle/src/demo_01_create_accounts.zig +1 -1
  35. package/src/tigerbeetle/src/demo_03_create_transfers.zig +13 -0
  36. package/src/tigerbeetle/src/ewah.zig +11 -33
  37. package/src/tigerbeetle/src/ewah_benchmark.zig +8 -9
  38. package/src/tigerbeetle/src/io/linux.zig +1 -1
  39. package/src/tigerbeetle/src/lsm/README.md +308 -0
  40. package/src/tigerbeetle/src/lsm/binary_search.zig +137 -10
  41. package/src/tigerbeetle/src/lsm/bloom_filter.zig +43 -0
  42. package/src/tigerbeetle/src/lsm/compaction.zig +376 -397
  43. package/src/tigerbeetle/src/lsm/composite_key.zig +2 -0
  44. package/src/tigerbeetle/src/lsm/eytzinger.zig +1 -1
  45. package/src/tigerbeetle/src/{eytzinger_benchmark.zig → lsm/eytzinger_benchmark.zig} +34 -21
  46. package/src/tigerbeetle/src/lsm/forest.zig +21 -447
  47. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +414 -0
  48. package/src/tigerbeetle/src/lsm/grid.zig +170 -76
  49. package/src/tigerbeetle/src/lsm/groove.zig +197 -133
  50. package/src/tigerbeetle/src/lsm/k_way_merge.zig +40 -18
  51. package/src/tigerbeetle/src/lsm/level_iterator.zig +28 -9
  52. package/src/tigerbeetle/src/lsm/manifest.zig +93 -180
  53. package/src/tigerbeetle/src/lsm/manifest_level.zig +161 -454
  54. package/src/tigerbeetle/src/lsm/manifest_log.zig +243 -356
  55. package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +665 -0
  56. package/src/tigerbeetle/src/lsm/node_pool.zig +4 -0
  57. package/src/tigerbeetle/src/lsm/posted_groove.zig +65 -76
  58. package/src/tigerbeetle/src/lsm/segmented_array.zig +580 -251
  59. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +148 -0
  60. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +9 -0
  61. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +62 -12
  62. package/src/tigerbeetle/src/lsm/table.zig +115 -68
  63. package/src/tigerbeetle/src/lsm/table_immutable.zig +30 -23
  64. package/src/tigerbeetle/src/lsm/table_iterator.zig +27 -17
  65. package/src/tigerbeetle/src/lsm/table_mutable.zig +63 -12
  66. package/src/tigerbeetle/src/lsm/test.zig +61 -56
  67. package/src/tigerbeetle/src/lsm/tree.zig +450 -407
  68. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +461 -0
  69. package/src/tigerbeetle/src/main.zig +83 -8
  70. package/src/tigerbeetle/src/message_bus.zig +20 -9
  71. package/src/tigerbeetle/src/message_pool.zig +22 -19
  72. package/src/tigerbeetle/src/ring_buffer.zig +7 -3
  73. package/src/tigerbeetle/src/simulator.zig +179 -119
  74. package/src/tigerbeetle/src/state_machine.zig +381 -246
  75. package/src/tigerbeetle/src/static_allocator.zig +65 -0
  76. package/src/tigerbeetle/src/storage.zig +3 -7
  77. package/src/tigerbeetle/src/test/accounting/auditor.zig +577 -0
  78. package/src/tigerbeetle/src/test/accounting/workload.zig +823 -0
  79. package/src/tigerbeetle/src/test/cluster.zig +33 -81
  80. package/src/tigerbeetle/src/test/conductor.zig +366 -0
  81. package/src/tigerbeetle/src/test/fuzz.zig +121 -0
  82. package/src/tigerbeetle/src/test/id.zig +89 -0
  83. package/src/tigerbeetle/src/test/network.zig +45 -19
  84. package/src/tigerbeetle/src/test/packet_simulator.zig +40 -29
  85. package/src/tigerbeetle/src/test/priority_queue.zig +645 -0
  86. package/src/tigerbeetle/src/test/state_checker.zig +91 -69
  87. package/src/tigerbeetle/src/test/state_machine.zig +11 -35
  88. package/src/tigerbeetle/src/test/storage.zig +470 -106
  89. package/src/tigerbeetle/src/test/storage_checker.zig +204 -0
  90. package/src/tigerbeetle/src/tigerbeetle.zig +15 -16
  91. package/src/tigerbeetle/src/unit_tests.zig +13 -1
  92. package/src/tigerbeetle/src/util.zig +97 -11
  93. package/src/tigerbeetle/src/vopr.zig +495 -0
  94. package/src/tigerbeetle/src/vsr/client.zig +21 -3
  95. package/src/tigerbeetle/src/vsr/journal.zig +293 -212
  96. package/src/tigerbeetle/src/vsr/replica.zig +1086 -515
  97. package/src/tigerbeetle/src/vsr/superblock.zig +382 -637
  98. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +14 -16
  99. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +416 -153
  100. package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +332 -0
  101. package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +349 -0
  102. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +62 -12
  103. package/src/tigerbeetle/src/vsr/superblock_quorums.zig +394 -0
  104. package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +312 -0
  105. package/src/tigerbeetle/src/vsr.zig +94 -60
  106. package/src/tigerbeetle/scripts/vopr.bat +0 -48
  107. package/src/tigerbeetle/scripts/vopr.sh +0 -33
  108. package/src/tigerbeetle/src/benchmark_array_search.zig +0 -317
  109. package/src/tigerbeetle/src/benchmarks/perf.zig +0 -299
@@ -23,7 +23,7 @@ pub const messages_max_replica = messages_max: {
23
23
  var sum: usize = 0;
24
24
 
25
25
  sum += config.io_depth_read + config.io_depth_write; // Journal I/O
26
- sum += config.clients_max; // Replica.client_table
26
+ sum += config.clients_max; // SuperBlock.client_table
27
27
  sum += 1; // Replica.loopback_queue
28
28
  sum += config.pipeline_max; // Replica.pipeline
29
29
  sum += 1; // Replica.commit_prepare
@@ -37,7 +37,6 @@ pub const messages_max_replica = messages_max: {
37
37
  // Handle Replica.commit_op's reply:
38
38
  // (This is separate from the burst +1 because they may occur concurrently).
39
39
  sum += 1;
40
- sum += 20; // TODO Our network simulator allows up to 20 messages for path_capacity_max.
41
40
 
42
41
  break :messages_max sum;
43
42
  };
@@ -51,7 +50,6 @@ pub const messages_max_client = messages_max: {
51
50
  sum += config.client_request_queue_max; // Client.request_queue
52
51
  // Handle bursts (e.g. Connection.parse_message, or sending a ping when the send queue is full).
53
52
  sum += 1;
54
- sum += 20; // TODO Our network simulator allows up to 20 messages for path_capacity_max.
55
53
 
56
54
  break :messages_max sum;
57
55
  };
@@ -68,9 +66,7 @@ pub const MessagePool = struct {
68
66
  pub const Message = struct {
69
67
  // TODO: replace this with a header() function to save memory
70
68
  header: *Header,
71
- /// This buffer is aligned to config.sector_size and casting to that alignment in order
72
- /// to perform Direct I/O is safe.
73
- buffer: []u8,
69
+ buffer: []align(config.sector_size) u8,
74
70
  references: u32 = 0,
75
71
  next: ?*Message,
76
72
 
@@ -80,25 +76,27 @@ pub const MessagePool = struct {
80
76
  return message;
81
77
  }
82
78
 
83
- pub fn body(message: *Message) []align(@alignOf(Header)) u8 {
84
- return @alignCast(
85
- @alignOf(Header),
86
- message.buffer[@sizeOf(Header)..message.header.size],
87
- );
79
+ pub fn body(message: *const Message) []align(@sizeOf(Header)) u8 {
80
+ return message.buffer[@sizeOf(Header)..message.header.size];
88
81
  }
89
82
  };
90
83
 
91
- /// List of currently unused messages of message_size_max_padded
84
+ /// List of currently unused messages.
92
85
  free_list: ?*Message,
93
86
 
87
+ messages_max: usize,
88
+
94
89
  pub fn init(allocator: mem.Allocator, process_type: vsr.ProcessType) error{OutOfMemory}!MessagePool {
95
- const messages_max: usize = switch (process_type) {
90
+ return MessagePool.init_capacity(allocator, switch (process_type) {
96
91
  .replica => messages_max_replica,
97
92
  .client => messages_max_client,
98
- };
93
+ });
94
+ }
99
95
 
100
- var ret: MessagePool = .{
96
+ pub fn init_capacity(allocator: mem.Allocator, messages_max: usize) error{OutOfMemory}!MessagePool {
97
+ var pool: MessagePool = .{
101
98
  .free_list = null,
99
+ .messages_max = messages_max,
102
100
  };
103
101
  {
104
102
  var i: usize = 0;
@@ -113,22 +111,27 @@ pub const MessagePool = struct {
113
111
  message.* = .{
114
112
  .header = mem.bytesAsValue(Header, buffer[0..@sizeOf(Header)]),
115
113
  .buffer = buffer,
116
- .next = ret.free_list,
114
+ .next = pool.free_list,
117
115
  };
118
- ret.free_list = message;
116
+ pool.free_list = message;
119
117
  }
120
118
  }
121
119
 
122
- return ret;
120
+ return pool;
123
121
  }
124
-
122
+
125
123
  /// Frees all messages that were unused or returned to the pool via unref().
126
124
  pub fn deinit(pool: *MessagePool, allocator: mem.Allocator) void {
125
+ var free_count: usize = 0;
127
126
  while (pool.free_list) |message| {
128
127
  pool.free_list = message.next;
129
128
  allocator.free(message.buffer);
130
129
  allocator.destroy(message);
130
+ free_count += 1;
131
131
  }
132
+ // If the MessagePool is being deinitialized, all messages should have already been
133
+ // released to the pool.
134
+ assert(free_count == pool.messages_max);
132
135
  }
133
136
 
134
137
  /// Get an unused message with a buffer of config.message_size_max.
@@ -3,15 +3,19 @@ const assert = std.debug.assert;
3
3
  const math = std.math;
4
4
  const mem = std.mem;
5
5
 
6
+ const util = @import("util.zig");
7
+
6
8
  /// A First In, First Out ring buffer holding at most `count_max` elements.
7
9
  pub fn RingBuffer(
8
10
  comptime T: type,
9
- comptime count_max: usize,
11
+ comptime count_max_: usize,
10
12
  comptime buffer_type: enum { array, pointer },
11
13
  ) type {
12
14
  return struct {
13
15
  const Self = @This();
14
16
 
17
+ pub const count_max = count_max_;
18
+
15
19
  buffer: switch (buffer_type) {
16
20
  .array => [count_max]T,
17
21
  .pointer => *[count_max]T,
@@ -145,8 +149,8 @@ pub fn RingBuffer(
145
149
  const pre_wrap_count = math.min(items.len, self.buffer.len - pre_wrap_start);
146
150
  const post_wrap_count = items.len - pre_wrap_count;
147
151
 
148
- mem.copy(T, self.buffer[pre_wrap_start..], items[0..pre_wrap_count]);
149
- mem.copy(T, self.buffer[0..post_wrap_count], items[pre_wrap_count..]);
152
+ util.copy_disjoint(.inexact, T, self.buffer[pre_wrap_start..], items[0..pre_wrap_count]);
153
+ util.copy_disjoint(.exact, T, self.buffer[0..post_wrap_count], items[pre_wrap_count..]);
150
154
 
151
155
  self.count += items.len;
152
156
  }
@@ -3,16 +3,24 @@ const builtin = @import("builtin");
3
3
  const assert = std.debug.assert;
4
4
  const mem = std.mem;
5
5
 
6
+ const tb = @import("tigerbeetle.zig");
6
7
  const config = @import("config.zig");
8
+ const vsr = @import("vsr.zig");
9
+ const Header = vsr.Header;
7
10
 
8
11
  const Client = @import("test/cluster.zig").Client;
9
12
  const Cluster = @import("test/cluster.zig").Cluster;
10
13
  const ClusterOptions = @import("test/cluster.zig").ClusterOptions;
11
- const Header = @import("vsr.zig").Header;
12
14
  const Replica = @import("test/cluster.zig").Replica;
13
- const StateChecker = @import("test/state_checker.zig").StateChecker;
14
15
  const StateMachine = @import("test/cluster.zig").StateMachine;
16
+ const StateChecker = @import("test/state_checker.zig").StateChecker;
17
+ const StorageChecker = @import("test/storage_checker.zig").StorageChecker;
15
18
  const PartitionMode = @import("test/packet_simulator.zig").PartitionMode;
19
+ const MessageBus = @import("test/message_bus.zig").MessageBus;
20
+ const auditor = @import("test/accounting/auditor.zig");
21
+ const Workload = @import("test/accounting/workload.zig").WorkloadType(StateMachine);
22
+ const Conductor = @import("test/conductor.zig").ConductorType(Client, MessageBus, StateMachine, Workload);
23
+ const IdPermutation = @import("test/id.zig").IdPermutation;
16
24
 
17
25
  /// The `log` namespace in this root file is required to implement our custom `log` function.
18
26
  const output = std.log.scoped(.state_checker);
@@ -21,21 +29,25 @@ const output = std.log.scoped(.state_checker);
21
29
  /// This will run much slower but will trace all logic across the cluster.
22
30
  const log_state_transitions_only = builtin.mode != .Debug;
23
31
 
24
- const log_health = std.log.scoped(.health);
25
- const log_faults = std.log.scoped(.faults);
32
+ const log_simulator = std.log.scoped(.simulator);
26
33
 
27
34
  /// You can fine tune your log levels even further (debug/info/notice/warn/err/crit/alert/emerg):
28
35
  pub const log_level: std.log.Level = if (log_state_transitions_only) .info else .debug;
29
36
 
30
37
  /// Modifies compile-time constants on "config.zig".
31
38
  pub const deployment_environment = .simulation;
32
- comptime {
33
- assert(config.deployment_environment == .simulation);
34
- }
39
+
40
+ const cluster_id = 0;
35
41
 
36
42
  var cluster: *Cluster = undefined;
43
+ var state_checker: *StateChecker = undefined;
44
+ var storage_checker: *StorageChecker = undefined;
37
45
 
38
46
  pub fn main() !void {
47
+ comptime {
48
+ assert(config.deployment_environment == .simulation);
49
+ }
50
+
39
51
  // This must be initialized at runtime as stderr is not comptime known on e.g. Windows.
40
52
  log_buffer.unbuffered_writer = std.io.getStdErr().writer();
41
53
 
@@ -76,17 +88,25 @@ pub fn main() !void {
76
88
  const client_count = 1 + random.uintLessThan(u8, config.clients_max);
77
89
  const node_count = replica_count + client_count;
78
90
 
79
- const ticks_max = 100_000_000;
91
+ const ticks_max = 50_000_000;
80
92
  const request_probability = 1 + random.uintLessThan(u8, 99);
81
93
  const idle_on_probability = random.uintLessThan(u8, 20);
82
94
  const idle_off_probability = 10 + random.uintLessThan(u8, 10);
83
95
 
96
+ // TODO: When block recovery and state transfer are implemented, remove this flag to allow
97
+ // crashes to coexist with WAL wraps.
98
+ const requests_committed_max: usize = config.journal_slot_count * 3;
99
+
84
100
  const cluster_options: ClusterOptions = .{
85
- .cluster = 0,
101
+ .cluster = cluster_id,
86
102
  .replica_count = replica_count,
87
103
  .client_count = client_count,
104
+ // TODO Compute an upper-bound for this based on requests_committed_max.
105
+ .grid_size_max = 1024 * 1024 * 256,
88
106
  .seed = random.int(u64),
89
- .on_change_state = on_change_replica,
107
+ .on_change_state = on_replica_change_state,
108
+ .on_compact = on_replica_compact,
109
+ .on_checkpoint = on_replica_checkpoint,
90
110
  .network_options = .{
91
111
  .packet_simulator_options = .{
92
112
  .replica_count = replica_count,
@@ -117,21 +137,75 @@ pub fn main() !void {
117
137
  .write_latency_mean = 3 + random.uintLessThan(u16, 100),
118
138
  .read_fault_probability = random.uintLessThan(u8, 10),
119
139
  .write_fault_probability = random.uintLessThan(u8, 10),
140
+ // TODO Allow WAL faults on crash when replica_count=1 when redundant-header-repair
141
+ // is implemented after recovering with decision=fix. Otherwise we can end up with
142
+ // multiple crashes faulting first a redundant headers, then a prepare, upgrading
143
+ // a decision=fix to decision=vsr.
144
+ .crash_fault_probability = if (replica_count == 1) 0 else 80 + random.uintLessThan(u8, 21),
145
+ .faulty_superblock = true,
120
146
  },
121
147
  .health_options = .{
122
- .crash_probability = 0.0001,
148
+ .crash_probability = 0.000001,
123
149
  .crash_stability = random.uintLessThan(u32, 1_000),
124
- .restart_probability = 0.01,
150
+ .restart_probability = 0.0001,
125
151
  .restart_stability = random.uintLessThan(u32, 1_000),
126
152
  },
127
153
  .state_machine_options = .{
128
- .seed = random.int(u64),
129
- .prefetch_mean = 5 + random.uintLessThan(u64, 10),
130
- .compact_mean = 5 + random.uintLessThan(u64, 10),
131
- .checkpoint_mean = 5 + random.uintLessThan(u64, 10),
154
+ // TODO What should these fields be set to? Can they be randomized (and with what constraints)?
155
+ .lsm_forest_node_count = 4096,
156
+ .cache_entries_accounts = 2048,
157
+ .cache_entries_transfers = 2048,
158
+ .cache_entries_posted = 2048,
132
159
  },
133
160
  };
134
161
 
162
+ const workload_options: Workload.Options = .{
163
+ .auditor_options = .{
164
+ .accounts_max = 2 + random.uintLessThan(usize, 128),
165
+ .account_id_permutation = random_id_permutation(random),
166
+ .client_count = client_count,
167
+ .transfers_pending_max = 256,
168
+ .in_flight_max = Conductor.stalled_queue_capacity,
169
+ },
170
+ .transfer_id_permutation = random_id_permutation(random),
171
+ .operations = .{
172
+ .create_accounts = 1 + random.uintLessThan(usize, 10),
173
+ .create_transfers = 1 + random.uintLessThan(usize, 100),
174
+ .lookup_accounts = 1 + random.uintLessThan(usize, 20),
175
+ .lookup_transfers = 1 + random.uintLessThan(usize, 20),
176
+ },
177
+ .create_account_invalid_probability = 1,
178
+ .create_transfer_invalid_probability = 1,
179
+ .create_transfer_limit_probability = random.uintLessThan(u8, 101),
180
+ .create_transfer_pending_probability = 1 + random.uintLessThan(u8, 100),
181
+ .create_transfer_post_probability = 1 + random.uintLessThan(u8, 50),
182
+ .create_transfer_void_probability = 1 + random.uintLessThan(u8, 50),
183
+ .lookup_account_invalid_probability = 1,
184
+ .lookup_transfer = .{
185
+ .delivered = 1 + random.uintLessThan(usize, 10),
186
+ .sending = 1 + random.uintLessThan(usize, 10),
187
+ },
188
+ .lookup_transfer_span_mean = 10 + random.uintLessThan(usize, 1000),
189
+ .account_limit_probability = random.uintLessThan(u8, 80),
190
+ .linked_valid_probability = random.uintLessThan(u8, 101),
191
+ // 100% chance because this only applies to consecutive invalid transfers, which are rare.
192
+ .linked_invalid_probability = 100,
193
+ // TODO(Timeouts): When timeouts are implemented in the StateMachine, change this to the
194
+ // (commented out) value so that timeouts can actually trigger.
195
+ .pending_timeout_mean = std.math.maxInt(u64) / 2,
196
+ // .pending_timeout_mean = 1 + random.uintLessThan(usize, 1_000_000_000 / 4),
197
+ .accounts_batch_size_min = 0,
198
+ .accounts_batch_size_span = 1 + random.uintLessThan(
199
+ usize,
200
+ StateMachine.constants.batch_max.create_accounts,
201
+ ),
202
+ .transfers_batch_size_min = 0,
203
+ .transfers_batch_size_span = 1 + random.uintLessThan(
204
+ usize,
205
+ StateMachine.constants.batch_max.create_transfers,
206
+ ),
207
+ };
208
+
135
209
  output.info(
136
210
  \\
137
211
  \\ SEED={}
@@ -163,10 +237,6 @@ pub fn main() !void {
163
237
  \\ crash_stability={} ticks
164
238
  \\ restart_probability={d}%
165
239
  \\ restart_stability={} ticks
166
- \\ prefetch_mean={} ticks
167
- \\ compact_mean={} ticks
168
- \\ checkpoint_mean={} ticks
169
- \\
170
240
  , .{
171
241
  seed,
172
242
  replica_count,
@@ -196,19 +266,46 @@ pub fn main() !void {
196
266
  cluster_options.health_options.crash_stability,
197
267
  cluster_options.health_options.restart_probability * 100,
198
268
  cluster_options.health_options.restart_stability,
199
- cluster_options.state_machine_options.prefetch_mean,
200
- cluster_options.state_machine_options.compact_mean,
201
- cluster_options.state_machine_options.checkpoint_mean,
202
269
  });
203
270
 
204
271
  cluster = try Cluster.create(allocator, random, cluster_options);
205
272
  defer cluster.destroy();
206
273
 
207
- cluster.state_checker = try StateChecker.init(allocator, cluster);
208
- defer cluster.state_checker.deinit();
274
+ var workload = try Workload.init(allocator, random, workload_options);
275
+ defer workload.deinit(allocator);
209
276
 
210
- var requests_sent: u64 = 0;
211
- var idle = false;
277
+ var conductor = try Conductor.init(allocator, random, &workload, .{
278
+ .cluster = cluster_id,
279
+ .replica_count = replica_count,
280
+ .client_count = client_count,
281
+ .message_bus_options = .{ .network = &cluster.network },
282
+ .requests_max = requests_committed_max,
283
+ .request_probability = request_probability,
284
+ .idle_on_probability = idle_on_probability,
285
+ .idle_off_probability = idle_off_probability,
286
+ });
287
+ defer conductor.deinit(allocator);
288
+
289
+ for (conductor.clients) |*client| {
290
+ cluster.network.link(client.message_bus.process, &client.message_bus);
291
+ }
292
+
293
+ state_checker = try allocator.create(StateChecker);
294
+ defer allocator.destroy(state_checker);
295
+
296
+ state_checker.* = try StateChecker.init(
297
+ allocator,
298
+ cluster_id,
299
+ cluster.replicas,
300
+ conductor.clients,
301
+ );
302
+ defer state_checker.deinit();
303
+
304
+ storage_checker = try allocator.create(StorageChecker);
305
+ defer allocator.destroy(storage_checker);
306
+
307
+ storage_checker.* = StorageChecker.init(allocator);
308
+ defer storage_checker.deinit();
212
309
 
213
310
  // The minimum number of healthy replicas required for a crashed replica to be able to recover.
214
311
  const replica_normal_min = replicas: {
@@ -226,8 +323,9 @@ pub fn main() !void {
226
323
  storage.faulty = replica_normal_min <= i;
227
324
  }
228
325
 
326
+ // The maximum number of transitions from calling `client.request()`, not including
327
+ // `register` messages.
229
328
  // TODO When storage is supported, run more transitions than fit in the journal.
230
- const transitions_max = config.journal_slot_count / 2;
231
329
  var tick: u64 = 0;
232
330
  while (tick < ticks_max) : (tick += 1) {
233
331
  const health_options = &cluster.options.health_options;
@@ -247,27 +345,30 @@ pub fn main() !void {
247
345
  // complete the VSR recovery protocol either.
248
346
  if (cluster.health[replica] == .up and crashes == 0) {
249
347
  if (storage.faulty) {
250
- log_faults.debug("{}: disable storage faults", .{replica});
348
+ log_simulator.debug("{}: disable storage faults", .{replica});
251
349
  storage.faulty = false;
252
350
  }
253
351
  } else {
254
352
  // When a journal recovers for the first time, enable its storage faults.
255
353
  // Future crashes will recover in the presence of faults.
256
354
  if (!storage.faulty) {
257
- log_faults.debug("{}: enable storage faults", .{replica});
355
+ log_simulator.debug("{}: enable storage faults", .{replica});
258
356
  storage.faulty = true;
259
357
  }
260
358
  }
261
359
  }
262
- storage.tick();
263
360
  }
264
361
 
265
- for (cluster.replicas) |*replica| {
362
+ for (cluster.replicas) |*replica, index| {
266
363
  switch (cluster.health[replica.replica]) {
267
364
  .up => |*ticks| {
268
365
  ticks.* -|= 1;
269
366
  replica.tick();
270
- cluster.state_checker.check_state(replica.replica);
367
+ cluster.storages[index].tick();
368
+
369
+ state_checker.check_state(replica.replica) catch |err| {
370
+ fatal(.correctness, "state checker error: {}", .{err});
371
+ };
271
372
 
272
373
  if (ticks.* != 0) continue;
273
374
  if (crashes == 0) continue;
@@ -278,7 +379,7 @@ pub fn main() !void {
278
379
  }
279
380
 
280
381
  if (!try cluster.crash_replica(replica.replica)) continue;
281
- log_health.debug("{}: crash replica", .{replica.replica});
382
+ log_simulator.debug("{}: crash replica", .{replica.replica});
282
383
  crashes -= 1;
283
384
  },
284
385
  .down => |*ticks| {
@@ -289,59 +390,48 @@ pub fn main() !void {
289
390
  assert(replica.status == .recovering);
290
391
  if (ticks.* == 0 and chance_f64(random, health_options.restart_probability)) {
291
392
  cluster.health[replica.replica] = .{ .up = health_options.restart_stability };
292
- log_health.debug("{}: restart replica", .{replica.replica});
393
+ log_simulator.debug("{}: restart replica", .{replica.replica});
293
394
  }
294
395
  },
295
396
  }
296
397
  }
297
398
 
298
399
  cluster.network.packet_simulator.tick(cluster.health);
400
+ conductor.tick();
299
401
 
300
- for (cluster.clients) |*client| client.tick();
301
-
302
- if (cluster.state_checker.transitions == transitions_max) {
303
- if (cluster.state_checker.convergence() and
304
- cluster.replica_up_count() == replica_count)
305
- {
306
- break;
307
- }
308
- continue;
309
- } else {
310
- assert(cluster.state_checker.transitions < transitions_max);
402
+ if (state_checker.convergence() and conductor.done() and
403
+ cluster.replica_up_count() == replica_count)
404
+ {
405
+ break;
311
406
  }
312
-
313
- if (requests_sent < transitions_max) {
314
- if (idle) {
315
- if (chance(random, idle_off_probability)) idle = false;
316
- } else {
317
- if (chance(random, request_probability)) {
318
- if (send_request(random)) requests_sent += 1;
319
- }
320
- if (chance(random, idle_on_probability)) idle = true;
321
- }
322
- }
323
- }
324
-
325
- if (cluster.state_checker.transitions < transitions_max) {
407
+ } else {
326
408
  output.err("you can reproduce this failure with seed={}", .{seed});
327
- @panic("unable to complete transitions_max before ticks_max");
409
+ fatal(.liveness, "unable to complete requests_committed_max before ticks_max", .{});
328
410
  }
329
411
 
330
- assert(cluster.state_checker.convergence());
412
+ assert(state_checker.convergence());
413
+ assert(conductor.done());
331
414
 
332
415
  output.info("\n PASSED ({} ticks)", .{tick});
333
416
  }
334
417
 
335
- /// Returns true, `p` percent of the time, else false.
336
- fn chance(random: std.rand.Random, p: u8) bool {
337
- assert(p <= 100);
338
- return random.uintLessThan(u8, 100) < p;
418
+ pub const ExitCode = enum(u8) {
419
+ ok = 0,
420
+ crash = 127, // Any assertion crash will be given an exit code of 127 by default.
421
+ liveness = 128,
422
+ correctness = 129,
423
+ };
424
+
425
+ /// Print an error message and then exit with an exit code.
426
+ fn fatal(exit_code: ExitCode, comptime fmt_string: []const u8, args: anytype) noreturn {
427
+ output.err(fmt_string, args);
428
+ std.os.exit(@enumToInt(exit_code));
339
429
  }
340
430
 
341
431
  /// Returns true, `p` percent of the time, else false.
342
432
  fn chance_f64(random: std.rand.Random, p: f64) bool {
343
433
  assert(p <= 100.0);
344
- return random.float(f64) < p;
434
+ return random.float(f64) * 100.0 < p;
345
435
  }
346
436
 
347
437
  /// Returns the next argument for the simulator or null (if none available)
@@ -350,62 +440,22 @@ fn args_next(args: *std.process.ArgIterator, allocator: std.mem.Allocator) ?[:0]
350
440
  return err_or_bytes catch @panic("Unable to extract next value from args");
351
441
  }
352
442
 
353
- fn on_change_replica(replica: *Replica) void {
354
- cluster.state_checker.check_state(replica.replica);
443
+ fn on_replica_change_state(replica: *const Replica) void {
444
+ state_checker.check_state(replica.replica) catch |err| {
445
+ fatal(.correctness, "state checker error: {}", .{err});
446
+ };
355
447
  }
356
448
 
357
- fn send_request(random: std.rand.Random) bool {
358
- const client_index = random.uintLessThan(u8, cluster.options.client_count);
359
-
360
- const client = &cluster.clients[client_index];
361
- const checker_request_queue = &cluster.state_checker.client_requests[client_index];
362
-
363
- // Ensure that we don't shortchange testing of the full client request queue length:
364
- assert(client.request_queue.buffer.len <= checker_request_queue.buffer.len);
365
- if (client.request_queue.full()) return false;
366
- if (checker_request_queue.full()) return false;
367
-
368
- const message = client.get_message();
369
- defer client.unref(message);
370
-
371
- const body_size_max = config.message_size_max - @sizeOf(Header);
372
- const body_size: u32 = switch (random.uintLessThan(u8, 100)) {
373
- 0...10 => 0,
374
- 11...89 => random.uintLessThan(u32, body_size_max),
375
- 90...99 => body_size_max,
376
- else => unreachable,
449
+ fn on_replica_compact(replica: *const Replica) void {
450
+ storage_checker.replica_compact(replica) catch |err| {
451
+ fatal(.correctness, "storage checker error: {}", .{err});
377
452
  };
378
-
379
- const body = message.buffer[@sizeOf(Header)..][0..body_size];
380
- if (chance(random, 10)) {
381
- std.mem.set(u8, body, 0);
382
- } else {
383
- random.bytes(body);
384
- }
385
-
386
- // While hashing the client ID with the request body prevents input collisions across clients,
387
- // it's still possible for the same client to generate the same body, and therefore input hash.
388
- const client_input = StateMachine.hash(client.id, body);
389
- checker_request_queue.push_assume_capacity(client_input);
390
- std.log.scoped(.test_client).debug("client {} sending input={x}", .{
391
- client_index,
392
- client_input,
393
- });
394
-
395
- client.request(0, client_callback, .hash, message, body_size);
396
-
397
- return true;
398
453
  }
399
454
 
400
- fn client_callback(
401
- user_data: u128,
402
- operation: StateMachine.Operation,
403
- results: Client.Error![]const u8,
404
- ) void {
405
- _ = operation;
406
- _ = results catch unreachable;
407
-
408
- assert(user_data == 0);
455
+ fn on_replica_checkpoint(replica: *const Replica) void {
456
+ storage_checker.replica_checkpoint(replica) catch |err| {
457
+ fatal(.correctness, "storage checker error: {}", .{err});
458
+ };
409
459
  }
410
460
 
411
461
  /// Returns a random partitioning mode, excluding .custom
@@ -416,7 +466,17 @@ fn random_partition_mode(random: std.rand.Random) PartitionMode {
416
466
  return @intToEnum(PartitionMode, enumAsInt);
417
467
  }
418
468
 
419
- fn parse_seed(bytes: []const u8) u64 {
469
+ fn random_id_permutation(random: std.rand.Random) IdPermutation {
470
+ return switch (random.uintLessThan(usize, 4)) {
471
+ 0 => .{ .identity = {} },
472
+ 1 => .{ .inversion = {} },
473
+ 2 => .{ .zigzag = {} },
474
+ 3 => .{ .random = random.int(u64) },
475
+ else => unreachable,
476
+ };
477
+ }
478
+
479
+ pub fn parse_seed(bytes: []const u8) u64 {
420
480
  return std.fmt.parseUnsigned(u64, bytes, 10) catch |err| switch (err) {
421
481
  error.Overflow => @panic("seed exceeds a 64-bit unsigned integer"),
422
482
  error.InvalidCharacter => @panic("seed contains an invalid character"),