tigerbeetle-node 0.3.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71):
  1. package/README.md +21 -7
  2. package/dist/benchmark.js +1 -1
  3. package/dist/benchmark.js.map +1 -1
  4. package/dist/index.d.ts +22 -20
  5. package/dist/index.js +40 -18
  6. package/dist/index.js.map +1 -1
  7. package/dist/test.js +13 -1
  8. package/dist/test.js.map +1 -1
  9. package/package.json +12 -12
  10. package/scripts/postinstall.sh +2 -2
  11. package/src/benchmark.ts +4 -4
  12. package/src/index.ts +35 -9
  13. package/src/node.zig +139 -28
  14. package/src/test.ts +19 -5
  15. package/src/tigerbeetle/scripts/benchmark.sh +10 -3
  16. package/src/tigerbeetle/scripts/install.sh +2 -2
  17. package/src/tigerbeetle/scripts/install_zig.bat +109 -0
  18. package/src/tigerbeetle/scripts/install_zig.sh +21 -4
  19. package/src/tigerbeetle/scripts/vopr.bat +48 -0
  20. package/src/tigerbeetle/scripts/vopr.sh +33 -0
  21. package/src/tigerbeetle/src/benchmark.zig +74 -42
  22. package/src/tigerbeetle/src/cli.zig +136 -83
  23. package/src/tigerbeetle/src/config.zig +80 -26
  24. package/src/tigerbeetle/src/demo.zig +101 -78
  25. package/src/tigerbeetle/src/demo_01_create_accounts.zig +2 -7
  26. package/src/tigerbeetle/src/demo_02_lookup_accounts.zig +2 -7
  27. package/src/tigerbeetle/src/demo_03_create_transfers.zig +2 -7
  28. package/src/tigerbeetle/src/demo_04_create_transfers_two_phase_commit.zig +2 -5
  29. package/src/tigerbeetle/src/demo_05_accept_transfers.zig +2 -7
  30. package/src/tigerbeetle/src/demo_06_reject_transfers.zig +2 -7
  31. package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +8 -0
  32. package/src/tigerbeetle/src/fifo.zig +20 -11
  33. package/src/tigerbeetle/src/io.zig +35 -22
  34. package/src/tigerbeetle/src/io_darwin.zig +701 -0
  35. package/src/tigerbeetle/src/main.zig +72 -25
  36. package/src/tigerbeetle/src/message_bus.zig +379 -456
  37. package/src/tigerbeetle/src/message_pool.zig +3 -3
  38. package/src/tigerbeetle/src/ring_buffer.zig +192 -37
  39. package/src/tigerbeetle/src/simulator.zig +317 -0
  40. package/src/tigerbeetle/src/state_machine.zig +846 -38
  41. package/src/tigerbeetle/src/storage.zig +488 -90
  42. package/src/tigerbeetle/src/test/cluster.zig +221 -0
  43. package/src/tigerbeetle/src/test/message_bus.zig +92 -0
  44. package/src/tigerbeetle/src/test/network.zig +182 -0
  45. package/src/tigerbeetle/src/test/packet_simulator.zig +371 -0
  46. package/src/tigerbeetle/src/test/state_checker.zig +142 -0
  47. package/src/tigerbeetle/src/test/state_machine.zig +71 -0
  48. package/src/tigerbeetle/src/test/storage.zig +375 -0
  49. package/src/tigerbeetle/src/test/time.zig +84 -0
  50. package/src/tigerbeetle/src/tigerbeetle.zig +6 -3
  51. package/src/tigerbeetle/src/time.zig +65 -0
  52. package/src/tigerbeetle/src/unit_tests.zig +14 -0
  53. package/src/tigerbeetle/src/vsr/client.zig +519 -0
  54. package/src/tigerbeetle/src/vsr/clock.zig +829 -0
  55. package/src/tigerbeetle/src/vsr/journal.zig +1368 -0
  56. package/src/tigerbeetle/src/vsr/marzullo.zig +306 -0
  57. package/src/tigerbeetle/src/vsr/replica.zig +4248 -0
  58. package/src/tigerbeetle/src/vsr.zig +601 -0
  59. package/src/tigerbeetle/LICENSE +0 -177
  60. package/src/tigerbeetle/README.md +0 -116
  61. package/src/tigerbeetle/src/client.zig +0 -319
  62. package/src/tigerbeetle/src/concurrent_ranges.zig +0 -162
  63. package/src/tigerbeetle/src/fixed_array_list.zig +0 -53
  64. package/src/tigerbeetle/src/io_async.zig +0 -600
  65. package/src/tigerbeetle/src/journal.zig +0 -567
  66. package/src/tigerbeetle/src/test_client.zig +0 -41
  67. package/src/tigerbeetle/src/test_main.zig +0 -118
  68. package/src/tigerbeetle/src/test_message_bus.zig +0 -132
  69. package/src/tigerbeetle/src/vr/journal.zig +0 -672
  70. package/src/tigerbeetle/src/vr/replica.zig +0 -3061
  71. package/src/tigerbeetle/src/vr.zig +0 -374
@@ -0,0 +1,829 @@
1
+ const std = @import("std");
2
+ const assert = std.debug.assert;
3
+ const log = std.log.scoped(.clock);
4
+
5
+ const config = @import("../config.zig");
6
+
7
+ const clock_offset_tolerance_max: u64 = config.clock_offset_tolerance_max_ms * std.time.ns_per_ms;
8
+ const epoch_max: u64 = config.clock_epoch_max_ms * std.time.ns_per_ms;
9
+ const window_min: u64 = config.clock_synchronization_window_min_ms * std.time.ns_per_ms;
10
+ const window_max: u64 = config.clock_synchronization_window_max_ms * std.time.ns_per_ms;
11
+
12
+ const Marzullo = @import("marzullo.zig").Marzullo;
13
+
14
/// Returns a clock type parameterized over its time source, so that the same synchronization
/// logic runs against either the system clock or a deterministic simulated clock in tests.
pub fn Clock(comptime Time: type) type {
    return struct {
        const Self = @This();

        const Sample = struct {
            /// The relative difference between our wall clock reading and that of the remote clock source.
            clock_offset: i64,
            one_way_delay: u64,
        };

        const Epoch = struct {
            /// The best clock offset sample per remote clock source (with minimum one way delay) collected
            /// over the course of a window period of several seconds.
            sources: []?Sample,

            /// The total number of samples learned while synchronizing this epoch.
            samples: usize,

            /// The monotonic clock timestamp when this epoch began. We use this to measure elapsed time.
            monotonic: u64,

            /// The wall clock timestamp when this epoch began. We add the elapsed monotonic time to this
            /// plus the synchronized clock offset to arrive at a synchronized realtime timestamp. We
            /// capture this realtime when starting the epoch, before we take any samples, to guard against
            /// any jumps in the system's realtime clock from impacting our measurements.
            realtime: i64,

            /// Once we have enough source clock offset samples in agreement, the epoch is synchronized.
            /// We then have lower and upper bounds on the true cluster time, and can install this epoch for
            /// subsequent clock readings. This epoch is then valid for several seconds, while clock drift
            /// has not had enough time to accumulate into any significant clock skew, and while we collect
            /// samples for the next epoch to refresh and replace this one.
            synchronized: ?Marzullo.Interval,

            /// A guard to prevent synchronizing too often without having learned any new samples.
            learned: bool = false,

            fn elapsed(epoch: *Epoch, clock: *Self) u64 {
                return clock.monotonic() - epoch.monotonic;
            }

            fn reset(epoch: *Epoch, clock: *Self) void {
                std.mem.set(?Sample, epoch.sources, null);
                // A replica always has zero clock offset and network delay to its own system time reading:
                epoch.sources[clock.replica] = Sample{
                    .clock_offset = 0,
                    .one_way_delay = 0,
                };
                epoch.samples = 1;
                epoch.monotonic = clock.monotonic();
                epoch.realtime = clock.realtime();
                epoch.synchronized = null;
                epoch.learned = false;
            }

            fn sources_sampled(epoch: *Epoch) usize {
                var count: usize = 0;
                for (epoch.sources) |sampled| {
                    if (sampled != null) count += 1;
                }
                return count;
            }
        };

        /// The index of the replica using this clock to provide synchronized time.
        replica: u8,

        /// The underlying time source for this clock (system time or deterministic time).
        time: *Time,

        /// An epoch from which the clock can read synchronized clock timestamps within safe bounds.
        /// At least `config.clock_synchronization_window_min_ms` is needed for this to be ready to use.
        epoch: Epoch,

        /// The next epoch (collecting samples and being synchronized) to replace the current epoch.
        window: Epoch,

        /// A static allocation to convert window samples into tuple bounds for Marzullo's algorithm.
        marzullo_tuples: []Marzullo.Tuple,

        /// A kill switch to revert to unsynchronized realtime.
        synchronization_disabled: bool,

        pub fn init(
            allocator: *std.mem.Allocator,
            // The size of the cluster, i.e. the number of clock sources (including this replica).
            replica_count: u8,
            replica: u8,
            time: *Time,
        ) !Self {
            assert(replica_count > 0);
            assert(replica < replica_count);

            var epoch: Epoch = undefined;
            epoch.sources = try allocator.alloc(?Sample, replica_count);
            errdefer allocator.free(epoch.sources);

            var window: Epoch = undefined;
            window.sources = try allocator.alloc(?Sample, replica_count);
            errdefer allocator.free(window.sources);

            // There are two Marzullo tuple bounds (lower and upper) per source clock offset sample:
            var marzullo_tuples = try allocator.alloc(Marzullo.Tuple, replica_count * 2);
            errdefer allocator.free(marzullo_tuples);

            var self = Self{
                .replica = replica,
                .time = time,
                .epoch = epoch,
                .window = window,
                .marzullo_tuples = marzullo_tuples,
                .synchronization_disabled = replica_count == 1, // A cluster of one cannot synchronize.
            };

            // Reset the current epoch to be unsynchronized,
            self.epoch.reset(&self);
            // and open a new epoch window to start collecting samples...
            self.window.reset(&self);

            return self;
        }

        pub fn deinit(self: *Self, allocator: *std.mem.Allocator) void {
            allocator.free(self.epoch.sources);
            allocator.free(self.window.sources);
            allocator.free(self.marzullo_tuples);
        }

        /// Called by `Replica.on_pong()` with:
        /// * the index of the `replica` that has replied to our ping with a pong,
        /// * our monotonic timestamp `m0` embedded in the ping we sent, carried over into this pong,
        /// * the remote replica's `realtime()` timestamp `t1`, and
        /// * our monotonic timestamp `m2` as captured by our `Replica.on_pong()` handler.
        pub fn learn(self: *Self, replica: u8, m0: u64, t1: i64, m2: u64) void {
            if (self.synchronization_disabled) return;

            // A network routing fault must have replayed one of our outbound messages back against us:
            if (replica == self.replica) {
                log.warn("{}: learn: replica == self.replica", .{self.replica});
                return;
            }

            // Our m0 and m2 readings should always be monotonically increasing if not equal.
            // Crucially, it is possible for a very fast network to have m0 == m2, especially where
            // `config.tick_ms` is at a more coarse granularity. We must therefore tolerate RTT=0 or
            // otherwise we would have a liveness bug simply because we would be throwing away
            // perfectly good clock samples.
            // This condition should never be true. Reject this as a bad sample:
            if (m0 > m2) {
                log.warn("{}: learn: m0={} > m2={}", .{ self.replica, m0, m2 });
                return;
            }

            // We may receive delayed packets after a reboot, in which case m0/m2 may be invalid:
            if (m0 < self.window.monotonic) {
                log.warn("{}: learn: m0={} < window.monotonic={}", .{
                    self.replica,
                    m0,
                    self.window.monotonic,
                });
                return;
            }

            if (m2 < self.window.monotonic) {
                log.warn("{}: learn: m2={} < window.monotonic={}", .{
                    self.replica,
                    m2,
                    self.window.monotonic,
                });
                return;
            }

            const elapsed: u64 = m2 - self.window.monotonic;
            if (elapsed > window_max) {
                log.warn("{}: learn: elapsed={} > window_max={}", .{
                    self.replica,
                    elapsed,
                    window_max,
                });
                return;
            }

            const round_trip_time: u64 = m2 - m0;
            const one_way_delay: u64 = round_trip_time / 2;
            const t2: i64 = self.window.realtime + @intCast(i64, elapsed);
            const clock_offset: i64 = t1 + @intCast(i64, one_way_delay) - t2;
            const asymmetric_delay = self.estimate_asymmetric_delay(
                replica,
                one_way_delay,
                clock_offset,
            );
            const clock_offset_corrected = clock_offset + asymmetric_delay;

            log.debug("{}: learn: replica={} m0={} t1={} m2={} t2={} one_way_delay={} " ++
                "asymmetric_delay={} clock_offset={}", .{
                self.replica,
                replica,
                m0,
                t1,
                m2,
                t2,
                one_way_delay,
                asymmetric_delay,
                clock_offset_corrected,
            });

            // The less network delay, the more likely we have an accurate clock offset measurement:
            self.window.sources[replica] = minimum_one_way_delay(
                self.window.sources[replica],
                Sample{
                    .clock_offset = clock_offset_corrected,
                    .one_way_delay = one_way_delay,
                },
            );

            self.window.samples += 1;

            // We decouple calls to `synchronize()` so that it's not triggered by these network events.
            // Otherwise, excessive duplicate network packets would burn the CPU.
            self.window.learned = true;
        }

        /// Called by `Replica.on_ping_timeout()` to provide `m0` when we decide to send a ping.
        /// Called by `Replica.on_pong()` to provide `m2` when we receive a pong.
        pub fn monotonic(self: *Self) u64 {
            return self.time.monotonic();
        }

        /// Called by `Replica.on_ping()` when responding to a ping with a pong.
        /// This should never be used by the state machine, only for measuring clock offsets.
        pub fn realtime(self: *Self) i64 {
            return self.time.realtime();
        }

        /// Called by `StateMachine.prepare_timestamp()` when the leader wants to timestamp a batch.
        /// If the leader's clock is not synchronized with the cluster, it must wait until it is.
        /// Returns the system time clamped to be within our synchronized lower and upper bounds.
        /// This is complementary to NTP and allows clusters with very accurate time to make use of it,
        /// while providing guard rails for when NTP is partitioned or unable to correct quickly enough.
        pub fn realtime_synchronized(self: *Self) ?i64 {
            if (self.synchronization_disabled) {
                return self.realtime();
            } else if (self.epoch.synchronized) |interval| {
                const elapsed = @intCast(i64, self.epoch.elapsed(self));
                return std.math.clamp(
                    self.realtime(),
                    self.epoch.realtime + elapsed + interval.lower_bound,
                    self.epoch.realtime + elapsed + interval.upper_bound,
                );
            } else {
                return null;
            }
        }

        pub fn tick(self: *Self) void {
            self.time.tick();

            if (self.synchronization_disabled) return;
            self.synchronize();
            // Expire the current epoch if successive windows failed to synchronize:
            // Gradual clock drift prevents us from using an epoch for more than a few seconds.
            if (self.epoch.elapsed(self) >= epoch_max) {
                log.alert(
                    "{}: no agreement on cluster time (partitioned or too many clock faults)",
                    .{self.replica},
                );
                self.epoch.reset(self);
            }
        }

        /// Estimates the asymmetric delay for a sample compared to the previous window, according to
        /// Algorithm 1 from Section 4.2, "A System for Clock Synchronization in an Internet of Things".
        fn estimate_asymmetric_delay(
            self: *Self,
            replica: u8,
            one_way_delay: u64,
            clock_offset: i64,
        ) i64 {
            // Note that `one_way_delay` may be 0 for very fast networks.

            const error_margin = 10 * std.time.ns_per_ms;

            if (self.epoch.sources[replica]) |epoch| {
                if (one_way_delay <= epoch.one_way_delay) {
                    return 0;
                } else if (clock_offset > epoch.clock_offset + error_margin) {
                    // The asymmetric error is on the forward network path.
                    return 0 - @intCast(i64, one_way_delay - epoch.one_way_delay);
                } else if (clock_offset < epoch.clock_offset - error_margin) {
                    // The asymmetric error is on the reverse network path.
                    return 0 + @intCast(i64, one_way_delay - epoch.one_way_delay);
                } else {
                    return 0;
                }
            } else {
                return 0;
            }
        }

        fn synchronize(self: *Self) void {
            assert(self.window.synchronized == null);

            // Wait until the window has enough accurate samples:
            const elapsed = self.window.elapsed(self);
            if (elapsed < window_min) return;
            if (elapsed >= window_max) {
                // We took too long to synchronize the window, expire stale samples...
                const sources_sampled = self.window.sources_sampled();
                if (sources_sampled <= @divTrunc(self.window.sources.len, 2)) {
                    log.crit("{}: synchronization failed, partitioned (sources={} samples={})", .{
                        self.replica,
                        sources_sampled,
                        self.window.samples,
                    });
                } else {
                    log.crit("{}: synchronization failed, no agreement (sources={} samples={})", .{
                        self.replica,
                        sources_sampled,
                        self.window.samples,
                    });
                }
                self.window.reset(self);
                return;
            }

            if (!self.window.learned) return;
            // Do not reset `learned` any earlier than this (before we have attempted to synchronize).
            self.window.learned = false;

            // Starting with the most clock offset tolerance, while we have a majority, find the best
            // smallest interval with the least clock offset tolerance, reducing tolerance at each step:
            var tolerance: u64 = clock_offset_tolerance_max;
            var terminate = false;
            var rounds: usize = 0;
            // Do at least one round if tolerance=0 and cap the number of rounds to avoid runaway loops.
            while (!terminate and rounds < 64) : (tolerance /= 2) {
                if (tolerance == 0) terminate = true;
                rounds += 1;

                const interval = Marzullo.smallest_interval(self.window_tuples(tolerance));
                const majority = interval.sources_true > @divTrunc(self.window.sources.len, 2);
                if (!majority) break;

                // The new interval may reduce the number of `sources_true` while also decreasing error.
                // In other words, provided we maintain a majority, we prefer tighter tolerance bounds.
                self.window.synchronized = interval;
            }

            // Wait for more accurate samples or until we timeout the window for lack of majority:
            if (self.window.synchronized == null) return;

            // Promote the synchronized window to be the new epoch, and recycle the old epoch's
            // allocation as the next collection window:
            var new_window = self.epoch;
            new_window.reset(self);
            self.epoch = self.window;
            self.window = new_window;

            self.after_synchronization();
        }

        fn after_synchronization(self: *Self) void {
            const new_interval = self.epoch.synchronized.?;

            log.debug("{}: synchronized: truechimers={}/{} clock_offset={}..{} accuracy={}", .{
                self.replica,
                new_interval.sources_true,
                self.epoch.sources.len,
                fmtDurationSigned(new_interval.lower_bound),
                fmtDurationSigned(new_interval.upper_bound),
                fmtDurationSigned(new_interval.upper_bound - new_interval.lower_bound),
            });

            const elapsed = @intCast(i64, self.epoch.elapsed(self));
            const system = self.realtime();
            const lower = self.epoch.realtime + elapsed + new_interval.lower_bound;
            const upper = self.epoch.realtime + elapsed + new_interval.upper_bound;
            const cluster = std.math.clamp(system, lower, upper);

            if (system == cluster) {} else if (system < lower) {
                const delta = lower - system;
                if (delta < std.time.ns_per_ms) {
                    log.info("{}: system time is {} behind", .{
                        self.replica,
                        fmtDurationSigned(delta),
                    });
                } else {
                    log.err("{}: system time is {} behind, clamping system time to cluster time", .{
                        self.replica,
                        fmtDurationSigned(delta),
                    });
                }
            } else {
                const delta = system - upper;
                if (delta < std.time.ns_per_ms) {
                    log.info("{}: system time is {} ahead", .{
                        self.replica,
                        fmtDurationSigned(delta),
                    });
                } else {
                    log.err("{}: system time is {} ahead, clamping system time to cluster time", .{
                        self.replica,
                        fmtDurationSigned(delta),
                    });
                }
            }
        }

        fn window_tuples(self: *Self, tolerance: u64) []Marzullo.Tuple {
            assert(self.window.sources[self.replica].?.clock_offset == 0);
            assert(self.window.sources[self.replica].?.one_way_delay == 0);
            var count: usize = 0;
            for (self.window.sources) |sampled, source| {
                if (sampled) |sample| {
                    self.marzullo_tuples[count] = Marzullo.Tuple{
                        .source = @intCast(u8, source),
                        .offset = sample.clock_offset - @intCast(i64, sample.one_way_delay + tolerance),
                        .bound = .lower,
                    };
                    count += 1;
                    self.marzullo_tuples[count] = Marzullo.Tuple{
                        .source = @intCast(u8, source),
                        .offset = sample.clock_offset + @intCast(i64, sample.one_way_delay + tolerance),
                        .bound = .upper,
                    };
                    count += 1;
                }
            }
            return self.marzullo_tuples[0..count];
        }

        fn minimum_one_way_delay(a: ?Sample, b: ?Sample) ?Sample {
            if (a == null) return b;
            if (b == null) return a;
            if (a.?.one_way_delay < b.?.one_way_delay) return a;
            // Choose B if B's one way delay is less or the same (we assume B is the newer sample):
            return b;
        }
    };
}
452
+
453
/// Return a Formatter for a signed number of nanoseconds according to magnitude:
/// [#y][#w][#d][#h][#m]#[.###][n|u|m]s
pub fn fmtDurationSigned(ns: i64) std.fmt.Formatter(formatDurationSigned) {
    return std.fmt.Formatter(formatDurationSigned){ .data = ns };
}
458
+
459
/// Formats a signed nanosecond duration: negative values print a leading `-` followed by the
/// magnitude, delegating the magnitude formatting to `std.fmt.fmtDuration`.
/// The `fmt` string and `options` are accepted for the Formatter interface but unused here.
fn formatDurationSigned(
    ns: i64,
    comptime fmt: []const u8,
    options: std.fmt.FormatOptions,
    writer: anytype,
) !void {
    if (ns < 0) {
        // `-ns` traps (checked illegal behavior) when ns == minInt(i64), since +2^63 is not
        // representable in i64. Negate in the unsigned domain instead (two's complement):
        const magnitude = ~@bitCast(u64, ns) +% 1;
        try writer.print("-{}", .{std.fmt.fmtDuration(magnitude)});
    } else {
        try writer.print("{}", .{std.fmt.fmtDuration(@intCast(u64, ns))});
    }
}
471
+
472
+ const testing = std.testing;
473
+ const OffsetType = @import("../test/time.zig").OffsetType;
474
+ const DeterministicTime = @import("../test/time.zig").Time;
475
+ const DeterministicClock = Clock(DeterministicTime);
476
+
477
/// Drives a three-replica DeterministicClock from the perspective of replica 0, feeding it
/// synthetic pong samples at a fixed interval so unit tests can assert on synchronized offsets.
const ClockUnitTestContainer = struct {
    const Self = @This();
    clock: DeterministicClock,
    rtt: u64 = 300 * std.time.ns_per_ms,
    owd: u64 = 150 * std.time.ns_per_ms,
    learn_interval: u64 = 5,

    pub fn init(
        allocator: *std.mem.Allocator,
        offset_type: OffsetType,
        offset_coefficient_A: i64,
        offset_coefficient_B: i64,
    ) !Self {
        const time: DeterministicTime = .{
            .resolution = std.time.ns_per_s / 2,
            .offset_type = offset_type,
            .offset_coefficient_A = offset_coefficient_A,
            .offset_coefficient_B = offset_coefficient_B,
        };
        // NOTE(review): `time` is a stack local handed to DeterministicClock.init, whose `time`
        // parameter is declared `*Time` in Clock above — confirm the deterministic clock copies
        // the time source rather than retaining a pointer into this frame.
        const self: Self = .{
            .clock = try DeterministicClock.init(allocator, 3, 0, time),
        };
        return self;
    }

    /// Advance the deterministic clock to `tick`, learning two synthetic remote samples
    /// (replicas 1 and 2) every `learn_interval` ticks, then attempting synchronization.
    pub fn run_till_tick(self: *Self, tick: u64) void {
        while (self.clock.time.ticks < tick) {
            self.clock.time.tick();

            if (@mod(self.clock.time.ticks, self.learn_interval) == 0) {
                const on_pong_time = self.clock.monotonic();
                const m0 = on_pong_time - self.rtt;
                const t1 = @intCast(i64, on_pong_time - self.owd);

                self.clock.learn(1, m0, t1, on_pong_time);
                self.clock.learn(2, m0, t1, on_pong_time);
            }

            self.clock.synchronize();
        }
    }

    const AssertionPoint = struct {
        tick: u64,
        expected_offset: i64,
    };

    /// Produce three (tick, expected offset) pairs derived analytically from the configured
    /// offset model, at which the caller should assert on `realtime_synchronized()`.
    pub fn ticks_to_perform_assertions(self: *Self) [3]AssertionPoint {
        var ret: [3]AssertionPoint = undefined;
        switch (self.clock.time.offset_type) {
            .linear => {
                // For the first (OWD/drift per tick) ticks, the offset < OWD. This means that the
                // Marzullo interval is [0,0] (the offset and OWD are 0 for a replica w.r.t. itself).
                // Therefore the offset of `clock.realtime_synchronised` will be the analytically prescribed
                // offset at the start of the window.
                // Beyond this, the offset > OWD and the Marzullo interval will be from replica 1 and
                // replica 2. The `clock.realtime_synchronized` will be clamped to the lower bound.
                // Therefore the `clock.realtime_synchronized` will be offset by the OWD.
                var threshold = self.owd / @intCast(u64, self.clock.time.offset_coefficient_A);
                ret[0] = .{
                    .tick = threshold,
                    .expected_offset = self.clock.time.offset(threshold - self.learn_interval),
                };
                ret[1] = .{
                    .tick = threshold + 100,
                    .expected_offset = @intCast(i64, self.owd),
                };
                ret[2] = .{
                    .tick = threshold + 200,
                    .expected_offset = @intCast(i64, self.owd),
                };
            },
            .periodic => {
                ret[0] = .{
                    .tick = @intCast(u64, @divTrunc(self.clock.time.offset_coefficient_B, 4)),
                    .expected_offset = @intCast(i64, self.owd),
                };
                ret[1] = .{
                    .tick = @intCast(u64, @divTrunc(self.clock.time.offset_coefficient_B, 2)),
                    .expected_offset = 0,
                };
                ret[2] = .{
                    .tick = @intCast(u64, @divTrunc(self.clock.time.offset_coefficient_B * 3, 4)),
                    .expected_offset = -@intCast(i64, self.owd),
                };
            },
            .step => {
                ret[0] = .{
                    .tick = @intCast(u64, self.clock.time.offset_coefficient_B - 10),
                    .expected_offset = 0,
                };
                ret[1] = .{
                    .tick = @intCast(u64, self.clock.time.offset_coefficient_B + 10),
                    .expected_offset = -@intCast(i64, self.owd),
                };
                ret[2] = .{
                    .tick = @intCast(u64, self.clock.time.offset_coefficient_B + 10),
                    .expected_offset = -@intCast(i64, self.owd),
                };
            },
            .non_ideal => unreachable, // use ideal clocks for the unit tests
        }

        return ret;
    }
};
582
+
583
test "ideal clocks get clamped to cluster time" {
    std.testing.log_level = .crit;
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = &arena.allocator;

    // Case 1: a clock with constant linear drift (loses 1ms per tick).
    var ideal_constant_drift_clock = try ClockUnitTestContainer.init(
        allocator,
        OffsetType.linear,
        std.time.ns_per_ms, // loses 1ms per tick
        0,
    );
    var linear_clock_assertion_points = ideal_constant_drift_clock.ticks_to_perform_assertions();
    for (linear_clock_assertion_points) |point| {
        ideal_constant_drift_clock.run_till_tick(point.tick);
        try testing.expectEqual(
            point.expected_offset,
            @intCast(i64, ideal_constant_drift_clock.clock.monotonic()) -
                ideal_constant_drift_clock.clock.realtime_synchronized().?,
        );
    }

    // Case 2: a clock whose offset oscillates periodically.
    var ideal_periodic_drift_clock = try ClockUnitTestContainer.init(
        allocator,
        OffsetType.periodic,
        std.time.ns_per_s, // loses up to 1s
        200, // period of 200 ticks
    );
    var ideal_periodic_drift_clock_assertion_points =
        ideal_periodic_drift_clock.ticks_to_perform_assertions();
    for (ideal_periodic_drift_clock_assertion_points) |point| {
        ideal_periodic_drift_clock.run_till_tick(point.tick);
        try testing.expectEqual(
            point.expected_offset,
            @intCast(i64, ideal_periodic_drift_clock.clock.monotonic()) -
                ideal_periodic_drift_clock.clock.realtime_synchronized().?,
        );
    }

    // Case 3: a clock that step-jumps once at a fixed tick.
    var ideal_jumping_clock = try ClockUnitTestContainer.init(
        allocator,
        OffsetType.step,
        -5 * std.time.ns_per_day, // jumps 5 days ahead.
        49, // after 49 ticks
    );
    var ideal_jumping_clock_assertion_points = ideal_jumping_clock.ticks_to_perform_assertions();
    for (ideal_jumping_clock_assertion_points) |point| {
        ideal_jumping_clock.run_till_tick(point.tick);
        try testing.expectEqual(
            point.expected_offset,
            @intCast(i64, ideal_jumping_clock.clock.monotonic()) -
                ideal_jumping_clock.clock.realtime_synchronized().?,
        );
    }
}
638
+
639
+ const PacketSimulatorOptions = @import("../test/packet_simulator.zig").PacketSimulatorOptions;
640
+ const PacketSimulator = @import("../test/packet_simulator.zig").PacketSimulator;
641
+ const Path = @import("../test/packet_simulator.zig").Path;
642
/// Runs a cluster of deterministic clocks over a simulated lossy network, exchanging
/// ping/pong packets so that each clock can learn offset samples from its peers.
const ClockSimulator = struct {
    const Packet = struct {
        m0: u64,
        t1: ?i64, // null on the outbound ping; set to the responder's realtime on the pong.
        clock_simulator: *ClockSimulator,

        /// PacketSimulator requires this function, but we don't actually have anything to deinit.
        pub fn deinit(packet: *const Packet, path: Path) void {}
    };

    const Options = struct {
        ping_timeout: u32,
        clock_count: u8,
        network_options: PacketSimulatorOptions,
    };

    allocator: *std.mem.Allocator,
    options: Options,
    ticks: u64 = 0,
    network: PacketSimulator(Packet),
    clocks: []DeterministicClock,
    prng: std.rand.DefaultPrng,

    pub fn init(allocator: *std.mem.Allocator, options: Options) !ClockSimulator {
        var self = ClockSimulator{
            .allocator = allocator,
            .options = options,
            .network = try PacketSimulator(Packet).init(allocator, options.network_options),
            .clocks = try allocator.alloc(DeterministicClock, options.clock_count),
            .prng = std.rand.DefaultPrng.init(options.network_options.seed),
        };

        for (self.clocks) |*clock, index| {
            clock.* = try self.create_clock(@intCast(u8, index));
        }

        return self;
    }

    /// Build one deterministic clock with a randomized non-ideal offset model.
    fn create_clock(self: *ClockSimulator, replica: u8) !DeterministicClock {
        const amplitude = self.prng.random.intRangeAtMost(i64, -10, 10) * std.time.ns_per_s;
        const phase = self.prng.random.intRangeAtMost(i64, 100, 1000) +
            @floatToInt(i64, self.prng.random.floatNorm(f64) * 50);
        const time: DeterministicTime = .{
            .resolution = std.time.ns_per_s / 2, // delta_t = 0.5s
            .offset_type = OffsetType.non_ideal,
            .offset_coefficient_A = amplitude,
            .offset_coefficient_B = phase,
            .offset_coefficient_C = 10,
        };

        // NOTE(review): `time` is a stack local handed to DeterministicClock.init, whose `time`
        // parameter is declared `*Time` in Clock above — confirm the deterministic clock copies
        // the time source rather than retaining a pointer into this frame.
        return try DeterministicClock.init(self.allocator, self.options.clock_count, replica, time);
    }

    /// Advance the network and every clock by one tick, then broadcast pings from any clock
    /// whose ping timeout has fired.
    pub fn tick(self: *ClockSimulator) void {
        self.ticks += 1;
        self.network.tick();
        for (self.clocks) |*clock| {
            clock.tick();
        }

        for (self.clocks) |*clock| {
            if (clock.time.ticks % self.options.ping_timeout == 0) {
                const m0 = clock.monotonic();
                for (self.clocks) |_, target| {
                    if (target != clock.replica) {
                        self.network.submit_packet(
                            .{
                                .m0 = m0,
                                .t1 = null,
                                .clock_simulator = self,
                            },
                            ClockSimulator.handle_packet,
                            .{
                                .source = clock.replica,
                                .target = @intCast(u8, target),
                            },
                        );
                    }
                }
            }
        }
    }

    /// Network delivery callback: a pong (t1 set) teaches the target clock a sample;
    /// a ping (t1 null) is answered by submitting the pong back along the reverse path.
    fn handle_packet(packet: Packet, path: Path) void {
        const self = packet.clock_simulator;
        const target = &self.clocks[path.target];

        if (packet.t1) |t1| {
            target.learn(
                path.source,
                packet.m0,
                t1,
                target.monotonic(),
            );
        } else {
            self.network.submit_packet(
                .{
                    .m0 = packet.m0,
                    .t1 = target.realtime(),
                    .clock_simulator = self,
                },
                ClockSimulator.handle_packet,
                .{
                    // send the packet back to where it came from.
                    .source = path.target,
                    .target = path.source,
                },
            );
        }
    }
};
754
+
755
test "fuzz test" {
    std.testing.log_level = .emerg; // silence all clock logs
    var arena_allocator = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena_allocator.deinit();
    const allocator = &arena_allocator.allocator;
    const ticks_max: u64 = 1_000_000;
    const clock_count: u8 = 3;
    const test_delta_time: u64 = std.time.ns_per_s / 2;
    const SystemTime = @import("../time.zig").Time;
    var system_time = SystemTime{};
    var seed = @intCast(u64, system_time.realtime());
    var min_sync_error: u64 = 1_000_000_000;
    var max_sync_error: u64 = 0;
    var max_clock_offset: u64 = 0;
    var min_clock_offset: u64 = 1_000_000_000;
    var simulator = try ClockSimulator.init(allocator, .{
        .network_options = .{
            .node_count = clock_count,
            .seed = seed,
            .one_way_delay_mean = 25,
            .one_way_delay_min = 10,
            .packet_loss_probability = 10,
            .path_maximum_capacity = 20,
            .path_clog_duration_mean = 200,
            .path_clog_probability = 2,
            .packet_replay_probability = 2,
        },
        .clock_count = clock_count,
        .ping_timeout = 20,
    });

    var clock_ticks_without_synchronization = [_]u32{0} ** clock_count;
    while (simulator.ticks < ticks_max) {
        simulator.tick();

        // NOTE(review): `test_time` is computed but never read below — dead value; confirm
        // whether an assertion against test time was intended here.
        const test_time: u64 = simulator.ticks * test_delta_time;
        for (simulator.clocks) |*clock, index| {
            // Track the extreme absolute offsets of each raw clock from test time:
            var offset = clock.time.offset(simulator.ticks);
            var abs_offset = if (offset >= 0) @intCast(u64, offset) else @intCast(u64, -offset);
            max_clock_offset = if (abs_offset > max_clock_offset) abs_offset else max_clock_offset;
            min_clock_offset = if (abs_offset < min_clock_offset) abs_offset else min_clock_offset;

            var synced_time = clock.realtime_synchronized() orelse {
                clock_ticks_without_synchronization[index] += 1;
                continue;
            };

            // Track the extreme pairwise synchronization errors between synchronized clocks:
            for (simulator.clocks) |*other_clock, other_clock_index| {
                if (index == other_clock_index) continue;
                var other_clock_sync_time = other_clock.realtime_synchronized() orelse {
                    continue;
                };
                var err: i64 = synced_time - other_clock_sync_time;
                var abs_err: u64 = if (err >= 0) @intCast(u64, err) else @intCast(u64, -err);
                max_sync_error = if (abs_err > max_sync_error) abs_err else max_sync_error;
                min_sync_error = if (abs_err < min_sync_error) abs_err else min_sync_error;
            }
        }
    }

    std.debug.print("seed={}, max ticks={}, clock count={}\n", .{
        seed,
        ticks_max,
        clock_count,
    });
    std.debug.print("absolute clock offsets with respect to test time:\n", .{});
    std.debug.print("maximum={}\n", .{fmtDurationSigned(@intCast(i64, max_clock_offset))});
    std.debug.print("minimum={}\n", .{fmtDurationSigned(@intCast(i64, min_clock_offset))});
    std.debug.print("\nabsolute synchronization errors between clocks:\n", .{});
    std.debug.print("maximum={}\n", .{fmtDurationSigned(@intCast(i64, max_sync_error))});
    std.debug.print("minimum={}\n", .{fmtDurationSigned(@intCast(i64, min_sync_error))});
    std.debug.print("clock ticks without synchronization={d}\n", .{
        clock_ticks_without_synchronization,
    });
}