tigerbeetle-node 0.11.12 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/README.md +212 -196
  2. package/dist/bin/aarch64-linux-gnu/client.node +0 -0
  3. package/dist/bin/aarch64-linux-musl/client.node +0 -0
  4. package/dist/bin/aarch64-macos/client.node +0 -0
  5. package/dist/bin/x86_64-linux-gnu/client.node +0 -0
  6. package/dist/bin/x86_64-linux-musl/client.node +0 -0
  7. package/dist/bin/x86_64-macos/client.node +0 -0
  8. package/dist/index.js +33 -1
  9. package/dist/index.js.map +1 -1
  10. package/package-lock.json +66 -0
  11. package/package.json +8 -17
  12. package/src/index.ts +56 -1
  13. package/src/node.zig +10 -9
  14. package/dist/.client.node.sha256 +0 -1
  15. package/scripts/build_lib.sh +0 -61
  16. package/scripts/download_node_headers.sh +0 -32
  17. package/src/tigerbeetle/scripts/benchmark.bat +0 -48
  18. package/src/tigerbeetle/scripts/benchmark.sh +0 -66
  19. package/src/tigerbeetle/scripts/confirm_image.sh +0 -44
  20. package/src/tigerbeetle/scripts/fuzz_loop.sh +0 -15
  21. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +0 -7
  22. package/src/tigerbeetle/scripts/install.bat +0 -7
  23. package/src/tigerbeetle/scripts/install.sh +0 -21
  24. package/src/tigerbeetle/scripts/install_zig.bat +0 -113
  25. package/src/tigerbeetle/scripts/install_zig.sh +0 -90
  26. package/src/tigerbeetle/scripts/lint.zig +0 -199
  27. package/src/tigerbeetle/scripts/pre-commit.sh +0 -9
  28. package/src/tigerbeetle/scripts/scripts/benchmark.bat +0 -48
  29. package/src/tigerbeetle/scripts/scripts/benchmark.sh +0 -66
  30. package/src/tigerbeetle/scripts/scripts/confirm_image.sh +0 -44
  31. package/src/tigerbeetle/scripts/scripts/fuzz_loop.sh +0 -15
  32. package/src/tigerbeetle/scripts/scripts/fuzz_unique_errors.sh +0 -7
  33. package/src/tigerbeetle/scripts/scripts/install.bat +0 -7
  34. package/src/tigerbeetle/scripts/scripts/install.sh +0 -21
  35. package/src/tigerbeetle/scripts/scripts/install_zig.bat +0 -113
  36. package/src/tigerbeetle/scripts/scripts/install_zig.sh +0 -90
  37. package/src/tigerbeetle/scripts/scripts/lint.zig +0 -199
  38. package/src/tigerbeetle/scripts/scripts/pre-commit.sh +0 -9
  39. package/src/tigerbeetle/scripts/scripts/shellcheck.sh +0 -5
  40. package/src/tigerbeetle/scripts/scripts/tests_on_alpine.sh +0 -10
  41. package/src/tigerbeetle/scripts/scripts/tests_on_ubuntu.sh +0 -14
  42. package/src/tigerbeetle/scripts/scripts/upgrade_ubuntu_kernel.sh +0 -48
  43. package/src/tigerbeetle/scripts/scripts/validate_docs.sh +0 -23
  44. package/src/tigerbeetle/scripts/scripts/vr_state_enumerate +0 -46
  45. package/src/tigerbeetle/scripts/shellcheck.sh +0 -5
  46. package/src/tigerbeetle/scripts/tests_on_alpine.sh +0 -10
  47. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +0 -14
  48. package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +0 -48
  49. package/src/tigerbeetle/scripts/validate_docs.sh +0 -23
  50. package/src/tigerbeetle/scripts/vr_state_enumerate +0 -46
  51. package/src/tigerbeetle/src/benchmark.zig +0 -314
  52. package/src/tigerbeetle/src/config.zig +0 -234
  53. package/src/tigerbeetle/src/constants.zig +0 -436
  54. package/src/tigerbeetle/src/ewah.zig +0 -286
  55. package/src/tigerbeetle/src/ewah_benchmark.zig +0 -120
  56. package/src/tigerbeetle/src/ewah_fuzz.zig +0 -130
  57. package/src/tigerbeetle/src/fifo.zig +0 -120
  58. package/src/tigerbeetle/src/io/benchmark.zig +0 -213
  59. package/src/tigerbeetle/src/io/darwin.zig +0 -814
  60. package/src/tigerbeetle/src/io/linux.zig +0 -1062
  61. package/src/tigerbeetle/src/io/test.zig +0 -643
  62. package/src/tigerbeetle/src/io/windows.zig +0 -1183
  63. package/src/tigerbeetle/src/io.zig +0 -34
  64. package/src/tigerbeetle/src/iops.zig +0 -107
  65. package/src/tigerbeetle/src/lsm/README.md +0 -308
  66. package/src/tigerbeetle/src/lsm/binary_search.zig +0 -341
  67. package/src/tigerbeetle/src/lsm/bloom_filter.zig +0 -125
  68. package/src/tigerbeetle/src/lsm/compaction.zig +0 -603
  69. package/src/tigerbeetle/src/lsm/composite_key.zig +0 -77
  70. package/src/tigerbeetle/src/lsm/direction.zig +0 -11
  71. package/src/tigerbeetle/src/lsm/eytzinger.zig +0 -587
  72. package/src/tigerbeetle/src/lsm/eytzinger_benchmark.zig +0 -330
  73. package/src/tigerbeetle/src/lsm/forest.zig +0 -204
  74. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +0 -401
  75. package/src/tigerbeetle/src/lsm/grid.zig +0 -573
  76. package/src/tigerbeetle/src/lsm/groove.zig +0 -972
  77. package/src/tigerbeetle/src/lsm/k_way_merge.zig +0 -474
  78. package/src/tigerbeetle/src/lsm/level_iterator.zig +0 -332
  79. package/src/tigerbeetle/src/lsm/manifest.zig +0 -617
  80. package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -877
  81. package/src/tigerbeetle/src/lsm/manifest_log.zig +0 -789
  82. package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -691
  83. package/src/tigerbeetle/src/lsm/merge_iterator.zig +0 -106
  84. package/src/tigerbeetle/src/lsm/node_pool.zig +0 -235
  85. package/src/tigerbeetle/src/lsm/posted_groove.zig +0 -378
  86. package/src/tigerbeetle/src/lsm/segmented_array.zig +0 -1328
  87. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -148
  88. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -9
  89. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +0 -850
  90. package/src/tigerbeetle/src/lsm/table.zig +0 -1031
  91. package/src/tigerbeetle/src/lsm/table_immutable.zig +0 -203
  92. package/src/tigerbeetle/src/lsm/table_iterator.zig +0 -340
  93. package/src/tigerbeetle/src/lsm/table_mutable.zig +0 -220
  94. package/src/tigerbeetle/src/lsm/test.zig +0 -438
  95. package/src/tigerbeetle/src/lsm/tree.zig +0 -1193
  96. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +0 -474
  97. package/src/tigerbeetle/src/message_bus.zig +0 -1012
  98. package/src/tigerbeetle/src/message_pool.zig +0 -156
  99. package/src/tigerbeetle/src/ring_buffer.zig +0 -399
  100. package/src/tigerbeetle/src/simulator.zig +0 -569
  101. package/src/tigerbeetle/src/state_machine/auditor.zig +0 -577
  102. package/src/tigerbeetle/src/state_machine/workload.zig +0 -883
  103. package/src/tigerbeetle/src/state_machine.zig +0 -1881
  104. package/src/tigerbeetle/src/static_allocator.zig +0 -65
  105. package/src/tigerbeetle/src/stdx.zig +0 -162
  106. package/src/tigerbeetle/src/storage.zig +0 -393
  107. package/src/tigerbeetle/src/testing/cluster/message_bus.zig +0 -82
  108. package/src/tigerbeetle/src/testing/cluster/network.zig +0 -237
  109. package/src/tigerbeetle/src/testing/cluster/state_checker.zig +0 -169
  110. package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -202
  111. package/src/tigerbeetle/src/testing/cluster.zig +0 -443
  112. package/src/tigerbeetle/src/testing/fuzz.zig +0 -140
  113. package/src/tigerbeetle/src/testing/hash_log.zig +0 -66
  114. package/src/tigerbeetle/src/testing/id.zig +0 -99
  115. package/src/tigerbeetle/src/testing/packet_simulator.zig +0 -364
  116. package/src/tigerbeetle/src/testing/priority_queue.zig +0 -645
  117. package/src/tigerbeetle/src/testing/reply_sequence.zig +0 -139
  118. package/src/tigerbeetle/src/testing/state_machine.zig +0 -249
  119. package/src/tigerbeetle/src/testing/storage.zig +0 -757
  120. package/src/tigerbeetle/src/testing/table.zig +0 -247
  121. package/src/tigerbeetle/src/testing/time.zig +0 -84
  122. package/src/tigerbeetle/src/tigerbeetle.zig +0 -227
  123. package/src/tigerbeetle/src/time.zig +0 -112
  124. package/src/tigerbeetle/src/tracer.zig +0 -529
  125. package/src/tigerbeetle/src/unit_tests.zig +0 -42
  126. package/src/tigerbeetle/src/vopr.zig +0 -495
  127. package/src/tigerbeetle/src/vsr/README.md +0 -209
  128. package/src/tigerbeetle/src/vsr/client.zig +0 -544
  129. package/src/tigerbeetle/src/vsr/clock.zig +0 -853
  130. package/src/tigerbeetle/src/vsr/journal.zig +0 -2413
  131. package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +0 -111
  132. package/src/tigerbeetle/src/vsr/marzullo.zig +0 -309
  133. package/src/tigerbeetle/src/vsr/replica.zig +0 -6381
  134. package/src/tigerbeetle/src/vsr/replica_format.zig +0 -219
  135. package/src/tigerbeetle/src/vsr/superblock.zig +0 -1631
  136. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +0 -256
  137. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +0 -929
  138. package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +0 -334
  139. package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -390
  140. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +0 -615
  141. package/src/tigerbeetle/src/vsr/superblock_quorums.zig +0 -394
  142. package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -314
  143. package/src/tigerbeetle/src/vsr.zig +0 -1352
@@ -1,853 +0,0 @@
1
- const std = @import("std");
2
- const assert = std.debug.assert;
3
- const log = std.log.scoped(.clock);
4
- const fmt = std.fmt;
5
-
6
- const constants = @import("../constants.zig");
7
-
8
- const clock_offset_tolerance_max: u64 = constants.clock_offset_tolerance_max_ms * std.time.ns_per_ms;
9
- const epoch_max: u64 = constants.clock_epoch_max_ms * std.time.ns_per_ms;
10
- const window_min: u64 = constants.clock_synchronization_window_min_ms * std.time.ns_per_ms;
11
- const window_max: u64 = constants.clock_synchronization_window_max_ms * std.time.ns_per_ms;
12
-
13
- const Marzullo = @import("marzullo.zig").Marzullo;
14
-
15
- pub fn Clock(comptime Time: type) type {
16
- return struct {
17
- const Self = @This();
18
-
19
- const Sample = struct {
20
- /// The relative difference between our wall clock reading and that of the remote clock source.
21
- clock_offset: i64,
22
- one_way_delay: u64,
23
- };
24
-
25
- const Epoch = struct {
26
- /// The best clock offset sample per remote clock source (with minimum one way delay) collected
27
- /// over the course of a window period of several seconds.
28
- sources: []?Sample,
29
-
30
- /// The total number of samples learned while synchronizing this epoch.
31
- samples: usize,
32
-
33
- /// The monotonic clock timestamp when this epoch began. We use this to measure elapsed time.
34
- monotonic: u64,
35
-
36
- /// The wall clock timestamp when this epoch began. We add the elapsed monotonic time to this
37
- /// plus the synchronized clock offset to arrive at a synchronized realtime timestamp. We
38
- /// capture this realtime when starting the epoch, before we take any samples, to guard against
39
- /// any jumps in the system's realtime clock from impacting our measurements.
40
- realtime: i64,
41
-
42
- /// Once we have enough source clock offset samples in agreement, the epoch is synchronized.
43
- /// We then have lower and upper bounds on the true cluster time, and can install this epoch for
44
- /// subsequent clock readings. This epoch is then valid for several seconds, while clock drift
45
- /// has not had enough time to accumulate into any significant clock skew, and while we collect
46
- /// samples for the next epoch to refresh and replace this one.
47
- synchronized: ?Marzullo.Interval,
48
-
49
- /// A guard to prevent synchronizing too often without having learned any new samples.
50
- learned: bool = false,
51
-
52
- fn elapsed(epoch: *Epoch, clock: *Self) u64 {
53
- return clock.monotonic() - epoch.monotonic;
54
- }
55
-
56
- fn reset(epoch: *Epoch, clock: *Self) void {
57
- std.mem.set(?Sample, epoch.sources, null);
58
- // A replica always has zero clock offset and network delay to its own system time reading:
59
- epoch.sources[clock.replica] = Sample{
60
- .clock_offset = 0,
61
- .one_way_delay = 0,
62
- };
63
- epoch.samples = 1;
64
- epoch.monotonic = clock.monotonic();
65
- epoch.realtime = clock.realtime();
66
- epoch.synchronized = null;
67
- epoch.learned = false;
68
- }
69
-
70
- fn sources_sampled(epoch: *Epoch) usize {
71
- var count: usize = 0;
72
- for (epoch.sources) |sampled| {
73
- if (sampled != null) count += 1;
74
- }
75
- return count;
76
- }
77
- };
78
-
79
- /// The index of the replica using this clock to provide synchronized time.
80
- replica: u8,
81
-
82
- /// The underlying time source for this clock (system time or deterministic time).
83
- time: *Time,
84
-
85
- /// An epoch from which the clock can read synchronized clock timestamps within safe bounds.
86
- /// At least `constants.clock_synchronization_window_min_ms` is needed for this to be ready
87
- /// to use.
88
- epoch: Epoch,
89
-
90
- /// The next epoch (collecting samples and being synchronized) to replace the current epoch.
91
- window: Epoch,
92
-
93
- /// A static allocation to convert window samples into tuple bounds for Marzullo's algorithm.
94
- marzullo_tuples: []Marzullo.Tuple,
95
-
96
- /// A kill switch to revert to unsynchronized realtime.
97
- synchronization_disabled: bool,
98
-
99
- pub fn init(
100
- allocator: std.mem.Allocator,
101
- /// The size of the cluster, i.e. the number of clock sources (including this replica).
102
- replica_count: u8,
103
- replica: u8,
104
- time: *Time,
105
- ) !Self {
106
- assert(replica_count > 0);
107
- assert(replica < replica_count);
108
-
109
- var epoch: Epoch = undefined;
110
- epoch.sources = try allocator.alloc(?Sample, replica_count);
111
- errdefer allocator.free(epoch.sources);
112
-
113
- var window: Epoch = undefined;
114
- window.sources = try allocator.alloc(?Sample, replica_count);
115
- errdefer allocator.free(window.sources);
116
-
117
- // There are two Marzullo tuple bounds (lower and upper) per source clock offset sample:
118
- var marzullo_tuples = try allocator.alloc(Marzullo.Tuple, replica_count * 2);
119
- errdefer allocator.free(marzullo_tuples);
120
-
121
- var self = Self{
122
- .replica = replica,
123
- .time = time,
124
- .epoch = epoch,
125
- .window = window,
126
- .marzullo_tuples = marzullo_tuples,
127
- .synchronization_disabled = replica_count == 1, // A cluster of one cannot synchronize.
128
- };
129
-
130
- // Reset the current epoch to be unsynchronized,
131
- self.epoch.reset(&self);
132
- // and open a new epoch window to start collecting samples...
133
- self.window.reset(&self);
134
-
135
- return self;
136
- }
137
-
138
- pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
139
- allocator.free(self.epoch.sources);
140
- allocator.free(self.window.sources);
141
- allocator.free(self.marzullo_tuples);
142
- }
143
-
144
- /// Called by `Replica.on_pong()` with:
145
- /// * the index of the `replica` that has replied to our ping with a pong,
146
- /// * our monotonic timestamp `m0` embedded in the ping we sent, carried over into this pong,
147
- /// * the remote replica's `realtime()` timestamp `t1`, and
148
- /// * our monotonic timestamp `m2` as captured by our `Replica.on_pong()` handler.
149
- pub fn learn(self: *Self, replica: u8, m0: u64, t1: i64, m2: u64) void {
150
- if (self.synchronization_disabled) return;
151
-
152
- // A network routing fault must have replayed one of our outbound messages back against us:
153
- if (replica == self.replica) {
154
- log.warn("{}: learn: replica == self.replica", .{self.replica});
155
- return;
156
- }
157
-
158
- // Our m0 and m2 readings should always be monotonically increasing if not equal.
159
- // Crucially, it is possible for a very fast network to have m0 == m2, especially where
160
- // `constants.tick_ms` is at a more coarse granularity. We must therefore tolerate RTT=0
161
- // or otherwise we would have a liveness bug simply because we would be throwing away
162
- // perfectly good clock samples.
163
- // This condition should never be true. Reject this as a bad sample:
164
- if (m0 > m2) {
165
- log.warn("{}: learn: m0={} > m2={}", .{ self.replica, m0, m2 });
166
- return;
167
- }
168
-
169
- // We may receive delayed packets after a reboot, in which case m0/m2 may be invalid:
170
- if (m0 < self.window.monotonic) {
171
- log.warn("{}: learn: m0={} < window.monotonic={}", .{
172
- self.replica,
173
- m0,
174
- self.window.monotonic,
175
- });
176
- return;
177
- }
178
-
179
- if (m2 < self.window.monotonic) {
180
- log.warn("{}: learn: m2={} < window.monotonic={}", .{
181
- self.replica,
182
- m2,
183
- self.window.monotonic,
184
- });
185
- return;
186
- }
187
-
188
- const elapsed: u64 = m2 - self.window.monotonic;
189
- if (elapsed > window_max) {
190
- log.warn("{}: learn: elapsed={} > window_max={}", .{
191
- self.replica,
192
- elapsed,
193
- window_max,
194
- });
195
- return;
196
- }
197
-
198
- const round_trip_time: u64 = m2 - m0;
199
- const one_way_delay: u64 = round_trip_time / 2;
200
- const t2: i64 = self.window.realtime + @intCast(i64, elapsed);
201
- const clock_offset: i64 = t1 + @intCast(i64, one_way_delay) - t2;
202
- const asymmetric_delay = self.estimate_asymmetric_delay(
203
- replica,
204
- one_way_delay,
205
- clock_offset,
206
- );
207
- const clock_offset_corrected = clock_offset + asymmetric_delay;
208
-
209
- log.debug("{}: learn: replica={} m0={} t1={} m2={} t2={} one_way_delay={} " ++
210
- "asymmetric_delay={} clock_offset={}", .{
211
- self.replica,
212
- replica,
213
- m0,
214
- t1,
215
- m2,
216
- t2,
217
- one_way_delay,
218
- asymmetric_delay,
219
- clock_offset_corrected,
220
- });
221
-
222
- // The less network delay, the more likely we have an accurate clock offset measurement:
223
- self.window.sources[replica] = minimum_one_way_delay(
224
- self.window.sources[replica],
225
- Sample{
226
- .clock_offset = clock_offset_corrected,
227
- .one_way_delay = one_way_delay,
228
- },
229
- );
230
-
231
- self.window.samples += 1;
232
-
233
- // We decouple calls to `synchronize()` so that it's not triggered by these network events.
234
- // Otherwise, excessive duplicate network packets would burn the CPU.
235
- self.window.learned = true;
236
- }
237
-
238
- /// Called by `Replica.on_ping_timeout()` to provide `m0` when we decide to send a ping.
239
- /// Called by `Replica.on_pong()` to provide `m2` when we receive a pong.
240
- pub fn monotonic(self: *Self) u64 {
241
- return self.time.monotonic();
242
- }
243
-
244
- /// Called by `Replica.on_ping()` when responding to a ping with a pong.
245
- /// This should never be used by the state machine, only for measuring clock offsets.
246
- pub fn realtime(self: *Self) i64 {
247
- return self.time.realtime();
248
- }
249
-
250
- /// Called by `StateMachine.prepare_timestamp()` when the primary wants to timestamp a batch.
251
- /// If the primary's clock is not synchronized with the cluster, it must wait until it is.
252
- /// Returns the system time clamped to be within our synchronized lower and upper bounds.
253
- /// This is complementary to NTP and allows clusters with very accurate time to make use of it,
254
- /// while providing guard rails for when NTP is partitioned or unable to correct quickly enough.
255
- pub fn realtime_synchronized(self: *Self) ?i64 {
256
- if (self.synchronization_disabled) {
257
- return self.realtime();
258
- } else if (self.epoch.synchronized) |interval| {
259
- const elapsed = @intCast(i64, self.epoch.elapsed(self));
260
- return std.math.clamp(
261
- self.realtime(),
262
- self.epoch.realtime + elapsed + interval.lower_bound,
263
- self.epoch.realtime + elapsed + interval.upper_bound,
264
- );
265
- } else {
266
- return null;
267
- }
268
- }
269
-
270
- pub fn tick(self: *Self) void {
271
- self.time.tick();
272
-
273
- if (self.synchronization_disabled) return;
274
- self.synchronize();
275
- // Expire the current epoch if successive windows failed to synchronize:
276
- // Gradual clock drift prevents us from using an epoch for more than a few seconds.
277
- if (self.epoch.elapsed(self) >= epoch_max) {
278
- log.err(
279
- "{}: no agreement on cluster time (partitioned or too many clock faults)",
280
- .{self.replica},
281
- );
282
- self.epoch.reset(self);
283
- }
284
- }
285
-
286
- /// Estimates the asymmetric delay for a sample compared to the previous window, according to
287
- /// Algorithm 1 from Section 4.2, "A System for Clock Synchronization in an Internet of Things".
288
- fn estimate_asymmetric_delay(
289
- self: *Self,
290
- replica: u8,
291
- one_way_delay: u64,
292
- clock_offset: i64,
293
- ) i64 {
294
- // Note that `one_way_delay` may be 0 for very fast networks.
295
-
296
- const error_margin = 10 * std.time.ns_per_ms;
297
-
298
- if (self.epoch.sources[replica]) |epoch| {
299
- if (one_way_delay <= epoch.one_way_delay) {
300
- return 0;
301
- } else if (clock_offset > epoch.clock_offset + error_margin) {
302
- // The asymmetric error is on the forward network path.
303
- return 0 - @intCast(i64, one_way_delay - epoch.one_way_delay);
304
- } else if (clock_offset < epoch.clock_offset - error_margin) {
305
- // The asymmetric error is on the reverse network path.
306
- return 0 + @intCast(i64, one_way_delay - epoch.one_way_delay);
307
- } else {
308
- return 0;
309
- }
310
- } else {
311
- return 0;
312
- }
313
- }
314
-
315
- fn synchronize(self: *Self) void {
316
- assert(self.window.synchronized == null);
317
-
318
- // Wait until the window has enough accurate samples:
319
- const elapsed = self.window.elapsed(self);
320
- if (elapsed < window_min) return;
321
- if (elapsed >= window_max) {
322
- // We took too long to synchronize the window, expire stale samples...
323
- const sources_sampled = self.window.sources_sampled();
324
- if (sources_sampled <= @divTrunc(self.window.sources.len, 2)) {
325
- log.err("{}: synchronization failed, partitioned (sources={} samples={})", .{
326
- self.replica,
327
- sources_sampled,
328
- self.window.samples,
329
- });
330
- } else {
331
- log.err("{}: synchronization failed, no agreement (sources={} samples={})", .{
332
- self.replica,
333
- sources_sampled,
334
- self.window.samples,
335
- });
336
- }
337
- self.window.reset(self);
338
- return;
339
- }
340
-
341
- if (!self.window.learned) return;
342
- // Do not reset `learned` any earlier than this (before we have attempted to synchronize).
343
- self.window.learned = false;
344
-
345
- // Starting with the most clock offset tolerance, while we have a majority, find the best
346
- // smallest interval with the least clock offset tolerance, reducing tolerance at each step:
347
- var tolerance: u64 = clock_offset_tolerance_max;
348
- var terminate = false;
349
- var rounds: usize = 0;
350
- // Do at least one round if tolerance=0 and cap the number of rounds to avoid runaway loops.
351
- while (!terminate and rounds < 64) : (tolerance /= 2) {
352
- if (tolerance == 0) terminate = true;
353
- rounds += 1;
354
-
355
- const interval = Marzullo.smallest_interval(self.window_tuples(tolerance));
356
- const majority = interval.sources_true > @divTrunc(self.window.sources.len, 2);
357
- if (!majority) break;
358
-
359
- // The new interval may reduce the number of `sources_true` while also decreasing error.
360
- // In other words, provided we maintain a majority, we prefer tighter tolerance bounds.
361
- self.window.synchronized = interval;
362
- }
363
-
364
- // Wait for more accurate samples or until we timeout the window for lack of majority:
365
- if (self.window.synchronized == null) return;
366
-
367
- var new_window = self.epoch;
368
- new_window.reset(self);
369
- self.epoch = self.window;
370
- self.window = new_window;
371
-
372
- self.after_synchronization();
373
- }
374
-
375
- fn after_synchronization(self: *Self) void {
376
- const new_interval = self.epoch.synchronized.?;
377
-
378
- log.debug("{}: synchronized: truechimers={}/{} clock_offset={}..{} accuracy={}", .{
379
- self.replica,
380
- new_interval.sources_true,
381
- self.epoch.sources.len,
382
- fmt.fmtDurationSigned(new_interval.lower_bound),
383
- fmt.fmtDurationSigned(new_interval.upper_bound),
384
- fmt.fmtDurationSigned(new_interval.upper_bound - new_interval.lower_bound),
385
- });
386
-
387
- const elapsed = @intCast(i64, self.epoch.elapsed(self));
388
- const system = self.realtime();
389
- const lower = self.epoch.realtime + elapsed + new_interval.lower_bound;
390
- const upper = self.epoch.realtime + elapsed + new_interval.upper_bound;
391
- const cluster = std.math.clamp(system, lower, upper);
392
-
393
- if (system == cluster) {} else if (system < lower) {
394
- const delta = lower - system;
395
- if (delta < std.time.ns_per_ms) {
396
- log.info("{}: system time is {} behind", .{
397
- self.replica,
398
- fmt.fmtDurationSigned(delta),
399
- });
400
- } else {
401
- log.err("{}: system time is {} behind, clamping system time to cluster time", .{
402
- self.replica,
403
- fmt.fmtDurationSigned(delta),
404
- });
405
- }
406
- } else {
407
- const delta = system - upper;
408
- if (delta < std.time.ns_per_ms) {
409
- log.info("{}: system time is {} ahead", .{
410
- self.replica,
411
- fmt.fmtDurationSigned(delta),
412
- });
413
- } else {
414
- log.err("{}: system time is {} ahead, clamping system time to cluster time", .{
415
- self.replica,
416
- fmt.fmtDurationSigned(delta),
417
- });
418
- }
419
- }
420
- }
421
-
422
- fn window_tuples(self: *Self, tolerance: u64) []Marzullo.Tuple {
423
- assert(self.window.sources[self.replica].?.clock_offset == 0);
424
- assert(self.window.sources[self.replica].?.one_way_delay == 0);
425
- var count: usize = 0;
426
- for (self.window.sources) |sampled, source| {
427
- if (sampled) |sample| {
428
- self.marzullo_tuples[count] = Marzullo.Tuple{
429
- .source = @intCast(u8, source),
430
- .offset = sample.clock_offset - @intCast(i64, sample.one_way_delay + tolerance),
431
- .bound = .lower,
432
- };
433
- count += 1;
434
- self.marzullo_tuples[count] = Marzullo.Tuple{
435
- .source = @intCast(u8, source),
436
- .offset = sample.clock_offset + @intCast(i64, sample.one_way_delay + tolerance),
437
- .bound = .upper,
438
- };
439
- count += 1;
440
- }
441
- }
442
- return self.marzullo_tuples[0..count];
443
- }
444
-
445
- fn minimum_one_way_delay(a: ?Sample, b: ?Sample) ?Sample {
446
- if (a == null) return b;
447
- if (b == null) return a;
448
- if (a.?.one_way_delay < b.?.one_way_delay) return a;
449
- // Choose B if B's one way delay is less or the same (we assume B is the newer sample):
450
- return b;
451
- }
452
- };
453
- }
454
-
455
- const testing = std.testing;
456
- const OffsetType = @import("../testing/time.zig").OffsetType;
457
- const DeterministicTime = @import("../testing/time.zig").Time;
458
- const DeterministicClock = Clock(DeterministicTime);
459
-
460
- const ClockUnitTestContainer = struct {
461
- const Self = @This();
462
- time: DeterministicTime,
463
- clock: DeterministicClock,
464
- rtt: u64 = 300 * std.time.ns_per_ms,
465
- owd: u64 = 150 * std.time.ns_per_ms,
466
- learn_interval: u64 = 5,
467
-
468
- pub fn init(
469
- self: *Self,
470
- allocator: std.mem.Allocator,
471
- offset_type: OffsetType,
472
- offset_coefficient_A: i64,
473
- offset_coefficient_B: i64,
474
- ) !void {
475
- // TODO(Zig) Use @returnAddress() when available.
476
- self.* = .{
477
- .time = .{
478
- .resolution = std.time.ns_per_s / 2,
479
- .offset_type = offset_type,
480
- .offset_coefficient_A = offset_coefficient_A,
481
- .offset_coefficient_B = offset_coefficient_B,
482
- },
483
- .clock = try DeterministicClock.init(allocator, 3, 0, &self.time),
484
- };
485
- }
486
-
487
- pub fn run_till_tick(self: *Self, tick: u64) void {
488
- while (self.clock.time.ticks < tick) {
489
- self.clock.time.tick();
490
-
491
- if (@mod(self.clock.time.ticks, self.learn_interval) == 0) {
492
- const on_pong_time = self.clock.monotonic();
493
- const m0 = on_pong_time - self.rtt;
494
- const t1 = @intCast(i64, on_pong_time - self.owd);
495
-
496
- self.clock.learn(1, m0, t1, on_pong_time);
497
- self.clock.learn(2, m0, t1, on_pong_time);
498
- }
499
-
500
- self.clock.synchronize();
501
- }
502
- }
503
-
504
- const AssertionPoint = struct {
505
- tick: u64,
506
- expected_offset: i64,
507
- };
508
- pub fn ticks_to_perform_assertions(self: *Self) [3]AssertionPoint {
509
- var ret: [3]AssertionPoint = undefined;
510
- switch (self.clock.time.offset_type) {
511
- .linear => {
512
- // For the first (OWD/drift per tick) ticks, the offset < OWD. This means that the
513
- // Marzullo interval is [0,0] (the offset and OWD are 0 for a replica w.r.t. itself).
514
- // Therefore the offset of `clock.realtime_synchronized` will be the analytically prescribed
515
- // offset at the start of the window.
516
- // Beyond this, the offset > OWD and the Marzullo interval will be from replica 1 and
517
- // replica 2. The `clock.realtime_synchronized` will be clamped to the lower bound.
518
- // Therefore the `clock.realtime_synchronized` will be offset by the OWD.
519
- var threshold = self.owd / @intCast(u64, self.clock.time.offset_coefficient_A);
520
- ret[0] = .{
521
- .tick = threshold,
522
- .expected_offset = self.clock.time.offset(threshold - self.learn_interval),
523
- };
524
- ret[1] = .{
525
- .tick = threshold + 100,
526
- .expected_offset = @intCast(i64, self.owd),
527
- };
528
- ret[2] = .{
529
- .tick = threshold + 200,
530
- .expected_offset = @intCast(i64, self.owd),
531
- };
532
- },
533
- .periodic => {
534
- ret[0] = .{
535
- .tick = @intCast(u64, @divTrunc(self.clock.time.offset_coefficient_B, 4)),
536
- .expected_offset = @intCast(i64, self.owd),
537
- };
538
- ret[1] = .{
539
- .tick = @intCast(u64, @divTrunc(self.clock.time.offset_coefficient_B, 2)),
540
- .expected_offset = 0,
541
- };
542
- ret[2] = .{
543
- .tick = @intCast(u64, @divTrunc(self.clock.time.offset_coefficient_B * 3, 4)),
544
- .expected_offset = -@intCast(i64, self.owd),
545
- };
546
- },
547
- .step => {
548
- ret[0] = .{
549
- .tick = @intCast(u64, self.clock.time.offset_coefficient_B - 10),
550
- .expected_offset = 0,
551
- };
552
- ret[1] = .{
553
- .tick = @intCast(u64, self.clock.time.offset_coefficient_B + 10),
554
- .expected_offset = -@intCast(i64, self.owd),
555
- };
556
- ret[2] = .{
557
- .tick = @intCast(u64, self.clock.time.offset_coefficient_B + 10),
558
- .expected_offset = -@intCast(i64, self.owd),
559
- };
560
- },
561
- .non_ideal => unreachable, // use ideal clocks for the unit tests
562
- }
563
-
564
- return ret;
565
- }
566
- };
567
-
568
- test "ideal clocks get clamped to cluster time" {
569
- std.testing.log_level = .err;
570
- var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
571
- defer arena.deinit();
572
- const allocator = arena.allocator();
573
-
574
- var ideal_constant_drift_clock: ClockUnitTestContainer = undefined;
575
- try ideal_constant_drift_clock.init(
576
- allocator,
577
- OffsetType.linear,
578
- std.time.ns_per_ms, // loses 1ms per tick
579
- 0,
580
- );
581
- var linear_clock_assertion_points = ideal_constant_drift_clock.ticks_to_perform_assertions();
582
- for (linear_clock_assertion_points) |point| {
583
- ideal_constant_drift_clock.run_till_tick(point.tick);
584
- try testing.expectEqual(
585
- point.expected_offset,
586
- @intCast(i64, ideal_constant_drift_clock.clock.monotonic()) -
587
- ideal_constant_drift_clock.clock.realtime_synchronized().?,
588
- );
589
- }
590
-
591
- var ideal_periodic_drift_clock: ClockUnitTestContainer = undefined;
592
- try ideal_periodic_drift_clock.init(
593
- allocator,
594
- OffsetType.periodic,
595
- std.time.ns_per_s, // loses up to 1s
596
- 200, // period of 200 ticks
597
- );
598
- var ideal_periodic_drift_clock_assertion_points =
599
- ideal_periodic_drift_clock.ticks_to_perform_assertions();
600
- for (ideal_periodic_drift_clock_assertion_points) |point| {
601
- ideal_periodic_drift_clock.run_till_tick(point.tick);
602
- try testing.expectEqual(
603
- point.expected_offset,
604
- @intCast(i64, ideal_periodic_drift_clock.clock.monotonic()) -
605
- ideal_periodic_drift_clock.clock.realtime_synchronized().?,
606
- );
607
- }
608
-
609
- var ideal_jumping_clock: ClockUnitTestContainer = undefined;
610
- try ideal_jumping_clock.init(
611
- allocator,
612
- OffsetType.step,
613
- -5 * std.time.ns_per_day, // jumps 5 days ahead.
614
- 49, // after 49 ticks
615
- );
616
- var ideal_jumping_clock_assertion_points = ideal_jumping_clock.ticks_to_perform_assertions();
617
- for (ideal_jumping_clock_assertion_points) |point| {
618
- ideal_jumping_clock.run_till_tick(point.tick);
619
- try testing.expectEqual(
620
- point.expected_offset,
621
- @intCast(i64, ideal_jumping_clock.clock.monotonic()) -
622
- ideal_jumping_clock.clock.realtime_synchronized().?,
623
- );
624
- }
625
- }
626
-
627
- const PacketSimulatorOptions = @import("../testing/packet_simulator.zig").PacketSimulatorOptions;
628
- const PacketSimulatorType = @import("../testing/packet_simulator.zig").PacketSimulatorType;
629
- const Path = @import("../testing/packet_simulator.zig").Path;
630
- const Command = @import("../vsr.zig").Command;
631
- const ClockSimulator = struct {
632
- const Packet = struct {
633
- m0: u64,
634
- t1: ?i64,
635
- clock_simulator: *ClockSimulator,
636
-
637
- /// PacketSimulator requires this function, but we don't actually have anything to deinit.
638
- pub fn deinit(packet: *const Packet) void {
639
- _ = packet;
640
- }
641
- };
642
-
643
- const Options = struct {
644
- ping_timeout: u32,
645
- clock_count: u8,
646
- network_options: PacketSimulatorOptions,
647
- };
648
-
649
- allocator: std.mem.Allocator,
650
- options: Options,
651
- ticks: u64 = 0,
652
- network: PacketSimulatorType(Packet),
653
- times: []DeterministicTime,
654
- clocks: []DeterministicClock,
655
- prng: std.rand.DefaultPrng,
656
-
657
- pub fn init(allocator: std.mem.Allocator, options: Options) !ClockSimulator {
658
- var network = try PacketSimulatorType(Packet).init(allocator, options.network_options);
659
- errdefer network.deinit(allocator);
660
-
661
- var times = try allocator.alloc(DeterministicTime, options.clock_count);
662
- errdefer allocator.free(times);
663
-
664
- var clocks = try allocator.alloc(DeterministicClock, options.clock_count);
665
- errdefer allocator.free(clocks);
666
-
667
- var prng = std.rand.DefaultPrng.init(options.network_options.seed);
668
-
669
- for (clocks) |*clock, replica| {
670
- errdefer for (clocks[0..replica]) |*c| c.deinit(allocator);
671
-
672
- const amplitude = prng.random().intRangeAtMost(i64, -10, 10) * std.time.ns_per_s;
673
- const phase = prng.random().intRangeAtMost(i64, 100, 1000) +
674
- @floatToInt(i64, prng.random().floatNorm(f64) * 50);
675
- times[replica] = .{
676
- .resolution = std.time.ns_per_s / 2, // delta_t = 0.5s
677
- .offset_type = OffsetType.non_ideal,
678
- .offset_coefficient_A = amplitude,
679
- .offset_coefficient_B = phase,
680
- .offset_coefficient_C = 10,
681
- };
682
-
683
- clock.* = try DeterministicClock.init(
684
- allocator,
685
- options.clock_count,
686
- @intCast(u8, replica),
687
- &times[replica],
688
- );
689
- errdefer clock.deinit(allocator);
690
- }
691
- errdefer for (clocks) |*clock| clock.deinit(allocator);
692
-
693
- return ClockSimulator{
694
- .allocator = allocator,
695
- .options = options,
696
- .network = network,
697
- .times = times,
698
- .clocks = clocks,
699
- .prng = prng,
700
- };
701
- }
702
-
703
- pub fn deinit(self: *ClockSimulator) void {
704
- for (self.clocks) |*clock| clock.deinit(self.allocator);
705
- self.allocator.free(self.clocks);
706
- self.allocator.free(self.times);
707
- self.network.deinit(self.allocator);
708
- }
709
-
710
- pub fn tick(self: *ClockSimulator) void {
711
- self.ticks += 1;
712
- self.network.tick();
713
- for (self.clocks) |*clock| {
714
- clock.tick();
715
- }
716
-
717
- for (self.clocks) |*clock| {
718
- if (clock.time.ticks % self.options.ping_timeout == 0) {
719
- const m0 = clock.monotonic();
720
- for (self.clocks) |_, target| {
721
- if (target != clock.replica) {
722
- self.network.submit_packet(
723
- .{
724
- .m0 = m0,
725
- .t1 = null,
726
- .clock_simulator = self,
727
- },
728
- ClockSimulator.handle_packet,
729
- .{
730
- .source = clock.replica,
731
- .target = @intCast(u8, target),
732
- },
733
- );
734
- }
735
- }
736
- }
737
- }
738
- }
739
-
740
- fn handle_packet(packet: Packet, path: Path) void {
741
- const self = packet.clock_simulator;
742
- const target = &self.clocks[path.target];
743
-
744
- if (packet.t1) |t1| {
745
- target.learn(
746
- path.source,
747
- packet.m0,
748
- t1,
749
- target.monotonic(),
750
- );
751
- } else {
752
- self.network.submit_packet(
753
- .{
754
- .m0 = packet.m0,
755
- .t1 = target.realtime(),
756
- .clock_simulator = self,
757
- },
758
- ClockSimulator.handle_packet,
759
- .{
760
- // send the packet back to where it came from.
761
- .source = path.target,
762
- .target = path.source,
763
- },
764
- );
765
- }
766
- }
767
- };
768
-
769
- test "clock: fuzz test" {
770
- std.testing.log_level = .err; // silence all clock logs
771
-
772
- const ticks_max: u64 = 1_000_000;
773
- const clock_count: u8 = 3;
774
- const SystemTime = @import("../testing/time.zig").Time;
775
- var system_time = SystemTime{
776
- .resolution = constants.tick_ms * std.time.ns_per_ms,
777
- .offset_type = .linear,
778
- .offset_coefficient_A = 0,
779
- .offset_coefficient_B = 0,
780
- };
781
- var seed = @intCast(u64, system_time.realtime());
782
- var min_sync_error: u64 = 1_000_000_000;
783
- var max_sync_error: u64 = 0;
784
- var max_clock_offset: u64 = 0;
785
- var min_clock_offset: u64 = 1_000_000_000;
786
- var simulator = try ClockSimulator.init(std.testing.allocator, .{
787
- .network_options = .{
788
- .replica_count = 3,
789
- .client_count = 0,
790
- .seed = seed,
791
-
792
- .one_way_delay_mean = 25,
793
- .one_way_delay_min = 10,
794
- .packet_loss_probability = 10,
795
- .path_maximum_capacity = 20,
796
- .path_clog_duration_mean = 200,
797
- .path_clog_probability = 2,
798
- .packet_replay_probability = 2,
799
-
800
- .partition_mode = .isolate_single,
801
- .partition_probability = 25,
802
- .unpartition_probability = 5,
803
- .partition_stability = 100,
804
- .unpartition_stability = 10,
805
- },
806
- .clock_count = clock_count,
807
- .ping_timeout = 20,
808
- });
809
- defer simulator.deinit();
810
-
811
- var clock_ticks_without_synchronization = [_]u32{0} ** clock_count;
812
- while (simulator.ticks < ticks_max) {
813
- simulator.tick();
814
-
815
- for (simulator.clocks) |*clock, index| {
816
- var offset = clock.time.offset(simulator.ticks);
817
- var abs_offset = if (offset >= 0) @intCast(u64, offset) else @intCast(u64, -offset);
818
- max_clock_offset = if (abs_offset > max_clock_offset) abs_offset else max_clock_offset;
819
- min_clock_offset = if (abs_offset < min_clock_offset) abs_offset else min_clock_offset;
820
-
821
- var synced_time = clock.realtime_synchronized() orelse {
822
- clock_ticks_without_synchronization[index] += 1;
823
- continue;
824
- };
825
-
826
- for (simulator.clocks) |*other_clock, other_clock_index| {
827
- if (index == other_clock_index) continue;
828
- var other_clock_sync_time = other_clock.realtime_synchronized() orelse {
829
- continue;
830
- };
831
- var err: i64 = synced_time - other_clock_sync_time;
832
- var abs_err: u64 = if (err >= 0) @intCast(u64, err) else @intCast(u64, -err);
833
- max_sync_error = if (abs_err > max_sync_error) abs_err else max_sync_error;
834
- min_sync_error = if (abs_err < min_sync_error) abs_err else min_sync_error;
835
- }
836
- }
837
- }
838
-
839
- std.debug.print("seed={}, max ticks={}, clock count={}\n", .{
840
- seed,
841
- ticks_max,
842
- clock_count,
843
- });
844
- std.debug.print("absolute clock offsets with respect to test time:\n", .{});
845
- std.debug.print("maximum={}\n", .{fmt.fmtDurationSigned(@intCast(i64, max_clock_offset))});
846
- std.debug.print("minimum={}\n", .{fmt.fmtDurationSigned(@intCast(i64, min_clock_offset))});
847
- std.debug.print("\nabsolute synchronization errors between clocks:\n", .{});
848
- std.debug.print("maximum={}\n", .{fmt.fmtDurationSigned(@intCast(i64, max_sync_error))});
849
- std.debug.print("minimum={}\n", .{fmt.fmtDurationSigned(@intCast(i64, min_sync_error))});
850
- std.debug.print("clock ticks without synchronization={d}\n", .{
851
- clock_ticks_without_synchronization,
852
- });
853
- }